From 12295ce7c038bdec7575c23676a7cc5bb9e0734d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 Nov 2017 10:58:32 +0100 Subject: [PATCH] Removed aligns --- plugins/DensityMatrix/density_matrix.irp.f | 6 +- plugins/FOBOCI/SC2_1h1p.irp.f | 4 +- plugins/FOBOCI/density_matrix.irp.f | 20 +- plugins/FOBOCI/diag_fock_inactiv_virt.irp.f | 4 +- .../Hartree_Fock_SlaterDressed/dressing.irp.f | 10 +- plugins/MRCC_Utils/davidson.irp.f | 65 +-- plugins/Perturbation/pt2_equations.irp.f | 2 +- plugins/Properties/average.irp.f | 4 +- plugins/Properties/delta_rho.irp.f | 6 +- plugins/Properties/mulliken.irp.f | 6 +- plugins/Properties/routines_test.irp.f | 4 +- .../slater_rules_mono_electronic.irp.f | 22 +- .../Selectors_CASSD/NEEDED_CHILDREN_MODULES | 2 +- plugins/Selectors_CASSD/selectors.irp.f | 31 -- plugins/Selectors_CASSD/zmq.irp.f | 121 ---- .../Selectors_full/NEEDED_CHILDREN_MODULES | 2 +- plugins/Selectors_full/selectors.irp.f | 31 -- .../NEEDED_CHILDREN_MODULES | 2 +- .../e_corr_selectors.irp.f | 79 --- plugins/Selectors_no_sorted/selectors.irp.f | 20 - plugins/read_integral/read_integrals_mo.irp.f | 2 +- src/AO_Basis/ao_overlap.irp.f | 16 +- src/AO_Basis/aos.irp.f | 31 +- src/Bitmask/mpi.irp.f | 44 ++ src/Davidson/davidson_parallel.irp.f | 66 +++ src/Davidson/diagonalization.irp.f | 2 - src/Davidson/diagonalization_hs2.irp.f | 1 - src/Davidson/u0Hu0_old.irp.f | 518 ------------------ src/Determinants/filter_connected.irp.f | 12 +- src/Determinants/two_body_dm_map.irp.f | 3 +- src/Integrals_Bielec/ao_bi_integrals.irp.f | 70 +-- src/Integrals_Bielec/gauss_legendre.irp.f | 13 +- src/Integrals_Bielec/integrals_3_index.irp.f | 4 +- src/Integrals_Bielec/mo_bi_integrals.irp.f | 97 ++-- src/Integrals_Monoelec/ao_mono_ints.irp.f | 3 +- src/Integrals_Monoelec/kin_mo_ints.irp.f | 2 +- src/Integrals_Monoelec/mo_mono_ints.irp.f | 2 +- .../pot_ao_pseudo_ints.irp.f | 6 +- src/Integrals_Monoelec/pot_mo_ints.irp.f | 4 +- .../pot_mo_pseudo_ints.irp.f | 2 +- src/Integrals_Monoelec/spread_dipole_ao.irp.f | 27 +- src/Integrals_Monoelec/spread_dipole_mo.irp.f | 12 +- src/MOGuess/mo_ortho_lowdin.irp.f | 8 +- src/MOGuess/pot_mo_ortho_canonical_ints.irp.f | 2 +- src/MOGuess/pot_mo_ortho_lowdin_ints.irp.f | 2 +- src/MO_Basis/ao_ortho_canonical.irp.f | 6 +- src/MO_Basis/mo_overlap.irp.f | 4 +- src/MO_Basis/mo_permutation.irp.f | 2 +- src/MO_Basis/mos.irp.f | 51 +- src/MO_Basis/swap_mos.irp.f | 2 +- src/MO_Basis/utils.irp.f | 10 +- {plugins => src}/MPI/.gitignore | 0 {plugins => src}/MPI/NEEDED_CHILDREN_MODULES | 0 {plugins => src}/MPI/README.rst | 0 {plugins => src}/MPI/mpi.irp.f | 30 +- src/Nuclei/nuclei.irp.f | 31 +- src/Selectors_Utils/NEEDED_CHILDREN_MODULES | 1 + src/Selectors_Utils/README.rst | 190 +++++++ .../Selectors_Utils}/e_corr_selectors.irp.f | 1 - src/Selectors_Utils/selectors.irp.f | 34 ++ .../Selectors_Utils}/zmq.irp.f | 88 +-- src/Utils/LinearAlgebra.irp.f | 2 +- src/Utils/integration.irp.f | 22 +- src/Utils/transpose.irp.f | 4 +- src/Utils/util.irp.f | 17 +- src/ZMQ/put_get.irp.f | 76 +++ 66 files changed, 649 insertions(+), 1312 deletions(-) delete mode 100644 plugins/Selectors_CASSD/zmq.irp.f delete mode 100644 plugins/Selectors_no_sorted/e_corr_selectors.irp.f create mode 100644 src/Bitmask/mpi.irp.f delete mode 100644 src/Davidson/u0Hu0_old.irp.f rename {plugins => src}/MPI/.gitignore (100%) rename {plugins => src}/MPI/NEEDED_CHILDREN_MODULES (100%) rename {plugins => src}/MPI/README.rst (100%) rename {plugins => src}/MPI/mpi.irp.f (72%) create mode 100644 src/Selectors_Utils/NEEDED_CHILDREN_MODULES create mode 100644 src/Selectors_Utils/README.rst rename {plugins/Selectors_full => src/Selectors_Utils}/e_corr_selectors.irp.f (99%) create mode 100644 src/Selectors_Utils/selectors.irp.f rename {plugins/Selectors_full => src/Selectors_Utils}/zmq.irp.f (73%) create mode 100644 src/ZMQ/put_get.irp.f diff --git a/plugins/DensityMatrix/density_matrix.irp.f b/plugins/DensityMatrix/density_matrix.irp.f index 3fc1d6f9..5e7d7cec 100644 --- a/plugins/DensityMatrix/density_matrix.irp.f +++ b/plugins/DensityMatrix/density_matrix.irp.f @@ -167,9 +167,9 @@ END_PROVIDER END_TEMPLATE - BEGIN_PROVIDER [ double precision, two_body_dm_diag_aa, (mo_tot_num_align,mo_tot_num)] -&BEGIN_PROVIDER [ double precision, two_body_dm_diag_bb, (mo_tot_num_align,mo_tot_num)] -&BEGIN_PROVIDER [ double precision, two_body_dm_diag_ab, (mo_tot_num_align,mo_tot_num)] + BEGIN_PROVIDER [ double precision, two_body_dm_diag_aa, (mo_tot_num,mo_tot_num)] +&BEGIN_PROVIDER [ double precision, two_body_dm_diag_bb, (mo_tot_num,mo_tot_num)] +&BEGIN_PROVIDER [ double precision, two_body_dm_diag_ab, (mo_tot_num,mo_tot_num)] implicit none use bitmasks BEGIN_DOC diff --git a/plugins/FOBOCI/SC2_1h1p.irp.f b/plugins/FOBOCI/SC2_1h1p.irp.f index 7733831c..58f47552 100644 --- a/plugins/FOBOCI/SC2_1h1p.irp.f +++ b/plugins/FOBOCI/SC2_1h1p.irp.f @@ -725,8 +725,8 @@ subroutine density_matrix_1h1p(dets_in,u_in,density_matrix_alpha,density_matrix_ integer, intent(in) :: dim_in, sze, N_st, Nint integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) double precision, intent(inout) :: u_in(dim_in,N_st) - double precision, intent(inout) :: density_matrix_alpha(mo_tot_num_align,mo_tot_num) - double precision, intent(inout) :: density_matrix_beta(mo_tot_num_align,mo_tot_num) + double precision, intent(inout) :: density_matrix_alpha(mo_tot_num,mo_tot_num) + double precision, intent(inout) :: density_matrix_beta(mo_tot_num,mo_tot_num) double precision, intent(inout) :: norm integer :: i,j,k,l diff --git a/plugins/FOBOCI/density_matrix.irp.f b/plugins/FOBOCI/density_matrix.irp.f index aaf80c4f..0e668de5 100644 --- a/plugins/FOBOCI/density_matrix.irp.f +++ b/plugins/FOBOCI/density_matrix.irp.f @@ -1,5 +1,5 @@ - BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_generators_restart, (mo_tot_num_align,mo_tot_num) ] -&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_generators_restart, (mo_tot_num_align,mo_tot_num) ] + BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_generators_restart, (mo_tot_num,mo_tot_num) ] +&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_generators_restart, (mo_tot_num,mo_tot_num) ] &BEGIN_PROVIDER [ double precision, norm_generators_restart] implicit none BEGIN_DOC @@ -40,9 +40,9 @@ !$OMP PRIVATE(j,k,l,m,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc, & !$OMP tmp_a, tmp_b, n_occ_alpha)& !$OMP SHARED(psi_det_generators_restart,psi_coef_generators_restart,N_int,elec_alpha_num,& - !$OMP elec_beta_num,one_body_dm_mo_alpha_generators_restart,one_body_dm_mo_beta_generators_restart,N_det_generators_restart,mo_tot_num_align,& + !$OMP elec_beta_num,one_body_dm_mo_alpha_generators_restart,one_body_dm_mo_beta_generators_restart,N_det_generators_restart,& !$OMP mo_tot_num,N_states, state_average_weight) - allocate(tmp_a(mo_tot_num_align,mo_tot_num), tmp_b(mo_tot_num_align,mo_tot_num) ) + allocate(tmp_a(mo_tot_num,mo_tot_num), tmp_b(mo_tot_num,mo_tot_num) ) tmp_a = 0.d0 tmp_b = 0.d0 !$OMP DO SCHEDULE(dynamic) @@ -98,7 +98,7 @@ END_PROVIDER -BEGIN_PROVIDER [ double precision, one_body_dm_mo_generators_restart, (mo_tot_num_align,mo_tot_num) ] +BEGIN_PROVIDER [ double precision, one_body_dm_mo_generators_restart, (mo_tot_num,mo_tot_num) ] implicit none BEGIN_DOC ! One-body density matrix for the generators_restart @@ -106,7 +106,7 @@ BEGIN_PROVIDER [ double precision, one_body_dm_mo_generators_restart, (mo_tot_nu one_body_dm_mo_generators_restart = one_body_dm_mo_alpha_generators_restart + one_body_dm_mo_beta_generators_restart END_PROVIDER -BEGIN_PROVIDER [ double precision, one_body_spin_density_mo_generators_restart, (mo_tot_num_align,mo_tot_num) ] +BEGIN_PROVIDER [ double precision, one_body_spin_density_mo_generators_restart, (mo_tot_num,mo_tot_num) ] implicit none BEGIN_DOC ! rho(alpha) - rho(beta) @@ -115,16 +115,16 @@ BEGIN_PROVIDER [ double precision, one_body_spin_density_mo_generators_restart, END_PROVIDER - BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_osoci, (mo_tot_num_align,mo_tot_num) ] -&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_osoci, (mo_tot_num_align,mo_tot_num) ] + BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_osoci, (mo_tot_num,mo_tot_num) ] +&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_osoci, (mo_tot_num,mo_tot_num) ] implicit none BEGIN_DOC ! Alpha and beta one-body density matrix that will be used for the OSOCI approach END_DOC END_PROVIDER - BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_1h1p, (mo_tot_num_align,mo_tot_num) ] -&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_1h1p, (mo_tot_num_align,mo_tot_num) ] + BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_1h1p, (mo_tot_num,mo_tot_num) ] +&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_1h1p, (mo_tot_num,mo_tot_num) ] implicit none BEGIN_DOC ! Alpha and beta one-body density matrix that will be used for the 1h1p approach diff --git a/plugins/FOBOCI/diag_fock_inactiv_virt.irp.f b/plugins/FOBOCI/diag_fock_inactiv_virt.irp.f index 7c30e175..83170b4b 100644 --- a/plugins/FOBOCI/diag_fock_inactiv_virt.irp.f +++ b/plugins/FOBOCI/diag_fock_inactiv_virt.irp.f @@ -1,7 +1,7 @@ subroutine diag_inactive_virt_and_update_mos implicit none integer :: i,j,i_inact,j_inact,i_virt,j_virt - double precision :: tmp(mo_tot_num_align,mo_tot_num) + double precision :: tmp(mo_tot_num,mo_tot_num) character*(64) :: label print*,'Diagonalizing the occ and virt Fock operator' tmp = 0.d0 @@ -38,7 +38,7 @@ end subroutine diag_inactive_virt_new_and_update_mos implicit none integer :: i,j,i_inact,j_inact,i_virt,j_virt,k,k_act - double precision :: tmp(mo_tot_num_align,mo_tot_num),accu,get_mo_bielec_integral + double precision :: tmp(mo_tot_num,mo_tot_num),accu,get_mo_bielec_integral character*(64) :: label tmp = 0.d0 do i = 1, mo_tot_num diff --git a/plugins/Hartree_Fock_SlaterDressed/dressing.irp.f b/plugins/Hartree_Fock_SlaterDressed/dressing.irp.f index f42a0b0f..5426badb 100644 --- a/plugins/Hartree_Fock_SlaterDressed/dressing.irp.f +++ b/plugins/Hartree_Fock_SlaterDressed/dressing.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral_dressing, (ao_num_align,ao_num) ] +BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral_dressing, (ao_num,ao_num) ] implicit none BEGIN_DOC ! Dressing of the core hamiltonian in the orthogonal AO basis set @@ -25,7 +25,7 @@ BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral_dressing, (ao_num END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral, (ao_num_align, ao_num) ] +BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral, (ao_num, ao_num) ] implicit none BEGIN_DOC ! h core in orthogonal AO basis @@ -53,7 +53,7 @@ BEGIN_PROVIDER [ double precision, ao_mono_elec_integral_dressing, (ao_num,ao_nu ao_mono_elec_integral_dressing,size(ao_mono_elec_integral_dressing,1)) END_PROVIDER -BEGIN_PROVIDER [ double precision, mo_mono_elec_integral_dressing, (mo_tot_num_align,mo_tot_num) ] +BEGIN_PROVIDER [ double precision, mo_mono_elec_integral_dressing, (mo_tot_num,mo_tot_num) ] implicit none BEGIN_DOC ! Dressing of the core hamiltonian in the MO basis set @@ -73,14 +73,14 @@ BEGIN_PROVIDER [ integer, idx_dressing ] END_PROVIDER -BEGIN_PROVIDER [ double precision, cusp_corrected_mos, (ao_num_align,mo_tot_num) ] +BEGIN_PROVIDER [ double precision, cusp_corrected_mos, (ao_num,mo_tot_num) ] implicit none BEGIN_DOC ! Dressing core hamiltonian in the AO basis set END_DOC integer :: i,j double precision, allocatable :: F(:,:), M(:,:) - allocate(F(mo_tot_num_align,mo_tot_num),M(ao_num,mo_tot_num)) + allocate(F(mo_tot_num,mo_tot_num),M(ao_num,mo_tot_num)) logical :: oneshot diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index 436b89a4..5014fcef 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -85,7 +85,6 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s double precision, intent(inout) :: u_in(dim_in,N_st_diag) double precision, intent(out) :: energies(N_st_diag) - integer :: sze_8 integer :: iter integer :: i,j,k,l,m logical :: converged @@ -138,13 +137,10 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s enddo write(iunit,'(A)') trim(write_buffer) - integer, external :: align_double - sze_8 = align_double(sze) - allocate( & - W(sze_8,N_st_diag,davidson_sze_max), & - U(sze_8,N_st_diag,davidson_sze_max), & - R(sze_8,N_st_diag), & + W(sze,N_st_diag,davidson_sze_max), & + U(sze,N_st_diag,davidson_sze_max), & + R(sze,N_st_diag), & h(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), & y(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), & residual_norm(N_st_diag), & @@ -199,7 +195,7 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s ! Compute |W_k> = \sum_i |i> ! ----------------------------------------- - call H_u_0_mrcc_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,istate,N_st_diag,sze_8) + call H_u_0_mrcc_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,istate,N_st_diag,sze) ! Compute h_kl = = @@ -320,7 +316,7 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s end -subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze_8) +subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze) use bitmasks implicit none BEGIN_DOC @@ -329,16 +325,16 @@ subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze_8) ! n : number of determinants ! END_DOC - integer, intent(in) :: n,Nint,N_st,sze_8 + integer, intent(in) :: n,Nint,N_st,sze double precision, intent(out) :: e_0(N_st) - double precision, intent(in) :: u_0(sze_8,N_st) + double precision, intent(in) :: u_0(sze,N_st) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) integer,intent(in) :: istate double precision, allocatable :: v_0(:,:), H_jj(:) double precision :: u_dot_u,u_dot_v,diag_H_mat_elem integer :: i,j - allocate(H_jj(n), v_0(sze_8,N_st)) + allocate(H_jj(n), v_0(sze,N_st)) do i = 1, n H_jj(i) = diag_H_mat_elem(keys_tmp(1,1,i),Nint) enddo @@ -347,7 +343,7 @@ subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze_8) H_jj(idx_ref(i)) += delta_ii(istate,i) enddo - call H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate,N_st,sze_8) + call H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate,N_st,sze) do i=1,N_st e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/u_dot_u(u_0(1,i),n) enddo @@ -355,7 +351,7 @@ subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze_8) end -subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8) +subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze) use bitmasks implicit none BEGIN_DOC @@ -365,9 +361,9 @@ subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8) ! ! H_jj : array of END_DOC - integer, intent(in) :: n,Nint,istate_in,N_st,sze_8 - double precision, intent(out) :: v_0(sze_8,N_st) - double precision, intent(in) :: u_0(sze_8,N_st) + integer, intent(in) :: n,Nint,istate_in,N_st,sze + double precision, intent(out) :: v_0(sze,N_st) + double precision, intent(in) :: u_0(sze,N_st) double precision, intent(in) :: H_jj(n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) double precision :: hij @@ -396,9 +392,9 @@ subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8) !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& - !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,sorted,shortcut,sort_idx,version,N_st,sze_8,& + !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,sorted,shortcut,sort_idx,version,N_st,sze,& !$OMP istate_in,delta_ij,N_det_ref,N_det_non_ref,idx_ref,idx_non_ref) - allocate(vt(sze_8,N_st)) + allocate(vt(sze,N_st)) Vt = 0.d0 !$OMP DO SCHEDULE(static,1) @@ -590,7 +586,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz double precision, intent(inout) :: u_in(dim_in,N_st_diag) double precision, intent(out) :: energies(N_st_diag) - integer :: sze_8 + integer :: sze integer :: iter integer :: i,j,k,l,m logical :: converged @@ -649,14 +645,11 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz enddo write(iunit,'(A)') trim(write_buffer) - integer, external :: align_double - sze_8 = align_double(sze) - itermax = min(davidson_sze_max, sze/N_st_diag) allocate( & - W(sze_8,N_st_diag*itermax), & - U(sze_8,N_st_diag*itermax), & - S(sze_8,N_st_diag*itermax), & + W(sze,N_st_diag*itermax), & + U(sze,N_st_diag*itermax), & + S(sze,N_st_diag*itermax), & h(N_st_diag*itermax,N_st_diag*itermax), & y(N_st_diag*itermax,N_st_diag*itermax), & s_(N_st_diag*itermax,N_st_diag*itermax), & @@ -722,7 +715,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz ! ----------------------------------------- call H_S2_u_0_mrcc_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,& - istate,N_st_diag,sze_8) + istate,N_st_diag,sze) ! Compute h_kl = = @@ -960,7 +953,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz end -subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8) +subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_in,N_st,sze) use bitmasks implicit none BEGIN_DOC @@ -972,9 +965,9 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i ! ! S2_jj : array of END_DOC - integer, intent(in) :: N_st,n,Nint, sze_8, istate_in - double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st) - double precision, intent(in) :: u_0(sze_8,N_st) + integer, intent(in) :: N_st,n,Nint, sze, istate_in + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + double precision, intent(in) :: u_0(sze,N_st) double precision, intent(in) :: H_jj(n), S2_jj(n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) double precision :: hij,s2 @@ -987,20 +980,16 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i integer(bit_kind) :: sorted_i(Nint) integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate - integer :: N_st_8 - integer, external :: align_double !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut - N_st_8 = align_double(N_st) - ASSERT (Nint > 0) ASSERT (Nint == N_int) ASSERT (n>0) PROVIDE ref_bitmask_energy allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2)) - allocate(ut(N_st_8,n)) + allocate(ut(N_st,n)) v_0 = 0.d0 s_0 = 0.d0 @@ -1017,9 +1006,9 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i PROVIDE delta_ij_s2 !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& - !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8, & + !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st, & !$OMP N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij, delta_ij_s2,istate_in) - allocate(vt(N_st_8,n),st(N_st_8,n)) + allocate(vt(N_st,n),st(N_st,n)) Vt = 0.d0 St = 0.d0 diff --git a/plugins/Perturbation/pt2_equations.irp.f b/plugins/Perturbation/pt2_equations.irp.f index 1cd0d440..57b3a814 100644 --- a/plugins/Perturbation/pt2_equations.irp.f +++ b/plugins/Perturbation/pt2_equations.irp.f @@ -314,7 +314,7 @@ subroutine pt2_epstein_nesbet_SC2_projected ($arguments) degree = popcnt(xor( ref_bitmask(1,1), det_pert(1,1))) + & popcnt(xor( ref_bitmask(1,2), det_pert(1,2))) - !DEC$ NOUNROLL + !DIR$ NOUNROLL do l=2,Nint degree = degree+ popcnt(xor( ref_bitmask(l,1), det_pert(l,1))) + & popcnt(xor( ref_bitmask(l,2), det_pert(l,2))) diff --git a/plugins/Properties/average.irp.f b/plugins/Properties/average.irp.f index b57c8ef2..5672277d 100644 --- a/plugins/Properties/average.irp.f +++ b/plugins/Properties/average.irp.f @@ -1,7 +1,7 @@ subroutine get_average(array,density,average) implicit none - double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num) - double precision, intent(in) :: density(mo_tot_num_align,mo_tot_num) + double precision, intent(in) :: array(mo_tot_num,mo_tot_num) + double precision, intent(in) :: density(mo_tot_num,mo_tot_num) double precision, intent(out):: average integer :: i,j BEGIN_DOC diff --git a/plugins/Properties/delta_rho.irp.f b/plugins/Properties/delta_rho.irp.f index 11375b87..e701377b 100644 --- a/plugins/Properties/delta_rho.irp.f +++ b/plugins/Properties/delta_rho.irp.f @@ -73,7 +73,7 @@ END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_all_points, (ao_num_align, ao_num, N_z_pts)] +BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_all_points, (ao_num, ao_num, N_z_pts)] BEGIN_DOC ! array of the overlap in x,y between the AO function and integrated between [z,z+dz] in the z axis ! for all the z points that are given (N_z_pts) @@ -148,7 +148,7 @@ BEGIN_PROVIDER [integer, i_unit_integrated_delta_rho] END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_one_point, (ao_num_align, ao_num )] +BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_one_point, (ao_num, ao_num )] BEGIN_DOC ! array of the overlap in x,y between the AO function and integrated between [z,z+dz] in the z axis ! for one specific z point @@ -209,7 +209,7 @@ BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_one_point, (ao_num_al !$OMP END PARALLEL DO END_PROVIDER -BEGIN_PROVIDER [double precision, mo_integrated_delta_rho_one_point, (mo_tot_num_align,mo_tot_num)] +BEGIN_PROVIDER [double precision, mo_integrated_delta_rho_one_point, (mo_tot_num,mo_tot_num)] BEGIN_DOC ! ! array of the integrals needed of integrated_rho(alpha,z) - integrated_rho(beta,z) for z = z_one_point diff --git a/plugins/Properties/mulliken.irp.f b/plugins/Properties/mulliken.irp.f index 68b620c5..e27084c3 100644 --- a/plugins/Properties/mulliken.irp.f +++ b/plugins/Properties/mulliken.irp.f @@ -1,5 +1,5 @@ -BEGIN_PROVIDER [double precision, spin_population, (ao_num_align,ao_num)] +BEGIN_PROVIDER [double precision, spin_population, (ao_num,ao_num)] implicit none integer :: i,j BEGIN_DOC @@ -57,8 +57,8 @@ BEGIN_PROVIDER [double precision, mulliken_spin_densities, (nucl_num)] END_PROVIDER - BEGIN_PROVIDER [double precision, electronic_population_alpha, (ao_num_align,ao_num)] -&BEGIN_PROVIDER [double precision, electronic_population_beta, (ao_num_align,ao_num)] + BEGIN_PROVIDER [double precision, electronic_population_alpha, (ao_num,ao_num)] +&BEGIN_PROVIDER [double precision, electronic_population_beta, (ao_num,ao_num)] implicit none integer :: i,j BEGIN_DOC diff --git a/plugins/Properties/routines_test.irp.f b/plugins/Properties/routines_test.irp.f index 231c6f2d..8e06d29d 100644 --- a/plugins/Properties/routines_test.irp.f +++ b/plugins/Properties/routines_test.irp.f @@ -2,7 +2,7 @@ subroutine test_average_value(array,value) implicit none - double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num) + double precision, intent(in) :: array(mo_tot_num,mo_tot_num) double precision, intent(in) :: value double precision :: tmp,hij integer :: i,j @@ -24,7 +24,7 @@ end subroutine test_average_value_alpha_beta(array,value) implicit none - double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num) + double precision, intent(in) :: array(mo_tot_num,mo_tot_num) double precision, intent(in) :: value double precision :: tmp,hij integer :: i,j diff --git a/plugins/Properties/slater_rules_mono_electronic.irp.f b/plugins/Properties/slater_rules_mono_electronic.irp.f index 3e4d94bc..1d981a15 100644 --- a/plugins/Properties/slater_rules_mono_electronic.irp.f +++ b/plugins/Properties/slater_rules_mono_electronic.irp.f @@ -10,7 +10,7 @@ subroutine i_O1_j(array,key_i,key_j,Nint,hij) integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) double precision, intent(out) :: hij - double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num) + double precision, intent(in) :: array(mo_tot_num,mo_tot_num) integer :: exc(0:2,2,2) integer :: degree @@ -25,7 +25,7 @@ subroutine i_O1_j(array,key_i,key_j,Nint,hij) ASSERT (sum(popcnt(key_j(:,2))) == elec_beta_num) hij = 0.d0 - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call get_excitation_degree(key_i,key_j,degree,Nint) select case (degree) case (2) @@ -53,7 +53,7 @@ subroutine i_O1_psi(array,key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array) use bitmasks implicit none integer, intent(in) :: Nint, Ndet,Ndet_max,Nstate - double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num) + double precision, intent(in) :: array(mo_tot_num,mo_tot_num) integer(bit_kind), intent(in) :: keys(Nint,2,Ndet) integer(bit_kind), intent(in) :: key(Nint,2) double precision, intent(in) :: coef(Ndet_max,Nstate) @@ -80,7 +80,7 @@ subroutine i_O1_psi(array,key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array) call filter_connected_mono(keys,key,Nint,Ndet,idx) do ii=1,idx(0) i = idx(ii) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call i_O1_j(array,keys(1,1,i),key,Nint,hij) do j = 1, Nstate i_H_psi_array(j) = i_H_psi_array(j) + coef(i,j)*hij @@ -96,7 +96,7 @@ double precision function diag_O1_mat_elem(array,det_in,Nint) END_DOC integer,intent(in) :: Nint integer(bit_kind),intent(in) :: det_in(Nint,2) - double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num) + double precision, intent(in) :: array(mo_tot_num,mo_tot_num) integer :: i, ispin,tmp integer :: occ_det(Nint*bit_kind_size,2) @@ -120,7 +120,7 @@ subroutine i_O1_psi_alpha_beta(array,key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H use bitmasks implicit none integer, intent(in) :: Nint, Ndet,Ndet_max,Nstate - double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num) + double precision, intent(in) :: array(mo_tot_num,mo_tot_num) integer(bit_kind), intent(in) :: keys(Nint,2,Ndet) integer(bit_kind), intent(in) :: key(Nint,2) double precision, intent(in) :: coef(Ndet_max,Nstate) @@ -147,7 +147,7 @@ subroutine i_O1_psi_alpha_beta(array,key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H call filter_connected_mono(keys,key,Nint,Ndet,idx) do ii=1,idx(0) i = idx(ii) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call i_O1_j_alpha_beta(array,keys(1,1,i),key,Nint,hij) do j = 1, Nstate i_H_psi_array(j) = i_H_psi_array(j) + coef(i,j)*hij @@ -167,7 +167,7 @@ subroutine i_O1_j_alpha_beta(array,key_i,key_j,Nint,hij) integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) double precision, intent(out) :: hij - double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num) + double precision, intent(in) :: array(mo_tot_num,mo_tot_num) integer :: exc(0:2,2,2) integer :: degree @@ -182,7 +182,7 @@ subroutine i_O1_j_alpha_beta(array,key_i,key_j,Nint,hij) ASSERT (sum(popcnt(key_j(:,2))) == elec_beta_num) hij = 0.d0 - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call get_excitation_degree(key_i,key_j,degree,Nint) select case (degree) case (2) @@ -215,7 +215,7 @@ double precision function diag_O1_mat_elem_alpha_beta(array,det_in,Nint) END_DOC integer,intent(in) :: Nint integer(bit_kind),intent(in) :: det_in(Nint,2) - double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num) + double precision, intent(in) :: array(mo_tot_num,mo_tot_num) integer :: i, ispin,tmp integer :: occ_det(Nint*bit_kind_size,2) @@ -319,7 +319,7 @@ subroutine filter_connected_mono(key1,key2,Nint,sze,idx) !DIR$ LOOP COUNT (1000) do i=1,sze degree_x2 = 0 - !DEC$ LOOP COUNT MIN(4) + !DIR$ LOOP COUNT MIN(4) do j=1,Nint degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +& popcnt(xor( key1(j,2,i), key2(j,2))) diff --git a/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES b/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES index 8b137891..1814a42f 100644 --- a/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES +++ b/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ - +Selectors_Utils diff --git a/plugins/Selectors_CASSD/selectors.irp.f b/plugins/Selectors_CASSD/selectors.irp.f index 31a8ab4f..167ec66e 100644 --- a/plugins/Selectors_CASSD/selectors.irp.f +++ b/plugins/Selectors_CASSD/selectors.irp.f @@ -1,10 +1,5 @@ use bitmasks -BEGIN_PROVIDER [ integer, psi_selectors_size ] - implicit none - psi_selectors_size = psi_det_size -END_PROVIDER - BEGIN_PROVIDER [ integer, N_det_selectors] implicit none BEGIN_DOC @@ -66,30 +61,4 @@ END_PROVIDER endif END_PROVIDER -BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ] - implicit none - BEGIN_DOC - ! Transposed psi_selectors - END_DOC - integer :: i,k - - do i=1,N_det_selectors - do k=1,N_states - psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k) - enddo - enddo -END_PROVIDER - -BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ] - implicit none - BEGIN_DOC - ! Diagonal elements of the H matrix for each selectors - END_DOC - integer :: i - double precision :: diag_H_mat_elem - do i = 1, N_det_selectors - psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int) - enddo -END_PROVIDER - diff --git a/plugins/Selectors_CASSD/zmq.irp.f b/plugins/Selectors_CASSD/zmq.irp.f deleted file mode 100644 index 2d4987d3..00000000 --- a/plugins/Selectors_CASSD/zmq.irp.f +++ /dev/null @@ -1,121 +0,0 @@ -subroutine zmq_put_psi(zmq_to_qp_run_socket,worker_id, energy, size_energy) - use f77_zmq - implicit none - BEGIN_DOC -! Put the wave function on the qp_run scheduler - END_DOC - integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket - integer, intent(in) :: worker_id - integer, intent(in) :: size_energy - double precision, intent(out) :: energy(size_energy) - integer :: rc - integer*8 :: rc8 - character*(256) :: msg - - write(msg,*) 'put_psi ', worker_id, N_states, N_det, psi_det_size, n_det_generators, n_det_selectors - - rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) - if (rc /= len(trim(msg))) then - print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)' - stop 'error' - endif - - rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_det,N_int*2_8*N_det*bit_kind,ZMQ_SNDMORE) - if (rc8 /= N_int*2_8*N_det*bit_kind) then - print *, 'f77_zmq_send8(zmq_to_qp_run_socket,psi_det,N_int*2_8*N_det*bit_kind,ZMQ_SNDMORE)' - stop 'error' - endif - - rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8_8,ZMQ_SNDMORE) - if (rc8 /= psi_det_size*N_states*8_8) then - print *, 'f77_zmq_send8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8_8,ZMQ_SNDMORE)' - stop 'error' - endif - - rc = f77_zmq_send(zmq_to_qp_run_socket,energy,size_energy*8,0) - if (rc /= size_energy*8) then - print *, 'f77_zmq_send(zmq_to_qp_run_socket,energy,size_energy*8,0)' - stop 'error' - endif - - rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) - if (msg(1:rc) /= 'put_psi_reply 1') then - print *, rc, trim(msg) - print *, 'Error in put_psi_reply' - stop 'error' - endif - -end - - - -subroutine zmq_get_psi(zmq_to_qp_run_socket, worker_id, energy, size_energy) - use f77_zmq - implicit none - BEGIN_DOC -! Get the wave function from the qp_run scheduler - END_DOC - integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket - integer, intent(in) :: worker_id - integer, intent(in) :: size_energy - double precision, intent(out) :: energy(size_energy) - integer :: rc - integer*8 :: rc8 - character*(64) :: msg - - write(msg,*) 'get_psi ', worker_id - - rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) - if (rc /= len(trim(msg))) then - print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)' - stop 'error' - endif - - rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) - if (msg(1:13) /= 'get_psi_reply') then - print *, rc, trim(msg) - print *, 'Error in get_psi_reply' - stop 'error' - endif - - integer :: N_states_read, N_det_read, psi_det_size_read - integer :: N_det_selectors_read, N_det_generators_read - read(msg(14:rc),*) N_states_read, N_det_read, psi_det_size_read, & - N_det_generators_read, N_det_selectors_read - - N_states = N_states_read - N_det = N_det_read - psi_det_size = psi_det_size_read - TOUCH psi_det_size N_det N_states - - rc8 = f77_zmq_recv8(zmq_to_qp_run_socket,psi_det,N_int*2_8*N_det*bit_kind,0) - if (rc8 /= N_int*2_8*N_det*bit_kind) then - print *, 'f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)' - stop 'error' - endif - - rc8 = f77_zmq_recv8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8_8,0) - if (rc8 /= psi_det_size*N_states*8_8) then - print *, '77_zmq_recv8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8_8,ZMQ_SNDMORE)' - stop 'error' - endif - TOUCH psi_det psi_coef - - rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,size_energy*8,0) - if (rc /= size_energy*8) then - print *, '77_zmq_recv(zmq_to_qp_run_socket,energy,size_energy*8,0)' - stop 'error' - endif - - if (N_det_generators_read > 0) then - N_det_generators = N_det_generators_read - TOUCH N_det_generators - endif - if (N_det_selectors_read > 0) then - N_det_selectors = N_det_selectors_read - TOUCH N_det_selectors - endif - -end - - diff --git a/plugins/Selectors_full/NEEDED_CHILDREN_MODULES b/plugins/Selectors_full/NEEDED_CHILDREN_MODULES index 54f54203..d2716281 100644 --- a/plugins/Selectors_full/NEEDED_CHILDREN_MODULES +++ b/plugins/Selectors_full/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -Determinants Hartree_Fock +Determinants Hartree_Fock Selectors_Utils diff --git a/plugins/Selectors_full/selectors.irp.f b/plugins/Selectors_full/selectors.irp.f index e8e746c8..42e3c87b 100644 --- a/plugins/Selectors_full/selectors.irp.f +++ b/plugins/Selectors_full/selectors.irp.f @@ -1,10 +1,5 @@ use bitmasks -BEGIN_PROVIDER [ integer, psi_selectors_size ] - implicit none - psi_selectors_size = psi_det_size -END_PROVIDER - BEGIN_PROVIDER [ integer, N_det_selectors] implicit none BEGIN_DOC @@ -50,30 +45,4 @@ END_PROVIDER enddo END_PROVIDER -BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ] - implicit none - BEGIN_DOC - ! Transposed psi_selectors - END_DOC - integer :: i,k - - do i=1,N_det_selectors - do k=1,N_states - psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k) - enddo - enddo -END_PROVIDER - -BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ] - implicit none - BEGIN_DOC - ! Diagonal elements of the H matrix for each selectors - END_DOC - integer :: i - double precision :: diag_H_mat_elem - do i = 1, N_det_selectors - psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int) - enddo -END_PROVIDER - diff --git a/plugins/Selectors_no_sorted/NEEDED_CHILDREN_MODULES b/plugins/Selectors_no_sorted/NEEDED_CHILDREN_MODULES index aae89501..27283c7d 100644 --- a/plugins/Selectors_no_sorted/NEEDED_CHILDREN_MODULES +++ b/plugins/Selectors_no_sorted/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -Determinants +Determinants Selectors_Utils diff --git a/plugins/Selectors_no_sorted/e_corr_selectors.irp.f b/plugins/Selectors_no_sorted/e_corr_selectors.irp.f deleted file mode 100644 index 952e1c23..00000000 --- a/plugins/Selectors_no_sorted/e_corr_selectors.irp.f +++ /dev/null @@ -1,79 +0,0 @@ - -use bitmasks - BEGIN_PROVIDER [integer, exc_degree_per_selectors, (N_det_selectors)] -&BEGIN_PROVIDER [integer, double_index_selectors, (N_det_selectors)] -&BEGIN_PROVIDER [integer, n_double_selectors] - implicit none - BEGIN_DOC - ! degree of excitation respect to Hartree Fock for the wave function - ! - ! for the all the selectors determinants - ! - ! double_index_selectors = list of the index of the double excitations - ! - ! n_double_selectors = number of double excitations in the selectors determinants - END_DOC - integer :: i,degree - n_double_selectors = 0 - do i = 1, N_det_selectors - call get_excitation_degree(psi_selectors(1,1,i),ref_bitmask,degree,N_int) - exc_degree_per_selectors(i) = degree - if(degree==2)then - n_double_selectors += 1 - double_index_selectors(n_double_selectors) =i - endif - enddo -END_PROVIDER - - BEGIN_PROVIDER[double precision, coef_hf_selector] - &BEGIN_PROVIDER[double precision, inv_selectors_coef_hf] - &BEGIN_PROVIDER[double precision, inv_selectors_coef_hf_squared] - &BEGIN_PROVIDER[double precision, E_corr_per_selectors, (N_det_selectors)] - &BEGIN_PROVIDER[double precision, i_H_HF_per_selectors, (N_det_selectors)] - &BEGIN_PROVIDER[double precision, Delta_E_per_selector, (N_det_selectors)] - &BEGIN_PROVIDER[double precision, E_corr_double_only ] - &BEGIN_PROVIDER[double precision, E_corr_second_order ] - implicit none - BEGIN_DOC - ! energy of correlation per determinant respect to the Hartree Fock determinant - ! - ! for the all the double excitations in the selectors determinants - ! - ! E_corr_per_selectors(i) = * c(D_i)/c(HF) if |D_i> is a double excitation - ! - ! E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation - ! - ! coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants - END_DOC - PROVIDE ref_bitmask_energy psi_selectors ref_bitmask N_int psi_selectors - integer :: i,degree - double precision :: hij,diag_H_mat_elem - E_corr_double_only = 0.d0 - E_corr_second_order = 0.d0 - do i = 1, N_det_selectors - if(exc_degree_per_selectors(i)==2)then - call i_H_j(ref_bitmask,psi_selectors(1,1,i),N_int,hij) - i_H_HF_per_selectors(i) = hij - E_corr_per_selectors(i) = psi_selectors_coef(i,1) * hij - E_corr_double_only += E_corr_per_selectors(i) - E_corr_second_order += hij * hij /(ref_bitmask_energy - diag_H_mat_elem(psi_selectors(1,1,i),N_int)) - elseif(exc_degree_per_selectors(i) == 0)then - coef_hf_selector = psi_selectors_coef(i,1) - E_corr_per_selectors(i) = -1000.d0 - Delta_E_per_selector(i) = 0.d0 - else - E_corr_per_selectors(i) = -1000.d0 - endif - enddo - if (dabs(coef_hf_selector) > 1.d-8) then - inv_selectors_coef_hf = 1.d0/coef_hf_selector - inv_selectors_coef_hf_squared = inv_selectors_coef_hf * inv_selectors_coef_hf - else - inv_selectors_coef_hf = 0.d0 - inv_selectors_coef_hf_squared = 0.d0 - endif - do i = 1,n_double_selectors - E_corr_per_selectors(double_index_selectors(i)) *=inv_selectors_coef_hf - enddo - E_corr_double_only = E_corr_double_only * inv_selectors_coef_hf - END_PROVIDER diff --git a/plugins/Selectors_no_sorted/selectors.irp.f b/plugins/Selectors_no_sorted/selectors.irp.f index 83a8d472..3ac8218d 100644 --- a/plugins/Selectors_no_sorted/selectors.irp.f +++ b/plugins/Selectors_no_sorted/selectors.irp.f @@ -1,12 +1,5 @@ - use bitmasks - -BEGIN_PROVIDER [ integer, psi_selectors_size ] - implicit none - psi_selectors_size = psi_det_size -END_PROVIDER - BEGIN_PROVIDER [ integer, N_det_selectors] implicit none BEGIN_DOC @@ -45,16 +38,3 @@ END_PROVIDER enddo END_PROVIDER - BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ] - implicit none - BEGIN_DOC - ! Diagonal elements of the H matrix for each selectors - END_DOC - integer :: i - double precision :: diag_H_mat_elem - do i = 1, N_det_selectors - psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int) - enddo - END_PROVIDER - - diff --git a/plugins/read_integral/read_integrals_mo.irp.f b/plugins/read_integral/read_integrals_mo.irp.f index 5376b2a2..06db0ddf 100644 --- a/plugins/read_integral/read_integrals_mo.irp.f +++ b/plugins/read_integral/read_integrals_mo.irp.f @@ -28,7 +28,7 @@ subroutine run call ezfio_get_mo_basis_mo_tot_num(mo_tot_num) - allocate (A(mo_tot_num_align,mo_tot_num)) + allocate (A(mo_tot_num,mo_tot_num)) A = 0.d0 iunit = getunitandopen('kinetic_mo','r') diff --git a/src/AO_Basis/ao_overlap.irp.f b/src/AO_Basis/ao_overlap.irp.f index 83110293..72aaaca0 100644 --- a/src/AO_Basis/ao_overlap.irp.f +++ b/src/AO_Basis/ao_overlap.irp.f @@ -1,7 +1,7 @@ - BEGIN_PROVIDER [ double precision, ao_overlap,(ao_num_align,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_overlap_x,(ao_num_align,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_overlap_y,(ao_num_align,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_overlap_z,(ao_num_align,ao_num) ] + BEGIN_PROVIDER [ double precision, ao_overlap,(ao_num,ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_x,(ao_num,ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_y,(ao_num,ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_z,(ao_num,ao_num) ] implicit none BEGIN_DOC ! Overlap between atomic basis functions: @@ -34,8 +34,6 @@ power_A(1) = ao_power( j, 1 ) power_A(2) = ao_power( j, 2 ) power_A(3) = ao_power( j, 3 ) - !DEC$ VECTOR ALIGNED - !DEC$ VECTOR ALWAYS do i= 1,ao_num ao_overlap(i,j)= 0.d0 ao_overlap_x(i,j)= 0.d0 @@ -49,7 +47,6 @@ power_B(3) = ao_power( i, 3 ) do n = 1,ao_prim_num(j) alpha = ao_expo_ordered_transp(n,j) - !DEC$ VECTOR ALIGNED do l = 1, ao_prim_num(i) beta = ao_expo_ordered_transp(l,i) call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) @@ -72,7 +69,7 @@ END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num_align,ao_num) ] +BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num,ao_num) ] implicit none BEGIN_DOC ! Overlap between absolute value of atomic basis functions: @@ -103,8 +100,6 @@ BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num_align,ao_num) ] power_A(1) = ao_power( j, 1 ) power_A(2) = ao_power( j, 2 ) power_A(3) = ao_power( j, 3 ) - !DEC$ VECTOR ALIGNED - !DEC$ VECTOR ALWAYS do i= 1,ao_num ao_overlap_abs(i,j)= 0.d0 B_center(1) = nucl_coord( ao_nucl(i), 1 ) @@ -115,7 +110,6 @@ BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num_align,ao_num) ] power_B(3) = ao_power( i, 3 ) do n = 1,ao_prim_num(j) alpha = ao_expo_ordered_transp(n,j) - !DEC$ VECTOR ALIGNED do l = 1, ao_prim_num(i) beta = ao_expo_ordered_transp(l,i) call overlap_x_abs(A_center(1),B_center(1),alpha,beta,power_A(1),power_B(1),overlap_x,lower_exp_val,dx,dim1) diff --git a/src/AO_Basis/aos.irp.f b/src/AO_Basis/aos.irp.f index 871c0ee6..5d255a00 100644 --- a/src/AO_Basis/aos.irp.f +++ b/src/AO_Basis/aos.irp.f @@ -1,14 +1,3 @@ -BEGIN_PROVIDER [ integer, ao_num_align ] - implicit none - - BEGIN_DOC - ! Number of atomic orbitals align - END_DOC - - integer :: align_double - ao_num_align = align_double(ao_num) -END_PROVIDER - BEGIN_PROVIDER [ integer, ao_prim_num_max ] implicit none ao_prim_num_max = 0 @@ -16,7 +5,7 @@ BEGIN_PROVIDER [ integer, ao_prim_num_max ] call ezfio_get_ao_basis_ao_prim_num_max(ao_prim_num_max) END_PROVIDER - BEGIN_PROVIDER [ double precision, ao_coef_normalized, (ao_num_align,ao_prim_num_max) ] + BEGIN_PROVIDER [ double precision, ao_coef_normalized, (ao_num,ao_prim_num_max) ] &BEGIN_PROVIDER [ double precision, ao_coef_normalization_factor, (ao_num) ] implicit none BEGIN_DOC @@ -86,8 +75,8 @@ BEGIN_PROVIDER [ double precision, ao_coef_normalization_libint_factor, (ao_num) END_PROVIDER - BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered, (ao_num_align,ao_prim_num_max) ] -&BEGIN_PROVIDER [ double precision, ao_expo_ordered, (ao_num_align,ao_prim_num_max) ] + BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered, (ao_num,ao_prim_num_max) ] +&BEGIN_PROVIDER [ double precision, ao_expo_ordered, (ao_num,ao_prim_num_max) ] implicit none BEGIN_DOC ! Sorted primitives to accelerate 4 index MO transformation @@ -112,7 +101,7 @@ END_PROVIDER END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered_transp, (ao_prim_num_max_align,ao_num) ] +BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered_transp, (ao_prim_num_max,ao_num) ] implicit none BEGIN_DOC ! Transposed ao_coef_normalized_ordered @@ -126,7 +115,7 @@ BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered_transp, (ao_prim_n END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_expo_ordered_transp, (ao_prim_num_max_align,ao_num) ] +BEGIN_PROVIDER [ double precision, ao_expo_ordered_transp, (ao_prim_num_max,ao_num) ] implicit none BEGIN_DOC ! Transposed ao_expo_ordered @@ -155,16 +144,6 @@ END_PROVIDER ao_l_max = maxval(ao_l) END_PROVIDER -BEGIN_PROVIDER [ integer, ao_prim_num_max_align ] - implicit none - BEGIN_DOC -! Number of primitives per atomic orbital aligned - END_DOC - - integer :: align_double - ao_prim_num_max_align = align_double(ao_prim_num_max) -END_PROVIDER - integer function ao_power_index(nx,ny,nz) implicit none integer, intent(in) :: nx, ny, nz diff --git a/src/Bitmask/mpi.irp.f b/src/Bitmask/mpi.irp.f new file mode 100644 index 00000000..18af1ca3 --- /dev/null +++ b/src/Bitmask/mpi.irp.f @@ -0,0 +1,44 @@ +BEGIN_PROVIDER [ integer, mpi_bit_kind ] + use bitmasks + implicit none + BEGIN_DOC + ! MPI bit kind type + END_DOC + IRP_IF MPI + include 'mpif.h' + if (bit_kind == 4) then + mpi_bit_kind = MPI_INTEGER4 + else if (bit_kind == 8) then + mpi_bit_kind = MPI_INTEGER8 + else + stop 'Wrong bit kind in mpi_bit_kind' + endif + IRP_ELSE + mpi_bit_kind = -1 + IRP_ENDIF +END_PROVIDER + +subroutine broadcast_chunks_bit_kind(A, LDA) + use bitmasks + implicit none + integer, intent(in) :: LDA + integer(bit_kind), intent(inout) :: A(LDA) + BEGIN_DOC +! Broadcast with chunks of ~2GB + END_DOC + IRP_IF MPI + include 'mpif.h' + integer :: i, sze, ierr + do i=1,LDA,200000000/bit_kind_size + sze = min(LDA-i+1, 200000000/bit_kind_size) + call MPI_BCAST (A(i), sze, MPI_BIT_KIND, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here//': Unable to broadcast chunks bit_kind', i + stop -1 + endif + enddo + IRP_ENDIF +end + + + diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 29a04596..ec2440d5 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -371,3 +371,69 @@ BEGIN_PROVIDER [ integer, nthreads_davidson ] call write_int(6,nthreads_davidson,'Number of threads for Diagonalization') END_PROVIDER + +subroutine zmq_put_N_states_diag(zmq_to_qp_run_socket,worker_id) + use f77_zmq + implicit none + BEGIN_DOC +! Put N_states_diag on the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + integer :: rc + character*(256) :: msg + + write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, 'N_states_diag' + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) + if (rc /= len(trim(msg))) then + print *, irp_here, ': Error sending N_states_diag' + stop 'error' + endif + + rc = f77_zmq_send(zmq_to_qp_run_socket,N_states_diag,4,0) + if (rc /= 4) then + print *, irp_here, ': Error sending N_states_diag' + stop 'error' + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:rc) /= 'put_data_reply ok') then + print *, rc, trim(msg) + print *, irp_here, ': Error in put_data_reply' + stop 'error' + endif + +end + +subroutine zmq_get_N_states_diag(zmq_to_qp_run_socket, worker_id) + use f77_zmq + implicit none + BEGIN_DOC +! Get N_states_diag from the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + integer :: rc + character*(64) :: msg + + write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, 'N_states_diag' + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) + if (rc /= len(trim(msg))) then + print *, irp_here, ': Error getting N_states_diag' + stop 'error' + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:14) /= 'get_data_reply') then + print *, rc, trim(msg) + print *, irp_here, ': Error in get_data_reply' + stop 'error' + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,N_states_diag,4,0) + if (rc /= 4) then + print *, irp_here, ': Error getting N_states_diag' + stop 'error' + endif + +end diff --git a/src/Davidson/diagonalization.irp.f b/src/Davidson/diagonalization.irp.f index e4d51198..b95cb946 100644 --- a/src/Davidson/diagonalization.irp.f +++ b/src/Davidson/diagonalization.irp.f @@ -363,8 +363,6 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia enddo write(iunit,'(A)') trim(write_buffer) - integer, external :: align_double - allocate( & kl_pairs(2,N_st_diag*(N_st_diag+1)/2), & W(sze,N_st_diag,davidson_sze_max), & diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index 7f9cb889..2dfe468e 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -113,7 +113,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ stop -1 endif - integer, external :: align_double itermax = max(3,min(davidson_sze_max, sze/N_st_diag)) PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse diff --git a/src/Davidson/u0Hu0_old.irp.f b/src/Davidson/u0Hu0_old.irp.f deleted file mode 100644 index 5fc68f04..00000000 --- a/src/Davidson/u0Hu0_old.irp.f +++ /dev/null @@ -1,518 +0,0 @@ - -subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze) - use bitmasks - implicit none - BEGIN_DOC - ! Computes v_0 = H|u_0> - ! - ! n : number of determinants - ! - ! H_jj : array of - ! - END_DOC - integer, intent(in) :: N_st,n,Nint, sze - double precision, intent(out) :: v_0(sze,N_st) - double precision, intent(in) :: u_0(sze,N_st) - double precision, intent(in) :: H_jj(n) - integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) - double precision :: hij,s2 - double precision, allocatable :: vt(:,:), ut(:,:), st(:,:) - integer :: i,j,k,l, jj,ii - integer :: i0, j0 - - integer, allocatable :: shortcut(:,:), sort_idx(:,:) - integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:) - integer(bit_kind) :: sorted_i(Nint) - - integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate - integer :: N_st_8 - - integer, external :: align_double - - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st - - N_st_8 = align_double(N_st) - - ASSERT (Nint > 0) - ASSERT (Nint == N_int) - ASSERT (n>0) - PROVIDE ref_bitmask_energy - - allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2)) - allocate( ut(N_st_8,n)) - - v_0 = 0.d0 - - call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint) - call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint) - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& - !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) - allocate(vt(N_st_8,n),st(N_st_8,n)) - Vt = 0.d0 - St = 0.d0 - - !$OMP DO - do i=1,n - do istate=1,N_st - ut(istate,i) = u_0(sort_idx(i,2),istate) - enddo - enddo - !$OMP END DO - - !$OMP DO SCHEDULE(static,1) - do sh=1,shortcut(0,2) - do i=shortcut(sh,2),shortcut(sh+1,2)-1 - org_i = sort_idx(i,2) - do j=shortcut(sh,2),shortcut(sh+1,2)-1 - org_j = sort_idx(j,2) - ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2))) - if (ext > 4) cycle - do ni=2,Nint - ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2))) - if (ext > 4) exit - end do - if(ext == 4) then - call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) - call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) - do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) - enddo - end if - end do - end do - enddo - !$OMP END DO - - !$OMP DO - do i=1,n - do istate=1,N_st - ut(istate,i) = u_0(sort_idx(i,1),istate) - enddo - enddo - !$OMP END DO - - !$OMP DO SCHEDULE(static,1) - do sh=1,shortcut(0,1) - do sh2=1,shortcut(0,1) - if (sh==sh2) cycle - - exa = 0 - do ni=1,Nint - exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1))) - end do - if(exa > 2) then - cycle - end if - - do i=shortcut(sh,1),shortcut(sh+1,1)-1 - org_i = sort_idx(i,1) - do ni=1,Nint - sorted_i(ni) = sorted(ni,i,1) - enddo - - do j=shortcut(sh2,1),shortcut(sh2+1,1)-1 - ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) - if (ext > 4) cycle - do ni=2,Nint - ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) - if (ext > 4) exit - end do - if(ext <= 4) then - org_j = sort_idx(j,1) - call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) - if (hij /= 0.d0) then - do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) - enddo - endif - if (ext /= 2) then - call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) - if (s2 /= 0.d0) then - do istate=1,n_st - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) - enddo - endif - endif - endif - enddo - - enddo - enddo - - exa = 0 - - do i=shortcut(sh,1),shortcut(sh+1,1)-1 - org_i = sort_idx(i,1) - do ni=1,Nint - sorted_i(ni) = sorted(ni,i,1) - enddo - - do j=shortcut(sh,1),i-1 - ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) - if (ext > 4) cycle - do ni=2,Nint - ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) - if (ext > 4) exit - end do - if(ext <= 4) then - org_j = sort_idx(j,1) - call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) - if (hij /= 0.d0) then - do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) - enddo - endif - if (ext /= 2) then - call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) - if (s2 /= 0.d0) then - do istate=1,n_st - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) - enddo - endif - endif - endif - enddo - - do j=i+1,shortcut(sh+1,1)-1 - if (i==j) cycle - ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) - if (ext > 4) cycle - do ni=2,Nint - ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) - if (ext > 4) exit - end do - if(ext <= 4) then - org_j = sort_idx(j,1) - call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) - if (hij /= 0.d0) then - do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) - enddo - endif - if (ext /= 2) then - call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) - if (s2 /= 0.d0) then - do istate=1,n_st - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) - enddo - endif - endif - endif - enddo - enddo - enddo - !$OMP END DO - - do istate=1,N_st - do i=1,n - !$OMP ATOMIC - v_0(i,istate) = v_0(i,istate) + vt(istate,i) - enddo - enddo - - deallocate(vt,st) - !$OMP END PARALLEL - - do istate=1,N_st - do i=1,n - v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate) - enddo - enddo - deallocate (shortcut, sort_idx, sorted, version, ut) -end - - - - - -subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze) - use bitmasks - implicit none - BEGIN_DOC - ! Computes v_0 = H|u_0> and s_0 = S^2 |u_0> - ! - ! n : number of determinants - ! - ! H_jj : array of - ! - ! S2_jj : array of - END_DOC - integer, intent(in) :: N_st,n,Nint, sze - double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) - double precision, intent(in) :: u_0(sze,N_st) - double precision, intent(in) :: H_jj(n), S2_jj(n) - integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) - double precision :: hij,s2 - double precision, allocatable :: vt(:,:), ut(:,:), st(:,:) - integer :: i,j,k,l, jj,ii - integer :: i0, j0 - - integer, allocatable :: shortcut(:,:), sort_idx(:,:) - integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:) - integer(bit_kind) :: sorted_i(Nint) - - integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate - integer :: N_st_8 - - integer, external :: align_double - - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st - - N_st_8 = align_double(N_st) - - ASSERT (Nint > 0) - ASSERT (Nint == N_int) - ASSERT (n>0) - PROVIDE ref_bitmask_energy - - allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2)) - allocate( ut(N_st_8,n)) - - v_0 = 0.d0 - s_0 = 0.d0 - - call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint) - call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint) - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& - !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) - allocate(vt(N_st_8,n),st(N_st_8,n)) - Vt = 0.d0 - St = 0.d0 - - !$OMP DO - do i=1,n - do istate=1,N_st - ut(istate,i) = u_0(sort_idx(i,2),istate) - enddo - enddo - !$OMP END DO - - !$OMP DO SCHEDULE(static,4) - do sh=1,shortcut(0,2) - do i=shortcut(sh,2),shortcut(sh+1,2)-1 - org_i = sort_idx(i,2) - do j=shortcut(sh,2),shortcut(sh+1,2)-1 - org_j = sort_idx(j,2) - ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2))) - if (ext > 4) cycle - do ni=2,Nint - ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2))) - if (ext > 4) exit - end do - if(ext == 4) then - call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) - call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) - do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) - enddo - end if - end do - end do - enddo - !$OMP END DO - - !$OMP DO - do i=1,n - do istate=1,N_st - ut(istate,i) = u_0(sort_idx(i,1),istate) - enddo - enddo - !$OMP END DO - - !$OMP DO SCHEDULE(static,4) - do sh=1,shortcut(0,1) - do sh2=1,shortcut(0,1) - if (sh==sh2) cycle - - exa = 0 - do ni=1,Nint - exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1))) - end do - if(exa > 2) then - cycle - end if - - do i=shortcut(sh,1),shortcut(sh+1,1)-1 - org_i = sort_idx(i,1) - do ni=1,Nint - sorted_i(ni) = sorted(ni,i,1) - enddo - - do j=shortcut(sh2,1),shortcut(sh2+1,1)-1 - ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) - if (ext > 4) cycle - do ni=2,Nint - ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) - if (ext > 4) exit - end do - if(ext <= 4) then - org_j = sort_idx(j,1) - call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) - if (hij /= 0.d0) then - do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) - enddo - endif - if (ext /= 2) then - call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) - if (s2 /= 0.d0) then - do istate=1,n_st - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) - enddo - endif - endif - endif - enddo - - enddo - enddo - - exa = 0 - - do i=shortcut(sh,1),shortcut(sh+1,1)-1 - org_i = sort_idx(i,1) - do ni=1,Nint - sorted_i(ni) = sorted(ni,i,1) - enddo - - do j=shortcut(sh,1),i-1 - ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) - if (ext > 4) cycle - do ni=2,Nint - ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) - if (ext > 4) exit - end do - if(ext <= 4) then - org_j = sort_idx(j,1) - call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) - if (hij /= 0.d0) then - do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) - enddo - endif - if (ext /= 2) then - call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) - if (s2 /= 0.d0) then - do istate=1,n_st - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) - enddo - endif - endif - endif - enddo - - do j=i+1,shortcut(sh+1,1)-1 - ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) - if (ext > 4) cycle - do ni=2,Nint - ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) - if (ext > 4) exit - end do - if(ext <= 4) then - org_j = sort_idx(j,1) - call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) - if (hij /= 0.d0) then - do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) - enddo - endif - if (ext /= 2) then - call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) - if (s2 /= 0.d0) then - do istate=1,n_st - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) - enddo - endif - endif - endif - enddo - enddo - enddo - !$OMP END DO - - do istate=1,N_st - do i=1,n - !$OMP ATOMIC - v_0(i,istate) = v_0(i,istate) + vt(istate,i) - !$OMP ATOMIC - s_0(i,istate) = s_0(i,istate) + st(istate,i) - enddo - enddo - - deallocate(vt,st) - !$OMP END PARALLEL - - do istate=1,N_st - do i=1,n - v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate) - s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate) - enddo - enddo - deallocate (shortcut, sort_idx, sorted, version, ut) -end - -subroutine H_S2_u_0_nstates_test(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze) - use bitmasks - implicit none - integer, intent(in) :: N_st,n,Nint, sze - integer(bit_kind), intent(in) :: keys_tmp(Nint,2,n) - double precision, intent(inout) :: v_0(sze,N_st), s_0(sze,N_st) - double precision, intent(in) :: u_0(sze,N_st) - double precision, intent(in) :: H_jj(n), S2_jj(n) - - PROVIDE ref_bitmask_energy - - double precision, allocatable :: vt(:,:) - integer, allocatable :: idx(:) - integer :: i,j, jj, l - double precision :: hij - - do i=1,n - v_0(i,:) = H_jj(i) * u_0(i,:) - enddo - - allocate(idx(0:n), vt(N_st,n)) - Vt = 0.d0 - !$OMP PARALLEL DO DEFAULT(shared) PRIVATE(i,idx,jj,j,degree,exc,phase,hij,l) SCHEDULE(static,1) - do i=2,n - idx(0) = i - call filter_connected(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx) - do jj=1,idx(0) - j = idx(jj) - double precision :: phase - integer :: degree - integer :: exc(0:2,2,2) - call get_excitation(keys_tmp(1,1,j),keys_tmp(1,1,i),exc,degree,phase,Nint) -! if ((degree == 2).and.(exc(0,1,1)==1)) then -! continue -! else -! cycle -! endif -! if ((degree == 2).and.(exc(0,1,1)==1)) cycle -! if ((degree > 1)) cycle -! if ((degree == 1)) cycle -! if (exc(0,1,2) /= 0) cycle -! if (exc(0,1,1) == 2) cycle -! if (exc(0,1,2) == 2) cycle -! if ((degree==1).and.(exc(0,1,1) == 1)) cycle - call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij) - do l=1,N_st - !$OMP ATOMIC - vt (l,i) = vt (l,i) + hij*u_0(j,l) - !$OMP ATOMIC - vt (l,j) = vt (l,j) + hij*u_0(i,l) - enddo - enddo - enddo - !$OMP END PARALLEL DO - do i=1,n - v_0(i,:) = v_0(i,:) + vt(:,i) - enddo -end - diff --git a/src/Determinants/filter_connected.irp.f b/src/Determinants/filter_connected.irp.f index 84775770..7ac7b9ce 100644 --- a/src/Determinants/filter_connected.irp.f +++ b/src/Determinants/filter_connected.irp.f @@ -78,7 +78,7 @@ subroutine filter_not_connected(key1,key2,Nint,sze,idx) !DIR$ LOOP COUNT (1000) do i=1,sze degree_x2 = 0 - !DEC$ LOOP COUNT MIN(4) + !DIR$ LOOP COUNT MIN(4) do j=1,Nint degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +& popcnt(xor( key1(j,2,i), key2(j,2))) @@ -177,7 +177,7 @@ subroutine filter_connected(key1,key2,Nint,sze,idx) !DIR$ LOOP COUNT (1000) do i=1,sze degree_x2 = 0 - !DEC$ LOOP COUNT MIN(4) + !DIR$ LOOP COUNT MIN(4) do j=1,Nint degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +& popcnt(xor( key1(j,2,i), key2(j,2))) @@ -404,7 +404,7 @@ subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) !DIR$ LOOP COUNT (1000) outer: do i=1,sze degree_x2 = 0 - !DEC$ LOOP COUNT MIN(4) + !DIR$ LOOP COUNT MIN(4) do m=1,Nint if ( key1(m,1,i) /= key2(m,1)) then degree_x2 = degree_x2+ popcnt(xor( key1(m,1,i), key2(m,1))) @@ -454,7 +454,7 @@ subroutine filter_connected_i_H_psi0_SC2(key1,key2,Nint,sze,idx,idx_repeat) integer :: degree degree = popcnt(xor( ref_bitmask(1,1), key2(1,1))) + & popcnt(xor( ref_bitmask(1,2), key2(1,2))) - !DEC$ NOUNROLL + !DIR$ NOUNROLL do m=2,Nint degree = degree+ popcnt(xor( ref_bitmask(m,1), key2(m,1))) + & popcnt(xor( ref_bitmask(m,2), key2(m,2))) @@ -526,7 +526,7 @@ subroutine filter_connected_i_H_psi0_SC2(key1,key2,Nint,sze,idx,idx_repeat) !DIR$ LOOP COUNT (1000) do i=1,sze degree_x2 = 0 - !DEC$ LOOP COUNT MIN(4) + !DIR$ LOOP COUNT MIN(4) do m=1,Nint degree_x2 = degree_x2+ popcnt(xor( key1(m,1,i), key2(m,1))) +& popcnt(xor( key1(m,2,i), key2(m,2))) @@ -610,7 +610,7 @@ subroutine filter_connected_i_H_psi0_SC2(key1,key2,Nint,sze,idx,idx_repeat) !DIR$ LOOP COUNT (1000) do i=1,sze degree_x2 = 0 - !DEC$ LOOP COUNT MIN(4) + !DIR$ LOOP COUNT MIN(4) do m=1,Nint degree_x2 = degree_x2+ popcnt(xor( key1(m,1,i), key2(m,1))) +& popcnt(xor( key1(m,2,i), key2(m,2))) diff --git a/src/Determinants/two_body_dm_map.irp.f b/src/Determinants/two_body_dm_map.irp.f index 2228b1b5..f2ad69ac 100644 --- a/src/Determinants/two_body_dm_map.irp.f +++ b/src/Determinants/two_body_dm_map.irp.f @@ -141,8 +141,7 @@ subroutine add_values_to_two_body_dm_map(mask_ijkl) n_elements += 1 contrib = psi_coef(i,1) * psi_coef(j,1) * phase buffer_value(n_elements) = contrib - !DEC$ FORCEINLINE -! call mo_bielec_integrals_index(h1,p1,h2,p2,buffer_i(n_elements)) + !DIR$ FORCEINLINE call mo_bielec_integrals_index(h1,h2,p1,p2,buffer_i(n_elements)) ! if (n_elements == size_buffer) then ! call insert_into_two_body_dm_ab_map(n_elements,buffer_i,buffer_value,& diff --git a/src/Integrals_Bielec/ao_bi_integrals.irp.f b/src/Integrals_Bielec/ao_bi_integrals.irp.f index 4750d5a0..2ee14962 100644 --- a/src/Integrals_Bielec/ao_bi_integrals.irp.f +++ b/src/Integrals_Bielec/ao_bi_integrals.irp.f @@ -483,7 +483,6 @@ double precision function general_primitive_integral(dim, & accu = 0.d0 iorder = iorder_p(1)+iorder_q(1)+iorder_p(1)+iorder_q(1) - !DIR$ VECTOR ALIGNED do ix=0,iorder Ix_pol(ix) = 0.d0 enddo @@ -494,9 +493,9 @@ double precision function general_primitive_integral(dim, & do jx = 0, iorder_q(1) d = a*Q_new(jx,1) if (abs(d) < thresh) cycle - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call give_polynom_mult_center_x(P_center(1),Q_center(1),ix,jx,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dx,nx) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call add_poly_multiply(dx,nx,d,Ix_pol,n_Ix) enddo enddo @@ -504,7 +503,6 @@ double precision function general_primitive_integral(dim, & return endif iorder = iorder_p(2)+iorder_q(2)+iorder_p(2)+iorder_q(2) - !DIR$ VECTOR ALIGNED do ix=0, iorder Iy_pol(ix) = 0.d0 enddo @@ -515,9 +513,9 @@ double precision function general_primitive_integral(dim, & do jy = 0, iorder_q(2) e = b*Q_new(jy,2) if (abs(e) < thresh) cycle - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call give_polynom_mult_center_x(P_center(2),Q_center(2),iy,jy,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dy,ny) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call add_poly_multiply(dy,ny,e,Iy_pol,n_Iy) enddo endif @@ -537,9 +535,9 @@ double precision function general_primitive_integral(dim, & do jz = 0, iorder_q(3) f = c*Q_new(jz,3) if (abs(f) < thresh) cycle - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call give_polynom_mult_center_x(P_center(3),Q_center(3),iz,jz,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dz,nz) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call add_poly_multiply(dz,nz,f,Iz_pol,n_Iz) enddo endif @@ -559,7 +557,7 @@ double precision function general_primitive_integral(dim, & d_poly(i)=0.d0 enddo - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp) if (n_pt_tmp == -1) then return @@ -569,7 +567,7 @@ double precision function general_primitive_integral(dim, & d1(i)=0.d0 enddo - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out) double precision :: rint_sum accu = accu + rint_sum(n_pt_out,const,d1) @@ -673,7 +671,6 @@ subroutine integrale_new(I_f,a_x,b_x,c_x,d_x,a_y,b_y,c_y,d_y,a_z,b_z,c_z,d_z,p,q sy = iy+jy sz = iz+jz - !DIR$ VECTOR ALIGNED do i = 1,n_pt B10(i) = p10_1 - gauleg_t2(i,j)* p10_2 B01(i) = p01_1 - gauleg_t2(i,j)* p01_2 @@ -682,27 +679,23 @@ subroutine integrale_new(I_f,a_x,b_x,c_x,d_x,a_y,b_y,c_y,d_y,a_z,b_z,c_z,d_z,p,q if (sx > 0) then call I_x1_new(ix,jx,B10,B01,B00,t1,n_pt) else - !DIR$ VECTOR ALIGNED do i = 1,n_pt t1(i) = 1.d0 enddo endif if (sy > 0) then call I_x1_new(iy,jy,B10,B01,B00,t2,n_pt) - !DIR$ VECTOR ALIGNED do i = 1,n_pt t1(i) = t1(i)*t2(i) enddo endif if (sz > 0) then call I_x1_new(iz,jz,B10,B01,B00,t2,n_pt) - !DIR$ VECTOR ALIGNED do i = 1,n_pt t1(i) = t1(i)*t2(i) enddo endif I_f= 0.d0 - !DIR$ VECTOR ALIGNED do i = 1,n_pt I_f += gauleg_w(i,j)*t1(i) enddo @@ -724,7 +717,6 @@ recursive subroutine I_x1_new(a,c,B_10,B_01,B_00,res,n_pt) integer :: i if(c<0)then - !DIR$ VECTOR ALIGNED do i=1,n_pt res(i) = 0.d0 enddo @@ -732,14 +724,12 @@ recursive subroutine I_x1_new(a,c,B_10,B_01,B_00,res,n_pt) call I_x2_new(c,B_10,B_01,B_00,res,n_pt) else if (a==1) then call I_x2_new(c-1,B_10,B_01,B_00,res,n_pt) - !DIR$ VECTOR ALIGNED do i=1,n_pt res(i) = c * B_00(i) * res(i) enddo else call I_x1_new(a-2,c,B_10,B_01,B_00,res,n_pt) call I_x1_new(a-1,c-1,B_10,B_01,B_00,res2,n_pt) - !DIR$ VECTOR ALIGNED do i=1,n_pt res(i) = (a-1) * B_10(i) * res(i) & + c * B_00(i) * res2(i) @@ -759,18 +749,15 @@ recursive subroutine I_x2_new(c,B_10,B_01,B_00,res,n_pt) integer :: i if(c==1)then - !DIR$ VECTOR ALIGNED do i=1,n_pt res(i) = 0.d0 enddo elseif(c==0) then - !DIR$ VECTOR ALIGNED do i=1,n_pt res(i) = 1.d0 enddo else call I_x1_new(0,c-2,B_10,B_01,B_00,res,n_pt) - !DIR$ VECTOR ALIGNED do i=1,n_pt res(i) = (c-1) * B_01(i) * res(i) enddo @@ -906,7 +893,6 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt integer :: nx, ix,iy,ny ASSERT (a>2) - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(8) do ix=0,n_pt_in X(ix) = 0.d0 @@ -921,17 +907,15 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt call I_x1_pol_mult_recurs(a-2,c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) endif - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(8) do ix=0,nx X(ix) *= dble(a-1) enddo - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(X,nx,B_10,2,d,nd) nx = nd - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(8) do ix=0,n_pt_in X(ix) = 0.d0 @@ -945,19 +929,17 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt call I_x1_pol_mult_recurs(a-1,c-1,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) endif if (c>1) then - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(8) do ix=0,nx X(ix) *= c enddo endif - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(X,nx,B_00,2,d,nd) endif ny=0 - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(8) do ix=0,n_pt_in Y(ix) = 0.d0 @@ -970,7 +952,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt call I_x1_pol_mult_recurs(a-1,c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) endif - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(Y,ny,C_00,2,d,nd) end @@ -997,7 +979,6 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) endif nx = nd - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(8) do ix=0,n_pt_in X(ix) = 0.d0 @@ -1005,26 +986,24 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) if (c>1) then - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(8) do ix=0,nx X(ix) *= dble(c) enddo endif - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(X,nx,B_00,2,d,nd) ny=0 - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(8) do ix=0,n_pt_in Y(ix) = 0.d0 enddo call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(Y,ny,C_00,2,d,nd) end @@ -1045,7 +1024,6 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y integer :: nx, ix,iy,ny - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(8) do ix=0,n_pt_in X(ix) = 0.d0 @@ -1053,11 +1031,10 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) nx = 0 call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(X,nx,B_10,2,d,nd) nx = nd - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(8) do ix=0,n_pt_in X(ix) = 0.d0 @@ -1067,26 +1044,24 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x1_pol_mult_a1(c-1,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) if (c>1) then - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(8) do ix=0,nx X(ix) *= dble(c) enddo endif - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(X,nx,B_00,2,d,nd) ny=0 - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(8) do ix=0,n_pt_in Y(ix) = 0.d0 enddo - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(Y,ny,C_00,2,d,nd) end @@ -1104,7 +1079,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) double precision, intent(in) :: B_10(0:2),B_01(0:2),B_00(0:2),C_00(0:2),D_00(0:2) integer :: nx, ix,ny double precision :: X(0:max_dim),Y(0:max_dim) - !DEC$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y integer :: i select case (c) @@ -1135,13 +1110,12 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) Y(1) = D_00(1) Y(2) = D_00(2) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(Y,ny,D_00,2,d,nd) return case default - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(6) do ix=0,c+c X(ix) = 0.d0 @@ -1149,24 +1123,22 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) nx = 0 call I_x2_pol_mult(c-2,B_10,B_01,B_00,C_00,D_00,X,nx,dim) - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(6) do ix=0,nx X(ix) *= dble(c-1) enddo - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(X,nx,B_01,2,d,nd) ny = 0 - !DIR$ VECTOR ALIGNED !DIR$ LOOP COUNT(6) do ix=0,c+c Y(ix) = 0.d0 enddo call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(Y,ny,D_00,2,d,nd) end select diff --git a/src/Integrals_Bielec/gauss_legendre.irp.f b/src/Integrals_Bielec/gauss_legendre.irp.f index e62febeb..e7545d58 100644 --- a/src/Integrals_Bielec/gauss_legendre.irp.f +++ b/src/Integrals_Bielec/gauss_legendre.irp.f @@ -1,14 +1,5 @@ -BEGIN_PROVIDER [ integer, n_pt_max_integrals_16 ] - implicit none - BEGIN_DOC - ! Aligned n_pt_max_integrals - END_DOC - integer, external :: align_double - n_pt_max_integrals_16 = align_double(n_pt_max_integrals) -END_PROVIDER - - BEGIN_PROVIDER [ double precision, gauleg_t2, (n_pt_max_integrals_16,n_pt_max_integrals/2) ] -&BEGIN_PROVIDER [ double precision, gauleg_w, (n_pt_max_integrals_16,n_pt_max_integrals/2) ] + BEGIN_PROVIDER [ double precision, gauleg_t2, (n_pt_max_integrals,n_pt_max_integrals/2) ] +&BEGIN_PROVIDER [ double precision, gauleg_w, (n_pt_max_integrals,n_pt_max_integrals/2) ] implicit none BEGIN_DOC ! t_w(i,1,k) = w(i) diff --git a/src/Integrals_Bielec/integrals_3_index.irp.f b/src/Integrals_Bielec/integrals_3_index.irp.f index 41037b34..fd3551bb 100644 --- a/src/Integrals_Bielec/integrals_3_index.irp.f +++ b/src/Integrals_Bielec/integrals_3_index.irp.f @@ -1,5 +1,5 @@ - BEGIN_PROVIDER [double precision, big_array_coulomb_integrals, (mo_tot_num_align,mo_tot_num, mo_tot_num)] -&BEGIN_PROVIDER [double precision, big_array_exchange_integrals,(mo_tot_num_align,mo_tot_num, mo_tot_num)] + BEGIN_PROVIDER [double precision, big_array_coulomb_integrals, (mo_tot_num,mo_tot_num, mo_tot_num)] +&BEGIN_PROVIDER [double precision, big_array_exchange_integrals,(mo_tot_num,mo_tot_num, mo_tot_num)] implicit none integer :: i,j,k,l double precision :: get_mo_bielec_integral diff --git a/src/Integrals_Bielec/mo_bi_integrals.irp.f b/src/Integrals_Bielec/mo_bi_integrals.irp.f index 6a75e523..2375ddc4 100644 --- a/src/Integrals_Bielec/mo_bi_integrals.irp.f +++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f @@ -207,7 +207,7 @@ subroutine add_integrals_to_map(mask_ijkl) double precision, allocatable :: bielec_tmp_1(:) double precision, allocatable :: bielec_tmp_2(:,:) double precision, allocatable :: bielec_tmp_3(:,:,:) - !DEC$ ATTRIBUTES ALIGN : 64 :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3 + !DIR$ ATTRIBUTES ALIGN : 64 :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3 integer :: n_integrals integer :: size_buffer @@ -276,7 +276,7 @@ subroutine add_integrals_to_map(mask_ijkl) size_buffer = min(ao_num*ao_num*ao_num,16000000) print*, 'Providing the molecular integrals ' - print*, 'Buffers : ', 8.*(mo_tot_num_align*(n_j)*(n_k+1) + mo_tot_num_align +& + print*, 'Buffers : ', 8.*(mo_tot_num*(n_j)*(n_k+1) + mo_tot_num+& ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' call wall_time(wall_1) @@ -289,18 +289,18 @@ subroutine add_integrals_to_map(mask_ijkl) !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & !$OMP wall_0,thread_num,accu_bis) & !$OMP DEFAULT(NONE) & - !$OMP SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,n_l,mo_tot_num_align,& + !$OMP SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,n_l, & !$OMP mo_coef_transp, & !$OMP mo_coef_transp_is_built, list_ijkl, & !$OMP mo_coef_is_built, wall_1, & !$OMP mo_coef,mo_integrals_threshold,mo_integrals_map) n_integrals = 0 wall_0 = wall_1 - allocate(bielec_tmp_3(mo_tot_num_align, n_j, n_k), & - bielec_tmp_1(mo_tot_num_align), & + allocate(bielec_tmp_3(mo_tot_num, n_j, n_k), & + bielec_tmp_1(mo_tot_num), & bielec_tmp_0(ao_num,ao_num), & bielec_tmp_0_idx(ao_num), & - bielec_tmp_2(mo_tot_num_align, n_j), & + bielec_tmp_2(mo_tot_num, n_j), & buffer_i(size_buffer), & buffer_value(size_buffer) ) @@ -308,10 +308,8 @@ subroutine add_integrals_to_map(mask_ijkl) !$ thread_num = omp_get_thread_num() !$OMP DO SCHEDULE(guided) do l1 = 1,ao_num - !DEC$ VECTOR ALIGNED bielec_tmp_3 = 0.d0 do k1 = 1,ao_num - !DEC$ VECTOR ALIGNED bielec_tmp_2 = 0.d0 do j1 = 1,ao_num call get_ao_bielec_integrals(j1,k1,l1,ao_num,bielec_tmp_0(1,j1)) @@ -333,7 +331,6 @@ subroutine add_integrals_to_map(mask_ijkl) cycle endif - !DEC$ VECTOR ALIGNED bielec_tmp_1 = 0.d0 ii1=1 do ii1 = 1,kmax-4,4 @@ -443,7 +440,7 @@ subroutine add_integrals_to_map(mask_ijkl) endif n_integrals += 1 buffer_value(n_integrals) = bielec_tmp_1(i) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call mo_bielec_integrals_index(i,j,k,l,buffer_i(n_integrals)) if (n_integrals == size_buffer) then call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& @@ -514,7 +511,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) double precision, allocatable :: bielec_tmp_1(:) double precision, allocatable :: bielec_tmp_2(:,:) double precision, allocatable :: bielec_tmp_3(:,:,:) - !DEC$ ATTRIBUTES ALIGN : 64 :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3 + !DIR$ ATTRIBUTES ALIGN : 64 :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3 integer :: n_integrals integer :: size_buffer @@ -571,7 +568,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) size_buffer = min(ao_num*ao_num*ao_num,16000000) print*, 'Providing the molecular integrals ' - print*, 'Buffers : ', 8.*(mo_tot_num_align*(n_j)*(n_k+1) + mo_tot_num_align +& + print*, 'Buffers : ', 8.*(mo_tot_num*(n_j)*(n_k+1) + mo_tot_num+& ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' call wall_time(wall_1) @@ -583,18 +580,18 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & !$OMP wall_0,thread_num,accu_bis) & !$OMP DEFAULT(NONE) & - !$OMP SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,mo_tot_num_align,& + !$OMP SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k, & !$OMP mo_coef_transp, & !$OMP mo_coef_transp_is_built, list_ijkl, & !$OMP mo_coef_is_built, wall_1, & !$OMP mo_coef,mo_integrals_threshold,mo_integrals_map) n_integrals = 0 wall_0 = wall_1 - allocate(bielec_tmp_3(mo_tot_num_align, n_j, n_k), & - bielec_tmp_1(mo_tot_num_align), & + allocate(bielec_tmp_3(mo_tot_num, n_j, n_k), & + bielec_tmp_1(mo_tot_num), & bielec_tmp_0(ao_num,ao_num), & bielec_tmp_0_idx(ao_num), & - bielec_tmp_2(mo_tot_num_align, n_j), & + bielec_tmp_2(mo_tot_num, n_j), & buffer_i(size_buffer), & buffer_value(size_buffer) ) @@ -602,10 +599,8 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) !$ thread_num = omp_get_thread_num() !$OMP DO SCHEDULE(guided) do l1 = 1,ao_num - !DEC$ VECTOR ALIGNED bielec_tmp_3 = 0.d0 do k1 = 1,ao_num - !DEC$ VECTOR ALIGNED bielec_tmp_2 = 0.d0 do j1 = 1,ao_num call get_ao_bielec_integrals(j1,k1,l1,ao_num,bielec_tmp_0(1,j1)) @@ -626,7 +621,6 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) cycle endif - !DEC$ VECTOR ALIGNED bielec_tmp_1 = 0.d0 ii1=1 do ii1 = 1,kmax-4,4 @@ -728,7 +722,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) if(i==k .and. j==l .and. i.ne.j)then buffer_value(n_integrals) = buffer_value(n_integrals) *0.5d0 endif - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call mo_bielec_integrals_index(i,j,k,l,buffer_i(n_integrals)) if (n_integrals == size_buffer) then call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& @@ -760,7 +754,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) n_integrals += 1 buffer_value(n_integrals) = bielec_tmp_1(i) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call mo_bielec_integrals_index(i,j,k,l,buffer_i(n_integrals)) if (n_integrals == size_buffer) then call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& @@ -828,7 +822,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) double precision, allocatable :: bielec_tmp_1(:) double precision, allocatable :: bielec_tmp_2(:,:) double precision, allocatable :: bielec_tmp_3(:,:,:) - !DEC$ ATTRIBUTES ALIGN : 64 :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3 + !DIR$ ATTRIBUTES ALIGN : 64 :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3 integer :: n_integrals integer :: size_buffer @@ -853,7 +847,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) size_buffer = min(ao_num*ao_num*ao_num,16000000) print*, 'Providing the molecular integrals ' - print*, 'Buffers : ', 8.*(mo_tot_num_align*(n_j)*(n_k+1) + mo_tot_num_align +& + print*, 'Buffers : ', 8.*(mo_tot_num*(n_j)*(n_k+1) + mo_tot_num+& ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' call wall_time(wall_1) @@ -864,18 +858,18 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & !$OMP wall_0,thread_num) & !$OMP DEFAULT(NONE) & - !$OMP SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,n_l,mo_tot_num_align,& + !$OMP SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,n_l, & !$OMP mo_coef_transp, & !$OMP mo_coef_transp_is_built, list_ijkl, & !$OMP mo_coef_is_built, wall_1, & !$OMP mo_coef,mo_integrals_threshold,mo_integrals_map) n_integrals = 0 wall_0 = wall_1 - allocate(bielec_tmp_3(mo_tot_num_align, n_j, n_k), & - bielec_tmp_1(mo_tot_num_align), & + allocate(bielec_tmp_3(mo_tot_num, n_j, n_k), & + bielec_tmp_1(mo_tot_num), & bielec_tmp_0(ao_num,ao_num), & bielec_tmp_0_idx(ao_num), & - bielec_tmp_2(mo_tot_num_align, n_j), & + bielec_tmp_2(mo_tot_num, n_j), & buffer_i(size_buffer), & buffer_value(size_buffer) ) @@ -888,10 +882,8 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) ! cycle ! endif !IRP_ENDIF - !DEC$ VECTOR ALIGNED bielec_tmp_3 = 0.d0 do k1 = 1,ao_num - !DEC$ VECTOR ALIGNED bielec_tmp_2 = 0.d0 do j1 = 1,ao_num call get_ao_bielec_integrals(j1,k1,l1,ao_num,bielec_tmp_0(1,j1)) @@ -913,7 +905,6 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) cycle endif - !DEC$ VECTOR ALIGNED bielec_tmp_1 = 0.d0 ii1=1 do ii1 = 1,kmax-4,4 @@ -1018,7 +1009,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) endif n_integrals += 1 buffer_value(n_integrals) = bielec_tmp_1(i) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call mo_bielec_integrals_index(i,j,k,l,buffer_i(n_integrals)) if (n_integrals == size_buffer) then call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& @@ -1071,9 +1062,9 @@ end - BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_from_ao, (mo_tot_num_align,mo_tot_num) ] -&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_exchange_from_ao, (mo_tot_num_align,mo_tot_num) ] -&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_anti_from_ao, (mo_tot_num_align,mo_tot_num) ] + BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_from_ao, (mo_tot_num,mo_tot_num) ] +&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_exchange_from_ao, (mo_tot_num,mo_tot_num) ] +&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_anti_from_ao, (mo_tot_num,mo_tot_num) ] implicit none BEGIN_DOC ! mo_bielec_integral_jj_from_ao(i,j) = J_ij @@ -1103,20 +1094,19 @@ end !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE (i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx, & !$OMP iqrs, iqsr,iqri,iqis) & - !$OMP SHARED(mo_tot_num,mo_coef_transp,mo_tot_num_align,ao_num,& + !$OMP SHARED(mo_tot_num,mo_coef_transp,ao_num, & !$OMP ao_integrals_threshold,do_direct_integrals) & !$OMP REDUCTION(+:mo_bielec_integral_jj_from_ao,mo_bielec_integral_jj_exchange_from_ao) allocate( int_value(ao_num), int_idx(ao_num), & - iqrs(mo_tot_num_align,ao_num), iqis(mo_tot_num), iqri(mo_tot_num),& - iqsr(mo_tot_num_align,ao_num) ) + iqrs(mo_tot_num,ao_num), iqis(mo_tot_num), iqri(mo_tot_num),& + iqsr(mo_tot_num,ao_num) ) !$OMP DO SCHEDULE (guided) do s=1,ao_num do q=1,ao_num do j=1,ao_num - !DIR$ VECTOR ALIGNED do i=1,mo_tot_num iqrs(i,j) = 0.d0 iqsr(i,j) = 0.d0 @@ -1130,7 +1120,6 @@ end do p=1,ao_num integral = int_value(p) if (abs(integral) > ao_integrals_threshold) then - !DIR$ VECTOR ALIGNED do i=1,mo_tot_num iqrs(i,r) += mo_coef_transp(i,p) * integral enddo @@ -1140,7 +1129,6 @@ end do p=1,ao_num integral = int_value(p) if (abs(integral) > ao_integrals_threshold) then - !DIR$ VECTOR ALIGNED do i=1,mo_tot_num iqsr(i,r) += mo_coef_transp(i,p) * integral enddo @@ -1156,7 +1144,6 @@ end p = int_idx(pp) integral = int_value(pp) if (abs(integral) > ao_integrals_threshold) then - !DIR$ VECTOR ALIGNED do i=1,mo_tot_num iqrs(i,r) += mo_coef_transp(i,p) * integral enddo @@ -1167,7 +1154,6 @@ end p = int_idx(pp) integral = int_value(pp) if (abs(integral) > ao_integrals_threshold) then - !DIR$ VECTOR ALIGNED do i=1,mo_tot_num iqsr(i,r) += mo_coef_transp(i,p) * integral enddo @@ -1178,14 +1164,12 @@ end iqis = 0.d0 iqri = 0.d0 do r=1,ao_num - !DIR$ VECTOR ALIGNED do i=1,mo_tot_num iqis(i) += mo_coef_transp(i,r) * iqrs(i,r) iqri(i) += mo_coef_transp(i,r) * iqsr(i,r) enddo enddo do i=1,mo_tot_num - !DIR$ VECTOR ALIGNED do j=1,mo_tot_num c = mo_coef_transp(j,q)*mo_coef_transp(j,s) mo_bielec_integral_jj_from_ao(j,i) += c * iqis(i) @@ -1204,9 +1188,9 @@ end END_PROVIDER - BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_from_ao, (mo_tot_num_align,mo_tot_num) ] -&BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_exchange_from_ao, (mo_tot_num_align,mo_tot_num) ] -&BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_anti_from_ao, (mo_tot_num_align,mo_tot_num) ] + BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_from_ao, (mo_tot_num,mo_tot_num) ] +&BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_exchange_from_ao, (mo_tot_num,mo_tot_num) ] +&BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_anti_from_ao, (mo_tot_num,mo_tot_num) ] implicit none BEGIN_DOC ! mo_bielec_integral_vv_from_ao(i,j) = J_ij @@ -1238,20 +1222,19 @@ END_PROVIDER !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE (i0,j0,i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx,& !$OMP iqrs, iqsr,iqri,iqis) & - !$OMP SHARED(n_virt_orb,mo_tot_num,list_virt,mo_coef_transp,mo_tot_num_align,ao_num,& + !$OMP SHARED(n_virt_orb,mo_tot_num,list_virt,mo_coef_transp,ao_num,& !$OMP ao_integrals_threshold,do_direct_integrals) & !$OMP REDUCTION(+:mo_bielec_integral_vv_from_ao,mo_bielec_integral_vv_exchange_from_ao) allocate( int_value(ao_num), int_idx(ao_num), & - iqrs(mo_tot_num_align,ao_num), iqis(mo_tot_num), iqri(mo_tot_num),& - iqsr(mo_tot_num_align,ao_num) ) + iqrs(mo_tot_num,ao_num), iqis(mo_tot_num), iqri(mo_tot_num),& + iqsr(mo_tot_num,ao_num) ) !$OMP DO SCHEDULE (guided) do s=1,ao_num do q=1,ao_num do j=1,ao_num - !DIR$ VECTOR ALIGNED do i0=1,n_virt_orb i = list_virt(i0) iqrs(i,j) = 0.d0 @@ -1266,7 +1249,6 @@ END_PROVIDER do p=1,ao_num integral = int_value(p) if (abs(integral) > ao_integrals_threshold) then - !DIR$ VECTOR ALIGNED do i0=1,n_virt_orb i = list_virt(i0) iqrs(i,r) += mo_coef_transp(i,p) * integral @@ -1277,7 +1259,6 @@ END_PROVIDER do p=1,ao_num integral = int_value(p) if (abs(integral) > ao_integrals_threshold) then - !DIR$ VECTOR ALIGNED do i0=1,n_virt_orb i =list_virt(i0) iqsr(i,r) += mo_coef_transp(i,p) * integral @@ -1294,7 +1275,6 @@ END_PROVIDER p = int_idx(pp) integral = int_value(pp) if (abs(integral) > ao_integrals_threshold) then - !DIR$ VECTOR ALIGNED do i0=1,n_virt_orb i =list_virt(i0) iqrs(i,r) += mo_coef_transp(i,p) * integral @@ -1306,7 +1286,6 @@ END_PROVIDER p = int_idx(pp) integral = int_value(pp) if (abs(integral) > ao_integrals_threshold) then - !DIR$ VECTOR ALIGNED do i0=1,n_virt_orb i = list_virt(i0) iqsr(i,r) += mo_coef_transp(i,p) * integral @@ -1318,7 +1297,6 @@ END_PROVIDER iqis = 0.d0 iqri = 0.d0 do r=1,ao_num - !DIR$ VECTOR ALIGNED do i0=1,n_virt_orb i = list_virt(i0) iqis(i) += mo_coef_transp(i,r) * iqrs(i,r) @@ -1327,7 +1305,6 @@ END_PROVIDER enddo do i0=1,n_virt_orb i= list_virt(i0) - !DIR$ VECTOR ALIGNED do j0=1,n_virt_orb j = list_virt(j0) c = mo_coef_transp(j,q)*mo_coef_transp(j,s) @@ -1354,9 +1331,9 @@ END_PROVIDER END_PROVIDER - BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj, (mo_tot_num_align,mo_tot_num) ] -&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_exchange, (mo_tot_num_align,mo_tot_num) ] -&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_anti, (mo_tot_num_align,mo_tot_num) ] + BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj, (mo_tot_num,mo_tot_num) ] +&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_exchange, (mo_tot_num,mo_tot_num) ] +&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_anti, (mo_tot_num,mo_tot_num) ] implicit none BEGIN_DOC ! mo_bielec_integral_jj(i,j) = J_ij diff --git a/src/Integrals_Monoelec/ao_mono_ints.irp.f b/src/Integrals_Monoelec/ao_mono_ints.irp.f index 4646326e..87d03ac4 100644 --- a/src/Integrals_Monoelec/ao_mono_ints.irp.f +++ b/src/Integrals_Monoelec/ao_mono_ints.irp.f @@ -1,4 +1,4 @@ - BEGIN_PROVIDER [ double precision, ao_mono_elec_integral,(ao_num_align,ao_num)] + BEGIN_PROVIDER [ double precision, ao_mono_elec_integral,(ao_num,ao_num)] &BEGIN_PROVIDER [ double precision, ao_mono_elec_integral_diag,(ao_num)] implicit none integer :: i,j,n,l @@ -7,7 +7,6 @@ ! : sum of the kinetic and nuclear electronic potential END_DOC do j = 1, ao_num - !DIR$ VECTOR ALIGNED do i = 1, ao_num ao_mono_elec_integral(i,j) = ao_nucl_elec_integral(i,j) + ao_kinetic_integral(i,j) + ao_pseudo_integral(i,j) enddo diff --git a/src/Integrals_Monoelec/kin_mo_ints.irp.f b/src/Integrals_Monoelec/kin_mo_ints.irp.f index 2301c23d..262e4805 100644 --- a/src/Integrals_Monoelec/kin_mo_ints.irp.f +++ b/src/Integrals_Monoelec/kin_mo_ints.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [double precision, mo_kinetic_integral, (mo_tot_num_align,mo_tot_num)] +BEGIN_PROVIDER [double precision, mo_kinetic_integral, (mo_tot_num,mo_tot_num)] implicit none BEGIN_DOC ! Kinetic energy integrals in the MO basis diff --git a/src/Integrals_Monoelec/mo_mono_ints.irp.f b/src/Integrals_Monoelec/mo_mono_ints.irp.f index 0d912852..891ed3d5 100644 --- a/src/Integrals_Monoelec/mo_mono_ints.irp.f +++ b/src/Integrals_Monoelec/mo_mono_ints.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [ double precision, mo_mono_elec_integral,(mo_tot_num_align,mo_tot_num)] +BEGIN_PROVIDER [ double precision, mo_mono_elec_integral,(mo_tot_num,mo_tot_num)] implicit none integer :: i,j,n,l BEGIN_DOC diff --git a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f index 22cceab9..2a1eaf67 100644 --- a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f +++ b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [ double precision, ao_pseudo_integral, (ao_num_align,ao_num)] +BEGIN_PROVIDER [ double precision, ao_pseudo_integral, (ao_num,ao_num)] implicit none BEGIN_DOC ! Pseudo-potential integrals @@ -29,7 +29,7 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral, (ao_num_align,ao_num)] END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_num)] +BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num,ao_num)] implicit none BEGIN_DOC ! Local pseudo-potential @@ -128,7 +128,7 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu END_PROVIDER - BEGIN_PROVIDER [ double precision, ao_pseudo_integral_non_local, (ao_num_align,ao_num)] + BEGIN_PROVIDER [ double precision, ao_pseudo_integral_non_local, (ao_num,ao_num)] implicit none BEGIN_DOC ! Local pseudo-potential diff --git a/src/Integrals_Monoelec/pot_mo_ints.irp.f b/src/Integrals_Monoelec/pot_mo_ints.irp.f index 7c7e306f..5810b4f3 100644 --- a/src/Integrals_Monoelec/pot_mo_ints.irp.f +++ b/src/Integrals_Monoelec/pot_mo_ints.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [double precision, mo_nucl_elec_integral, (mo_tot_num_align,mo_tot_num)] +BEGIN_PROVIDER [double precision, mo_nucl_elec_integral, (mo_tot_num,mo_tot_num)] implicit none BEGIN_DOC ! interaction nuclear electron on the MO basis @@ -25,7 +25,7 @@ BEGIN_PROVIDER [double precision, mo_nucl_elec_integral, (mo_tot_num_align,mo_to END_PROVIDER -BEGIN_PROVIDER [double precision, mo_nucl_elec_integral_per_atom, (mo_tot_num_align,mo_tot_num,nucl_num)] +BEGIN_PROVIDER [double precision, mo_nucl_elec_integral_per_atom, (mo_tot_num,mo_tot_num,nucl_num)] implicit none BEGIN_DOC ! mo_nucl_elec_integral_per_atom(i,j,k) = - diff --git a/src/Integrals_Monoelec/pot_mo_pseudo_ints.irp.f b/src/Integrals_Monoelec/pot_mo_pseudo_ints.irp.f index f2fee5f4..47e6e277 100644 --- a/src/Integrals_Monoelec/pot_mo_pseudo_ints.irp.f +++ b/src/Integrals_Monoelec/pot_mo_pseudo_ints.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [double precision, mo_pseudo_integral, (mo_tot_num_align,mo_tot_num)] +BEGIN_PROVIDER [double precision, mo_pseudo_integral, (mo_tot_num,mo_tot_num)] implicit none BEGIN_DOC ! interaction nuclear electron on the MO basis diff --git a/src/Integrals_Monoelec/spread_dipole_ao.irp.f b/src/Integrals_Monoelec/spread_dipole_ao.irp.f index 5611ec7f..2ff1494f 100644 --- a/src/Integrals_Monoelec/spread_dipole_ao.irp.f +++ b/src/Integrals_Monoelec/spread_dipole_ao.irp.f @@ -1,6 +1,6 @@ - BEGIN_PROVIDER [ double precision, ao_spread_x, (ao_num_align,ao_num)] - &BEGIN_PROVIDER [ double precision, ao_spread_y, (ao_num_align,ao_num)] - &BEGIN_PROVIDER [ double precision, ao_spread_z, (ao_num_align,ao_num)] + BEGIN_PROVIDER [ double precision, ao_spread_x, (ao_num,ao_num)] + &BEGIN_PROVIDER [ double precision, ao_spread_y, (ao_num,ao_num)] + &BEGIN_PROVIDER [ double precision, ao_spread_z, (ao_num,ao_num)] BEGIN_DOC ! array of the integrals of AO_i * x^2 AO_j ! array of the integrals of AO_i * y^2 AO_j @@ -35,8 +35,6 @@ power_A(1) = ao_power( j, 1 ) power_A(2) = ao_power( j, 2 ) power_A(3) = ao_power( j, 3 ) - !DEC$ VECTOR ALIGNED - !DEC$ VECTOR ALWAYS do i= 1,ao_num B_center(1) = nucl_coord( ao_nucl(i), 1 ) B_center(2) = nucl_coord( ao_nucl(i), 2 ) @@ -49,7 +47,6 @@ accu_z = 0.d0 do n = 1,ao_prim_num(j) alpha = ao_expo_ordered_transp(n,j) - !DEC$ VECTOR ALIGNED do l = 1, ao_prim_num(i) c = ao_coef_normalized_ordered_transp(n,j)*ao_coef_normalized_ordered_transp(l,i) beta = ao_expo_ordered_transp(l,i) @@ -72,9 +69,9 @@ - BEGIN_PROVIDER [ double precision, ao_dipole_x, (ao_num_align,ao_num)] - &BEGIN_PROVIDER [ double precision, ao_dipole_y, (ao_num_align,ao_num)] - &BEGIN_PROVIDER [ double precision, ao_dipole_z, (ao_num_align,ao_num)] + BEGIN_PROVIDER [ double precision, ao_dipole_x, (ao_num,ao_num)] + &BEGIN_PROVIDER [ double precision, ao_dipole_y, (ao_num,ao_num)] + &BEGIN_PROVIDER [ double precision, ao_dipole_z, (ao_num,ao_num)] BEGIN_DOC ! array of the integrals of AO_i * x AO_j ! array of the integrals of AO_i * y AO_j @@ -109,8 +106,6 @@ power_A(1) = ao_power( j, 1 ) power_A(2) = ao_power( j, 2 ) power_A(3) = ao_power( j, 3 ) - !DEC$ VECTOR ALIGNED - !DEC$ VECTOR ALWAYS do i= 1,ao_num B_center(1) = nucl_coord( ao_nucl(i), 1 ) B_center(2) = nucl_coord( ao_nucl(i), 2 ) @@ -123,7 +118,6 @@ accu_z = 0.d0 do n = 1,ao_prim_num(j) alpha = ao_expo_ordered_transp(n,j) - !DEC$ VECTOR ALIGNED do l = 1, ao_prim_num(i) beta = ao_expo_ordered_transp(l,i) c = ao_coef_normalized_ordered_transp(l,i)*ao_coef_normalized_ordered_transp(n,j) @@ -145,9 +139,9 @@ !$OMP END PARALLEL DO END_PROVIDER - BEGIN_PROVIDER [ double precision, ao_deriv_1_x, (ao_num_align,ao_num)] - &BEGIN_PROVIDER [ double precision, ao_deriv_1_y, (ao_num_align,ao_num)] - &BEGIN_PROVIDER [ double precision, ao_deriv_1_z, (ao_num_align,ao_num)] + BEGIN_PROVIDER [ double precision, ao_deriv_1_x, (ao_num,ao_num)] + &BEGIN_PROVIDER [ double precision, ao_deriv_1_y, (ao_num,ao_num)] + &BEGIN_PROVIDER [ double precision, ao_deriv_1_z, (ao_num,ao_num)] BEGIN_DOC ! array of the integrals of AO_i * d/dx AO_j ! array of the integrals of AO_i * d/dy AO_j @@ -183,8 +177,6 @@ power_A(1) = ao_power( j, 1 ) power_A(2) = ao_power( j, 2 ) power_A(3) = ao_power( j, 3 ) - !DEC$ VECTOR ALIGNED - !DEC$ VECTOR ALWAYS do i= 1,ao_num B_center(1) = nucl_coord( ao_nucl(i), 1 ) B_center(2) = nucl_coord( ao_nucl(i), 2 ) @@ -197,7 +189,6 @@ accu_z = 0.d0 do n = 1,ao_prim_num(j) alpha = ao_expo_ordered_transp(n,j) - !DEC$ VECTOR ALIGNED do l = 1, ao_prim_num(i) beta = ao_expo_ordered_transp(l,i) call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) diff --git a/src/Integrals_Monoelec/spread_dipole_mo.irp.f b/src/Integrals_Monoelec/spread_dipole_mo.irp.f index aa5ef8aa..9e21ec21 100644 --- a/src/Integrals_Monoelec/spread_dipole_mo.irp.f +++ b/src/Integrals_Monoelec/spread_dipole_mo.irp.f @@ -1,6 +1,6 @@ - BEGIN_PROVIDER [double precision, mo_dipole_x , (mo_tot_num_align,mo_tot_num)] -&BEGIN_PROVIDER [double precision, mo_dipole_y , (mo_tot_num_align,mo_tot_num)] -&BEGIN_PROVIDER [double precision, mo_dipole_z , (mo_tot_num_align,mo_tot_num)] + BEGIN_PROVIDER [double precision, mo_dipole_x , (mo_tot_num,mo_tot_num)] +&BEGIN_PROVIDER [double precision, mo_dipole_y , (mo_tot_num,mo_tot_num)] +&BEGIN_PROVIDER [double precision, mo_dipole_z , (mo_tot_num,mo_tot_num)] BEGIN_DOC ! array of the integrals of MO_i * x MO_j ! array of the integrals of MO_i * y MO_j @@ -29,9 +29,9 @@ END_PROVIDER - BEGIN_PROVIDER [double precision, mo_spread_x , (mo_tot_num_align,mo_tot_num)] -&BEGIN_PROVIDER [double precision, mo_spread_y , (mo_tot_num_align,mo_tot_num)] -&BEGIN_PROVIDER [double precision, mo_spread_z , (mo_tot_num_align,mo_tot_num)] + BEGIN_PROVIDER [double precision, mo_spread_x , (mo_tot_num,mo_tot_num)] +&BEGIN_PROVIDER [double precision, mo_spread_y , (mo_tot_num,mo_tot_num)] +&BEGIN_PROVIDER [double precision, mo_spread_z , (mo_tot_num,mo_tot_num)] BEGIN_DOC ! array of the integrals of MO_i * x^2 MO_j ! array of the integrals of MO_i * y^2 MO_j diff --git a/src/MOGuess/mo_ortho_lowdin.irp.f b/src/MOGuess/mo_ortho_lowdin.irp.f index 519e4f0d..ab90aa3f 100644 --- a/src/MOGuess/mo_ortho_lowdin.irp.f +++ b/src/MOGuess/mo_ortho_lowdin.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [double precision, ao_ortho_lowdin_coef, (ao_num_align,ao_num)] +BEGIN_PROVIDER [double precision, ao_ortho_lowdin_coef, (ao_num,ao_num)] implicit none BEGIN_DOC ! matrix of the coefficients of the mos generated by the @@ -8,12 +8,12 @@ BEGIN_PROVIDER [double precision, ao_ortho_lowdin_coef, (ao_num_align,ao_num)] integer :: i,j,k,l double precision :: accu double precision, allocatable :: tmp_matrix(:,:) - allocate (tmp_matrix(ao_num_align,ao_num)) + allocate (tmp_matrix(ao_num,ao_num)) tmp_matrix(:,:) = 0.d0 do j=1, ao_num tmp_matrix(j,j) = 1.d0 enddo - call ortho_lowdin(ao_overlap,ao_num_align,ao_num,tmp_matrix,ao_num_align,ao_num) + call ortho_lowdin(ao_overlap,ao_num,ao_num,tmp_matrix,ao_num,ao_num) do i=1, ao_num do j=1, ao_num ao_ortho_lowdin_coef(j,i) = tmp_matrix(i,j) @@ -22,7 +22,7 @@ BEGIN_PROVIDER [double precision, ao_ortho_lowdin_coef, (ao_num_align,ao_num)] deallocate(tmp_matrix) END_PROVIDER -BEGIN_PROVIDER [double precision, ao_ortho_lowdin_overlap, (ao_num_align,ao_num)] +BEGIN_PROVIDER [double precision, ao_ortho_lowdin_overlap, (ao_num,ao_num)] implicit none BEGIN_DOC ! overlap matrix of the ao_ortho_lowdin diff --git a/src/MOGuess/pot_mo_ortho_canonical_ints.irp.f b/src/MOGuess/pot_mo_ortho_canonical_ints.irp.f index 9c61ebcd..78fe7948 100644 --- a/src/MOGuess/pot_mo_ortho_canonical_ints.irp.f +++ b/src/MOGuess/pot_mo_ortho_canonical_ints.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [double precision, ao_ortho_canonical_nucl_elec_integral, (mo_tot_num_align,mo_tot_num)] +BEGIN_PROVIDER [double precision, ao_ortho_canonical_nucl_elec_integral, (mo_tot_num,mo_tot_num)] implicit none integer :: i1,j1,i,j double precision :: c_i1,c_j1 diff --git a/src/MOGuess/pot_mo_ortho_lowdin_ints.irp.f b/src/MOGuess/pot_mo_ortho_lowdin_ints.irp.f index 3b1875f0..27ad6503 100644 --- a/src/MOGuess/pot_mo_ortho_lowdin_ints.irp.f +++ b/src/MOGuess/pot_mo_ortho_lowdin_ints.irp.f @@ -1,4 +1,4 @@ -BEGIN_PROVIDER [double precision, ao_ortho_lowdin_nucl_elec_integral, (mo_tot_num_align,mo_tot_num)] +BEGIN_PROVIDER [double precision, ao_ortho_lowdin_nucl_elec_integral, (mo_tot_num,mo_tot_num)] implicit none integer :: i1,j1,i,j double precision :: c_i1,c_j1 diff --git a/src/MO_Basis/ao_ortho_canonical.irp.f b/src/MO_Basis/ao_ortho_canonical.irp.f index 2184ce4a..b0400f67 100644 --- a/src/MO_Basis/ao_ortho_canonical.irp.f +++ b/src/MO_Basis/ao_ortho_canonical.irp.f @@ -1,4 +1,4 @@ - BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_coef, (ao_num_align,ao_num)] + BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_coef, (ao_num,ao_num)] &BEGIN_PROVIDER [ integer, ao_cart_to_sphe_num ] implicit none BEGIN_DOC @@ -83,7 +83,7 @@ END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef_inv, (ao_num_align,ao_num)] +BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef_inv, (ao_num,ao_num)] implicit none BEGIN_DOC ! ao_ortho_canonical_coef^(-1) @@ -92,7 +92,7 @@ BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef_inv, (ao_num_align,ao ao_num, ao_ortho_canonical_coef_inv, size(ao_ortho_canonical_coef_inv,1)) END_PROVIDER - BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef, (ao_num_align,ao_num)] + BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef, (ao_num,ao_num)] &BEGIN_PROVIDER [ integer, ao_ortho_canonical_num ] implicit none BEGIN_DOC diff --git a/src/MO_Basis/mo_overlap.irp.f b/src/MO_Basis/mo_overlap.irp.f index c7e146bc..291bb38d 100644 --- a/src/MO_Basis/mo_overlap.irp.f +++ b/src/MO_Basis/mo_overlap.irp.f @@ -1,5 +1,5 @@ -BEGIN_PROVIDER [ double precision, mo_overlap,(mo_tot_num_align,mo_tot_num)] +BEGIN_PROVIDER [ double precision, mo_overlap,(mo_tot_num,mo_tot_num)] implicit none integer :: i,j,n,l double precision :: f @@ -13,7 +13,6 @@ BEGIN_PROVIDER [ double precision, mo_overlap,(mo_tot_num_align,mo_tot_num)] do i= 1,mo_tot_num mo_overlap(i,j) = 0.d0 do n = 1, lmax,4 - !DIR$ VECTOR ALIGNED do l = 1, ao_num mo_overlap(i,j) = mo_overlap(i,j) + mo_coef(l,i) * & ( mo_coef(n ,j) * ao_overlap(l,n ) & @@ -23,7 +22,6 @@ BEGIN_PROVIDER [ double precision, mo_overlap,(mo_tot_num_align,mo_tot_num)] enddo enddo do n = lmax+1, ao_num - !DIR$ VECTOR ALIGNED do l = 1, ao_num mo_overlap(i,j) = mo_overlap(i,j) + mo_coef(n,j) * mo_coef(l,i) * ao_overlap(l,n) enddo diff --git a/src/MO_Basis/mo_permutation.irp.f b/src/MO_Basis/mo_permutation.irp.f index 72f132d7..72fbd07b 100644 --- a/src/MO_Basis/mo_permutation.irp.f +++ b/src/MO_Basis/mo_permutation.irp.f @@ -2,7 +2,7 @@ program permut_mos implicit none integer :: mo1,mo2 integer :: i,j,k,l - double precision :: mo_coef_tmp(ao_num_align,2) + double precision :: mo_coef_tmp(ao_num,2) print*,'Which MOs would you like to change ?' read(5,*)mo1,mo2 print*,'' diff --git a/src/MO_Basis/mos.irp.f b/src/MO_Basis/mos.irp.f index 4bcd7221..b2214b0d 100644 --- a/src/MO_Basis/mos.irp.f +++ b/src/MO_Basis/mos.irp.f @@ -16,17 +16,7 @@ BEGIN_PROVIDER [ integer, mo_tot_num ] END_PROVIDER -BEGIN_PROVIDER [ integer, mo_tot_num_align ] - implicit none - BEGIN_DOC - ! Aligned variable for dimensioning of arrays - END_DOC - integer :: align_double - mo_tot_num_align = align_double(mo_tot_num) -END_PROVIDER - - - BEGIN_PROVIDER [ double precision, mo_coef, (ao_num_align,mo_tot_num) ] +BEGIN_PROVIDER [ double precision, mo_coef, (ao_num,mo_tot_num) ] implicit none BEGIN_DOC ! Molecular orbital coefficients on AO basis set @@ -42,32 +32,18 @@ END_PROVIDER ! Coefs call ezfio_has_mo_basis_mo_coef(exists) if (exists) then - allocate(buffer(ao_num,mo_tot_num)) - buffer = 0.d0 - call ezfio_get_mo_basis_mo_coef(buffer) - do i=1,mo_tot_num - do j=1,ao_num - mo_coef(j,i) = buffer(j,i) - enddo - do j=ao_num+1,ao_num_align - mo_coef(j,i) = 0.d0 - enddo - enddo - deallocate(buffer) + call ezfio_get_mo_basis_mo_coef(mo_coef) else ! Orthonormalized AO basis do i=1,mo_tot_num do j=1,ao_num mo_coef(j,i) = ao_ortho_canonical_coef(j,i) enddo - do j=ao_num+1,ao_num_align - mo_coef(j,i) = 0.d0 - enddo enddo endif END_PROVIDER -BEGIN_PROVIDER [ double precision, mo_coef_in_ao_ortho_basis, (ao_num_align, mo_tot_num) ] +BEGIN_PROVIDER [ double precision, mo_coef_in_ao_ortho_basis, (ao_num, mo_tot_num) ] implicit none BEGIN_DOC ! MO coefficients in orthogonalized AO basis @@ -99,7 +75,7 @@ BEGIN_PROVIDER [ character*(64), mo_label ] endif END_PROVIDER -BEGIN_PROVIDER [ double precision, mo_coef_transp, (mo_tot_num_align,ao_num) ] +BEGIN_PROVIDER [ double precision, mo_coef_transp, (mo_tot_num,ao_num) ] implicit none BEGIN_DOC ! Molecular orbital coefficients on AO basis set @@ -110,14 +86,11 @@ BEGIN_PROVIDER [ double precision, mo_coef_transp, (mo_tot_num_align,ao_num) ] do i=1,mo_tot_num mo_coef_transp(i,j) = mo_coef(j,i) enddo - do i=mo_tot_num+1,mo_tot_num_align - mo_coef_transp(i,j) = 0.d0 - enddo enddo END_PROVIDER -BEGIN_PROVIDER [ double precision, S_mo_coef, (ao_num_align, mo_tot_num) ] +BEGIN_PROVIDER [ double precision, S_mo_coef, (ao_num, mo_tot_num) ] implicit none BEGIN_DOC ! Product S.C where S is the overlap matrix in the AO basis and C the mo_coef matrix. @@ -165,18 +138,18 @@ subroutine ao_to_mo(A_ao,LDA_ao,A_mo,LDA_mo) double precision, intent(out) :: A_mo(LDA_mo,mo_tot_num) double precision, allocatable :: T(:,:) - allocate ( T(ao_num_align,mo_tot_num) ) + allocate ( T(ao_num,mo_tot_num) ) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T call dgemm('N','N', ao_num, mo_tot_num, ao_num, & 1.d0, A_ao,LDA_ao, & mo_coef, size(mo_coef,1), & - 0.d0, T, ao_num_align) + 0.d0, T, size(T,1)) call dgemm('T','N', mo_tot_num, mo_tot_num, ao_num, & 1.d0, mo_coef,size(mo_coef,1), & - T, ao_num_align, & - 0.d0, A_mo, LDA_mo) + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) deallocate(T) end @@ -193,7 +166,7 @@ subroutine mo_to_ao(A_mo,LDA_mo,A_ao,LDA_ao) double precision, intent(out) :: A_ao(LDA_ao,ao_num) double precision, allocatable :: T(:,:) - allocate ( T(mo_tot_num_align,ao_num) ) + allocate ( T(mo_tot_num,ao_num) ) call dgemm('N','T', mo_tot_num, ao_num, mo_tot_num, & 1.d0, A_mo,size(A_mo,1), & @@ -219,7 +192,7 @@ subroutine mo_to_ao_no_overlap(A_mo,LDA_mo,A_ao,LDA_ao) double precision, intent(out) :: A_ao(LDA_ao,ao_num) double precision, allocatable :: T(:,:) - allocate ( T(mo_tot_num_align,ao_num) ) + allocate ( T(mo_tot_num,ao_num) ) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T call dgemm('N','T', mo_tot_num, ao_num, mo_tot_num, & @@ -281,7 +254,7 @@ subroutine ao_ortho_cano_to_ao(A_ao,LDA_ao,A,LDA) double precision, intent(out) :: A(LDA,*) double precision, allocatable :: T(:,:) - allocate ( T(ao_num_align,ao_num) ) + allocate ( T(ao_num,ao_num) ) call dgemm('T','N', ao_num, ao_num, ao_num, & 1.d0, & diff --git a/src/MO_Basis/swap_mos.irp.f b/src/MO_Basis/swap_mos.irp.f index df0dc64d..80a29965 100644 --- a/src/MO_Basis/swap_mos.irp.f +++ b/src/MO_Basis/swap_mos.irp.f @@ -4,7 +4,7 @@ program swap_mos double precision :: x print *, 'MOs to swap?' read(*,*) i1, i2 - do i=1,ao_num_align + do i=1,ao_num x = mo_coef(i,i1) mo_coef(i,i1) = mo_coef(i,i2) mo_coef(i,i2) = x diff --git a/src/MO_Basis/utils.irp.f b/src/MO_Basis/utils.irp.f index 4221fce4..4806582b 100644 --- a/src/MO_Basis/utils.irp.f +++ b/src/MO_Basis/utils.irp.f @@ -60,7 +60,7 @@ subroutine mo_as_eigvectors_of_mo_matrix(matrix,n,m,label,sign,output) print *, irp_here, ': Error : m/= mo_tot_num' stop 1 endif - allocate(A(n,m),R(n,m),mo_coef_new(ao_num_align,m),eigvalues(m)) + allocate(A(n,m),R(n,m),mo_coef_new(ao_num,m),eigvalues(m)) if (sign == -1) then do j=1,m do i=1,n @@ -121,7 +121,7 @@ subroutine mo_as_svd_vectors_of_mo_matrix(matrix,lda,m,n,label) stop 1 endif - allocate(A(lda,n),U(lda,n),mo_coef_new(ao_num_align,m),D(m),Vt(lda,n)) + allocate(A(lda,n),U(lda,n),mo_coef_new(ao_num,m),D(m),Vt(lda,n)) do j=1,n do i=1,m @@ -167,7 +167,7 @@ subroutine mo_as_eigvectors_of_mo_matrix_sort_by_observable(matrix,observable,n, print *, irp_here, ': Error : m/= mo_tot_num' stop 1 endif - allocate(R(n,m),mo_coef_new(ao_num_align,m),eigvalues(m),value(m),iorder(m)) + allocate(R(n,m),mo_coef_new(ao_num,m),eigvalues(m),value(m),iorder(m)) mo_coef_new = mo_coef call lapack_diag(eigvalues,R,matrix,size(matrix,1),size(matrix,2)) @@ -242,7 +242,7 @@ subroutine mo_sort_by_observable(observable,label) double precision, allocatable :: mo_coef_new(:,:),value(:) integer,allocatable :: iorder(:) - allocate(mo_coef_new(ao_num_align,mo_tot_num),value(mo_tot_num),iorder(mo_tot_num)) + allocate(mo_coef_new(ao_num,mo_tot_num),value(mo_tot_num),iorder(mo_tot_num)) print*,'allocate !' mo_coef_new = mo_coef @@ -283,7 +283,7 @@ end subroutine give_specific_mos_at_r(r,mos_array, mo_coef_specific) implicit none double precision, intent(in) :: r(3) - double precision, intent(in) :: mo_coef_specific(ao_num_align, mo_tot_num) + double precision, intent(in) :: mo_coef_specific(ao_num, mo_tot_num) double precision, intent(out) :: mos_array(mo_tot_num) double precision :: aos_array(ao_num),accu integer :: i,j diff --git a/plugins/MPI/.gitignore b/src/MPI/.gitignore similarity index 100% rename from plugins/MPI/.gitignore rename to src/MPI/.gitignore diff --git a/plugins/MPI/NEEDED_CHILDREN_MODULES b/src/MPI/NEEDED_CHILDREN_MODULES similarity index 100% rename from plugins/MPI/NEEDED_CHILDREN_MODULES rename to src/MPI/NEEDED_CHILDREN_MODULES diff --git a/plugins/MPI/README.rst b/src/MPI/README.rst similarity index 100% rename from plugins/MPI/README.rst rename to src/MPI/README.rst diff --git a/plugins/MPI/mpi.irp.f b/src/MPI/mpi.irp.f similarity index 72% rename from plugins/MPI/mpi.irp.f rename to src/MPI/mpi.irp.f index 0a65b8fd..dbc2caa9 100644 --- a/plugins/MPI/mpi.irp.f +++ b/src/MPI/mpi.irp.f @@ -1,23 +1,3 @@ -BEGIN_PROVIDER [ integer, mpi_bit_kind ] - use bitmasks - implicit none - BEGIN_DOC - ! MPI bit kind type - END_DOC - IRP_IF MPI - include 'mpif.h' - if (bit_kind == 4) then - mpi_bit_kind = MPI_INTEGER4 - else if (bit_kind == 8) then - mpi_bit_kind = MPI_INTEGER8 - else - stop 'Wrong bit kind in mpi_bit_kind' - endif - IRP_ELSE - mpi_bit_kind = -1 - IRP_ENDIF -END_PROVIDER - BEGIN_PROVIDER [ logical, mpi_initialized ] implicit none BEGIN_DOC @@ -88,7 +68,6 @@ subroutine broadcast_chunks_$double(A, LDA) implicit none integer, intent(in) :: LDA $type, intent(inout) :: A(LDA) - use bitmasks BEGIN_DOC ! Broadcast with chunks of ~2GB END_DOC @@ -99,7 +78,7 @@ subroutine broadcast_chunks_$double(A, LDA) sze = min(LDA-i+1, 200000000/$8) call MPI_BCAST (A(i), sze, MPI_$DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then - print *, irp_here//': Unable to broadcast chuks $double ', i + print *, irp_here//': Unable to broadcast chunks $double ', i stop -1 endif enddo @@ -107,10 +86,9 @@ subroutine broadcast_chunks_$double(A, LDA) end SUBST [ double, type, 8, DOUBLE_PRECISION ] -double ; double precision ; 8 ; DOUBLE_PRECISION ;; -bit_kind ; integer(bit_kind) ; bit_kind_size ; BIT_KIND ;; -integer ; integer ; 4 ; INTEGER4 ;; -integer8 ; integer*8 ; 8 ; INTEGER8 ;; +double ; double precision ; 8 ; DOUBLE_PRECISION ;; +integer ; integer ; 4 ; INTEGER4 ;; +integer8 ; integer*8 ; 8 ; INTEGER8 ;; END_TEMPLATE diff --git a/src/Nuclei/nuclei.irp.f b/src/Nuclei/nuclei.irp.f index 577b8b92..10fda4cd 100644 --- a/src/Nuclei/nuclei.irp.f +++ b/src/Nuclei/nuclei.irp.f @@ -1,15 +1,4 @@ -BEGIN_PROVIDER [ integer, nucl_num_aligned ] - implicit none - BEGIN_DOC - ! Number of nuclei algined - END_DOC - - PROVIDE ezfio_filename - integer :: align_double - nucl_num_aligned = align_double(nucl_num) -END_PROVIDER - -BEGIN_PROVIDER [ double precision, nucl_coord, (nucl_num_aligned,3) ] +BEGIN_PROVIDER [ double precision, nucl_coord, (nucl_num,3) ] implicit none BEGIN_DOC @@ -79,11 +68,11 @@ BEGIN_PROVIDER [ double precision, nucl_coord_transp, (3,nucl_num) ] enddo END_PROVIDER - BEGIN_PROVIDER [ double precision, nucl_dist_2, (nucl_num_aligned,nucl_num) ] -&BEGIN_PROVIDER [ double precision, nucl_dist_vec_x, (nucl_num_aligned,nucl_num) ] -&BEGIN_PROVIDER [ double precision, nucl_dist_vec_y, (nucl_num_aligned,nucl_num) ] -&BEGIN_PROVIDER [ double precision, nucl_dist_vec_z, (nucl_num_aligned,nucl_num) ] -&BEGIN_PROVIDER [ double precision, nucl_dist, (nucl_num_aligned,nucl_num) ] + BEGIN_PROVIDER [ double precision, nucl_dist_2, (nucl_num,nucl_num) ] +&BEGIN_PROVIDER [ double precision, nucl_dist_vec_x, (nucl_num,nucl_num) ] +&BEGIN_PROVIDER [ double precision, nucl_dist_vec_y, (nucl_num,nucl_num) ] +&BEGIN_PROVIDER [ double precision, nucl_dist_vec_z, (nucl_num,nucl_num) ] +&BEGIN_PROVIDER [ double precision, nucl_dist, (nucl_num,nucl_num) ] implicit none BEGIN_DOC ! nucl_dist : Nucleus-nucleus distances @@ -105,16 +94,12 @@ END_PROVIDER endif do ie2 = 1,nucl_num - !DEC$ VECTOR ALWAYS - !DEC$ VECTOR ALIGNED - do ie1 = 1,nucl_num_aligned + do ie1 = 1,nucl_num nucl_dist_vec_x(ie1,ie2) = nucl_coord(ie1,1) - nucl_coord(ie2,1) nucl_dist_vec_y(ie1,ie2) = nucl_coord(ie1,2) - nucl_coord(ie2,2) nucl_dist_vec_z(ie1,ie2) = nucl_coord(ie1,3) - nucl_coord(ie2,3) enddo - !DEC$ VECTOR ALWAYS - !DEC$ VECTOR ALIGNED - do ie1 = 1,nucl_num_aligned + do ie1 = 1,nucl_num nucl_dist_2(ie1,ie2) = nucl_dist_vec_x(ie1,ie2)*nucl_dist_vec_x(ie1,ie2) +& nucl_dist_vec_y(ie1,ie2)*nucl_dist_vec_y(ie1,ie2) + & nucl_dist_vec_z(ie1,ie2)*nucl_dist_vec_z(ie1,ie2) diff --git a/src/Selectors_Utils/NEEDED_CHILDREN_MODULES b/src/Selectors_Utils/NEEDED_CHILDREN_MODULES new file mode 100644 index 00000000..bff2467f --- /dev/null +++ b/src/Selectors_Utils/NEEDED_CHILDREN_MODULES @@ -0,0 +1 @@ +Determinants diff --git a/src/Selectors_Utils/README.rst b/src/Selectors_Utils/README.rst new file mode 100644 index 00000000..fc264fc1 --- /dev/null +++ b/src/Selectors_Utils/README.rst @@ -0,0 +1,190 @@ +===================== +Selectors_full Module +===================== + +Needed Modules +============== + +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. + +.. image:: tree_dependency.png + +* `Determinants `_ +* `Hartree_Fock `_ + +Needed Modules +============== +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. + + +.. image:: tree_dependency.png + +* `Determinants `_ +* `Hartree_Fock `_ + +Documentation +============= +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. + + +`coef_hf_selector `_ + energy of correlation per determinant respect to the Hartree Fock determinant + .br + for the all the double excitations in the selectors determinants + .br + E_corr_per_selectors(i) = * c(D_i)/c(HF) if |D_i> is a double excitation + .br + E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation + .br + coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants + + +`delta_e_per_selector `_ + energy of correlation per determinant respect to the Hartree Fock determinant + .br + for the all the double excitations in the selectors determinants + .br + E_corr_per_selectors(i) = * c(D_i)/c(HF) if |D_i> is a double excitation + .br + E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation + .br + coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants + + +`double_index_selectors `_ + degree of excitation respect to Hartree Fock for the wave function + .br + for the all the selectors determinants + .br + double_index_selectors = list of the index of the double excitations + .br + n_double_selectors = number of double excitations in the selectors determinants + + +`e_corr_double_only `_ + energy of correlation per determinant respect to the Hartree Fock determinant + .br + for the all the double excitations in the selectors determinants + .br + E_corr_per_selectors(i) = * c(D_i)/c(HF) if |D_i> is a double excitation + .br + E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation + .br + coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants + + +`e_corr_per_selectors `_ + energy of correlation per determinant respect to the Hartree Fock determinant + .br + for the all the double excitations in the selectors determinants + .br + E_corr_per_selectors(i) = * c(D_i)/c(HF) if |D_i> is a double excitation + .br + E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation + .br + coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants + + +`e_corr_second_order `_ + energy of correlation per determinant respect to the Hartree Fock determinant + .br + for the all the double excitations in the selectors determinants + .br + E_corr_per_selectors(i) = * c(D_i)/c(HF) if |D_i> is a double excitation + .br + E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation + .br + coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants + + +`exc_degree_per_selectors `_ + degree of excitation respect to Hartree Fock for the wave function + .br + for the all the selectors determinants + .br + double_index_selectors = list of the index of the double excitations + .br + n_double_selectors = number of double excitations in the selectors determinants + + +`i_h_hf_per_selectors `_ + energy of correlation per determinant respect to the Hartree Fock determinant + .br + for the all the double excitations in the selectors determinants + .br + E_corr_per_selectors(i) = * c(D_i)/c(HF) if |D_i> is a double excitation + .br + E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation + .br + coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants + + +`inv_selectors_coef_hf `_ + energy of correlation per determinant respect to the Hartree Fock determinant + .br + for the all the double excitations in the selectors determinants + .br + E_corr_per_selectors(i) = * c(D_i)/c(HF) if |D_i> is a double excitation + .br + E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation + .br + coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants + + +`inv_selectors_coef_hf_squared `_ + energy of correlation per determinant respect to the Hartree Fock determinant + .br + for the all the double excitations in the selectors determinants + .br + E_corr_per_selectors(i) = * c(D_i)/c(HF) if |D_i> is a double excitation + .br + E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation + .br + coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants + + +`n_det_selectors `_ + For Single reference wave functions, the number of selectors is 1 : the + Hartree-Fock determinant + + +`n_double_selectors `_ + degree of excitation respect to Hartree Fock for the wave function + .br + for the all the selectors determinants + .br + double_index_selectors = list of the index of the double excitations + .br + n_double_selectors = number of double excitations in the selectors determinants + + +`psi_selectors `_ + Determinants on which we apply for perturbation. + + +`psi_selectors_coef `_ + Determinants on which we apply for perturbation. + + +`psi_selectors_coef_transp `_ + Transposed psi_selectors + + +`psi_selectors_diag_h_mat `_ + Diagonal elements of the H matrix for each selectors + + +`psi_selectors_size `_ + Undocumented + + +`zmq_get_psi `_ + Get the wave function from the qp_run scheduler + + +`zmq_put_psi `_ + Put the wave function on the qp_run scheduler + diff --git a/plugins/Selectors_full/e_corr_selectors.irp.f b/src/Selectors_Utils/e_corr_selectors.irp.f similarity index 99% rename from plugins/Selectors_full/e_corr_selectors.irp.f rename to src/Selectors_Utils/e_corr_selectors.irp.f index fec480f0..6be8ce6e 100644 --- a/plugins/Selectors_full/e_corr_selectors.irp.f +++ b/src/Selectors_Utils/e_corr_selectors.irp.f @@ -1,4 +1,3 @@ - use bitmasks BEGIN_PROVIDER [integer, exc_degree_per_selectors, (N_det_selectors)] &BEGIN_PROVIDER [integer, double_index_selectors, (N_det_selectors)] diff --git a/src/Selectors_Utils/selectors.irp.f b/src/Selectors_Utils/selectors.irp.f new file mode 100644 index 00000000..708b709b --- /dev/null +++ b/src/Selectors_Utils/selectors.irp.f @@ -0,0 +1,34 @@ +use bitmasks + +BEGIN_PROVIDER [ integer, psi_selectors_size ] + implicit none + psi_selectors_size = psi_det_size +END_PROVIDER + +BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ] + implicit none + BEGIN_DOC + ! Transposed psi_selectors + END_DOC + integer :: i,k + + do i=1,N_det_selectors + do k=1,N_states + psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k) + enddo + enddo +END_PROVIDER + +BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ] + implicit none + BEGIN_DOC + ! Diagonal elements of the H matrix for each selectors + END_DOC + integer :: i + double precision :: diag_H_mat_elem + do i = 1, N_det_selectors + psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int) + enddo +END_PROVIDER + + diff --git a/plugins/Selectors_full/zmq.irp.f b/src/Selectors_Utils/zmq.irp.f similarity index 73% rename from plugins/Selectors_full/zmq.irp.f rename to src/Selectors_Utils/zmq.irp.f index eae7e7fd..5648da67 100644 --- a/plugins/Selectors_full/zmq.irp.f +++ b/src/Selectors_Utils/zmq.irp.f @@ -19,43 +19,6 @@ subroutine zmq_put_psi(zmq_to_qp_run_socket,worker_id) end -subroutine zmq_put_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x) - use f77_zmq - implicit none - BEGIN_DOC -! Put the X vector on the qp_run scheduler - END_DOC - integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket - integer, intent(in) :: worker_id - character*(*) :: name - integer, intent(in) :: size_x - double precision, intent(out) :: x(size_x) - integer :: rc - character*(256) :: msg - - - write(msg,'(A,X,I,X,A)') 'put_data', worker_id, name - rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) - if (rc /= len(trim(msg))) then - print *, irp_here, ': Error sending '//name - stop 'error' - endif - - rc = f77_zmq_send(zmq_to_qp_run_socket,x,size_x*8,0) - if (rc /= size_x*8) then - print *, irp_here, ': Error sending '//name - stop 'error' - endif - - rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) - if (msg(1:rc) /= 'put_data_reply ok') then - print *, rc, trim(msg) - print *, irp_here, ': Error in put_data_reply' - stop 'error' - endif - -end - BEGIN_TEMPLATE @@ -70,7 +33,7 @@ subroutine zmq_put_$X(zmq_to_qp_run_socket,worker_id) integer :: rc character*(256) :: msg - write(msg,'(A,X,I,X,A)') 'put_data', worker_id, '$X' + write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, '$X' rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) if (rc /= len(trim(msg))) then print *, irp_here, ': Error sending $X' @@ -103,7 +66,7 @@ subroutine zmq_get_$X(zmq_to_qp_run_socket, worker_id) integer :: rc character*(64) :: msg - write(msg,'(A,X,I,X,A)') 'get_data', worker_id, '$X' + write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, '$X' rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) if (rc /= len(trim(msg))) then print *, irp_here, ': Error getting $X' @@ -132,7 +95,6 @@ N_det ;; psi_det_size ;; N_det_generators ;; N_det_selectors ;; -N_states_diag ;; END_TEMPLATE @@ -147,7 +109,7 @@ subroutine zmq_put_psi_det(zmq_to_qp_run_socket,worker_id) integer :: rc, rc8 character*(256) :: msg - write(msg,'(A,X,I,X,A)') 'put_data', worker_id, 'psi_det' + write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, 'psi_det' rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) if (rc /= len(trim(msg))) then print *, irp_here, ': Error sending psi_det' @@ -179,7 +141,7 @@ subroutine zmq_put_psi_coef(zmq_to_qp_run_socket,worker_id) integer :: rc, rc8 character*(256) :: msg - write(msg,'(A,X,I,X,A)') 'put_data', worker_id, 'psi_coef' + write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, 'psi_coef' rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) if (rc /= len(trim(msg))) then print *, irp_here, ': Error sending psi_coef' @@ -244,7 +206,7 @@ subroutine zmq_get_psi_det(zmq_to_qp_run_socket, worker_id) character*(64) :: msg - write(msg,'(A,X,I,X,A)') 'get_data', worker_id, 'psi_det' + write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, 'psi_det' rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) if (rc /= len(trim(msg))) then print *, irp_here, ': Error getting psi_det' @@ -279,7 +241,7 @@ subroutine zmq_get_psi_coef(zmq_to_qp_run_socket, worker_id) character*(64) :: msg - write(msg,'(A,X,I,X,A)') 'get_data', worker_id, 'psi_coef' + write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, 'psi_coef' rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) if (rc /= len(trim(msg))) then print *, irp_here, ': Error getting psi_coef' @@ -302,41 +264,3 @@ subroutine zmq_get_psi_coef(zmq_to_qp_run_socket, worker_id) end -subroutine zmq_get_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x) - use f77_zmq - implicit none - BEGIN_DOC -! Get psi_coef from the qp_run scheduler - END_DOC - integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket - integer, intent(in) :: worker_id - integer, intent(in) :: size_x - character*(*), intent(in) :: name - double precision, intent(out) :: x(size_x) - integer :: rc - integer*8 :: rc8 - character*(64) :: msg - - write(msg,'(A,X,I,X,A)') 'get_data', worker_id, name - rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) - if (rc /= len(trim(msg))) then - print *, irp_here, ': Error getting '//name - stop 'error' - endif - - rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) - if (msg(1:14) /= 'get_data_reply') then - print *, rc, trim(msg) - print *, irp_here, ': Error in get_data_reply' - stop 'error' - endif - - rc = f77_zmq_recv(zmq_to_qp_run_socket,x,size_x*8,0) - if (rc /= size_x*8) then - print *, irp_here, ': Error getting '//name - stop 'error' - endif -end - - - diff --git a/src/Utils/LinearAlgebra.irp.f b/src/Utils/LinearAlgebra.irp.f index 46859f1a..6e1b9565 100644 --- a/src/Utils/LinearAlgebra.irp.f +++ b/src/Utils/LinearAlgebra.irp.f @@ -70,7 +70,7 @@ subroutine ortho_canonical(overlap,LDA,N,C,LDC,m) double precision, allocatable :: Vt(:,:) double precision, allocatable :: D(:) double precision, allocatable :: S(:,:) - !DEC$ ATTRIBUTES ALIGN : 64 :: U, Vt, D + !DIR$ ATTRIBUTES ALIGN : 64 :: U, Vt, D integer :: info, i, j if (n < 2) then diff --git a/src/Utils/integration.irp.f b/src/Utils/integration.irp.f index ad57c52d..cac60001 100644 --- a/src/Utils/integration.irp.f +++ b/src/Utils/integration.irp.f @@ -37,7 +37,7 @@ subroutine give_explicit_poly_and_gaussian_x(P_new,P_center,p,fact_k,iorder,alph call recentered_poly2(P_a(0),A_center,P_center,a,P_b(0),B_center,P_center,b) n_new = 0 - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(P_a(0),a,P_b(0),b,P_new(0),n_new) iorder = a + b end @@ -76,44 +76,41 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, P_new(0,2) = 0.d0 P_new(0,3) = 0.d0 - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center) if (fact_k < thresh) then fact_k = 0.d0 return endif - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call recentered_poly2(P_a(0,1),A_center(1),P_center(1),a(1),P_b(0,1),B_center(1),P_center(1),b(1)) iorder(1) = a(1) + b(1) - !DIR$ VECTOR ALIGNED do i=0,iorder(1) P_new(i,1) = 0.d0 enddo n_new=0 - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(P_a(0,1),a(1),P_b(0,1),b(1),P_new(0,1),n_new) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call recentered_poly2(P_a(0,2),A_center(2),P_center(2),a(2),P_b(0,2),B_center(2),P_center(2),b(2)) iorder(2) = a(2) + b(2) - !DIR$ VECTOR ALIGNED do i=0,iorder(2) P_new(i,2) = 0.d0 enddo n_new=0 - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(P_a(0,2),a(2),P_b(0,2),b(2),P_new(0,2),n_new) - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call recentered_poly2(P_a(0,3),A_center(3),P_center(3),a(3),P_b(0,3),B_center(3),P_center(3),b(3)) iorder(3) = a(3) + b(3) - !DIR$ VECTOR ALIGNED do i=0,iorder(3) P_new(i,3) = 0.d0 enddo n_new=0 - !DEC$ FORCEINLINE + !DIR$ FORCEINLINE call multiply_poly(P_a(0,3),a(3),P_b(0,3),b(3),P_new(0,3),n_new) end @@ -200,7 +197,7 @@ subroutine gaussian_product(a,xa,b,xb,k,p,xp) ASSERT (b>0.) double precision :: xab(3), ab - !DEC$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab p = a+b p_inv = 1.d0/(a+b) @@ -282,7 +279,6 @@ subroutine multiply_poly(b,nb,c,nc,d,nd) endif ndtmp = nb+nc - !DIR$ VECTOR ALIGNED do ic = 0,nc d(ic) = d(ic) + c(ic) * b(0) enddo diff --git a/src/Utils/transpose.irp.f b/src/Utils/transpose.irp.f index ec33023d..ed86b10e 100644 --- a/src/Utils/transpose.irp.f +++ b/src/Utils/transpose.irp.f @@ -8,7 +8,7 @@ recursive subroutine transpose(A,LDA,B,LDB,d1,d2) real, intent(in) :: A(LDA,d2) real, intent(out) :: B(LDB,d1) - integer :: i,j,k, mod_align + integer :: i,j,k if ( d2 < 32 ) then do j=1,d1 !DIR$ LOOP COUNT (16) @@ -55,7 +55,7 @@ recursive subroutine dtranspose(A,LDA,B,LDB,d1,d2) ! enddo ! return - integer :: i,j,k, mod_align + integer :: i,j,k if ( d2 < 32 ) then do j=1,d1 !DIR$ LOOP COUNT (16) diff --git a/src/Utils/util.irp.f b/src/Utils/util.irp.f index d3b20a10..e4b21f2e 100644 --- a/src/Utils/util.irp.f +++ b/src/Utils/util.irp.f @@ -10,7 +10,7 @@ double precision function binom_func(i,j) double precision :: logfact integer, save :: ifirst double precision, save :: memo(0:15,0:15) - !DEC$ ATTRIBUTES ALIGN : $IRP_ALIGN :: memo + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: memo integer :: k,l if (ifirst == 0) then ifirst = 1 @@ -45,20 +45,6 @@ end END_PROVIDER -integer function align_double(n) - implicit none - BEGIN_DOC - ! Compute 1st dimension such that it is aligned for vectorization. - END_DOC - integer :: n - include 'constants.include.F' - if (mod(n,SIMD_vector/4) /= 0) then - align_double= n + SIMD_vector/4 - mod(n,SIMD_vector/4) - else - align_double= n - endif -end - double precision function fact(n) implicit none @@ -333,7 +319,6 @@ subroutine normalize(u,sze) implicit none BEGIN_DOC ! Normalizes vector u - ! u is expected to be aligned in memory. END_DOC integer, intent(in) :: sze double precision, intent(inout):: u(sze) diff --git a/src/ZMQ/put_get.irp.f b/src/ZMQ/put_get.irp.f new file mode 100644 index 00000000..76c3a339 --- /dev/null +++ b/src/ZMQ/put_get.irp.f @@ -0,0 +1,76 @@ +subroutine zmq_put_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x) + use f77_zmq + implicit none + BEGIN_DOC +! Put the X vector on the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + character*(*) :: name + integer, intent(in) :: size_x + double precision, intent(out) :: x(size_x) + integer :: rc + character*(256) :: msg + + + write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, name + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) + if (rc /= len(trim(msg))) then + print *, irp_here, ': Error sending '//name + stop 'error' + endif + + rc = f77_zmq_send(zmq_to_qp_run_socket,x,size_x*8,0) + if (rc /= size_x*8) then + print *, irp_here, ': Error sending '//name + stop 'error' + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:rc) /= 'put_data_reply ok') then + print *, rc, trim(msg) + print *, irp_here, ': Error in put_data_reply' + stop 'error' + endif + +end + + +subroutine zmq_get_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x) + use f77_zmq + implicit none + BEGIN_DOC +! Get psi_coef from the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + integer, intent(in) :: size_x + character*(*), intent(in) :: name + double precision, intent(out) :: x(size_x) + integer :: rc + integer*8 :: rc8 + character*(64) :: msg + + write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, name + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) + if (rc /= len(trim(msg))) then + print *, irp_here, ': Error getting '//name + stop 'error' + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:14) /= 'get_data_reply') then + print *, rc, trim(msg) + print *, irp_here, ': Error in get_data_reply' + stop 'error' + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,x,size_x*8,0) + if (rc /= size_x*8) then + print *, irp_here, ': Error getting '//name + stop 'error' + endif +end + + +