Removed aligns

2024-12-22 12:23:48 +01:00 · 2017-11-27 10:58:32 +01:00 · 2017-11-27 10:58:32 +01:00 · 12295ce7c0
commit 12295ce7c0
parent fc5f0b9745
66 changed files with 649 additions and 1312 deletions
--- a/plugins/DensityMatrix/density_matrix.irp.f
+++ b/plugins/DensityMatrix/density_matrix.irp.f
@ -167,9 +167,9 @@ END_PROVIDER
 END_TEMPLATE
 
 
- BEGIN_PROVIDER [ double precision, two_body_dm_diag_aa, (mo_tot_num_align,mo_tot_num)]
-&BEGIN_PROVIDER [ double precision, two_body_dm_diag_bb, (mo_tot_num_align,mo_tot_num)]
-&BEGIN_PROVIDER [ double precision, two_body_dm_diag_ab, (mo_tot_num_align,mo_tot_num)]
+ BEGIN_PROVIDER [ double precision, two_body_dm_diag_aa, (mo_tot_num,mo_tot_num)]
+&BEGIN_PROVIDER [ double precision, two_body_dm_diag_bb, (mo_tot_num,mo_tot_num)]
+&BEGIN_PROVIDER [ double precision, two_body_dm_diag_ab, (mo_tot_num,mo_tot_num)]
   implicit none
   use bitmasks
   BEGIN_DOC
--- a/plugins/FOBOCI/SC2_1h1p.irp.f
+++ b/plugins/FOBOCI/SC2_1h1p.irp.f
@ -725,8 +725,8 @@ subroutine density_matrix_1h1p(dets_in,u_in,density_matrix_alpha,density_matrix_
  integer, intent(in)            :: dim_in, sze, N_st, Nint
  integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
  double precision, intent(inout) :: u_in(dim_in,N_st)
-  double precision, intent(inout)  :: density_matrix_alpha(mo_tot_num_align,mo_tot_num)
-  double precision, intent(inout)  :: density_matrix_beta(mo_tot_num_align,mo_tot_num)
+  double precision, intent(inout)  :: density_matrix_alpha(mo_tot_num,mo_tot_num)
+  double precision, intent(inout)  :: density_matrix_beta(mo_tot_num,mo_tot_num)
  double precision, intent(inout)  :: norm
  
  integer :: i,j,k,l
--- a/plugins/FOBOCI/density_matrix.irp.f
+++ b/plugins/FOBOCI/density_matrix.irp.f
@ -1,5 +1,5 @@
- BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_generators_restart, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_generators_restart, (mo_tot_num_align,mo_tot_num) ]
+ BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_generators_restart, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_generators_restart, (mo_tot_num,mo_tot_num) ]
 &BEGIN_PROVIDER [ double precision, norm_generators_restart]
   implicit none
   BEGIN_DOC
@ -40,9 +40,9 @@
        !$OMP PRIVATE(j,k,l,m,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc, &
        !$OMP  tmp_a, tmp_b, n_occ_alpha)&
        !$OMP SHARED(psi_det_generators_restart,psi_coef_generators_restart,N_int,elec_alpha_num,&
-        !$OMP  elec_beta_num,one_body_dm_mo_alpha_generators_restart,one_body_dm_mo_beta_generators_restart,N_det_generators_restart,mo_tot_num_align,&
+        !$OMP  elec_beta_num,one_body_dm_mo_alpha_generators_restart,one_body_dm_mo_beta_generators_restart,N_det_generators_restart,&
        !$OMP  mo_tot_num,N_states, state_average_weight)
-     allocate(tmp_a(mo_tot_num_align,mo_tot_num), tmp_b(mo_tot_num_align,mo_tot_num) )
+     allocate(tmp_a(mo_tot_num,mo_tot_num), tmp_b(mo_tot_num,mo_tot_num) )
     tmp_a = 0.d0
     tmp_b = 0.d0
     !$OMP DO SCHEDULE(dynamic)
@ -98,7 +98,7 @@ END_PROVIDER



-BEGIN_PROVIDER [ double precision, one_body_dm_mo_generators_restart, (mo_tot_num_align,mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, one_body_dm_mo_generators_restart, (mo_tot_num,mo_tot_num) ]
 implicit none
 BEGIN_DOC
 ! One-body density matrix for the generators_restart
@ -106,7 +106,7 @@ BEGIN_PROVIDER [ double precision, one_body_dm_mo_generators_restart, (mo_tot_nu
 one_body_dm_mo_generators_restart = one_body_dm_mo_alpha_generators_restart + one_body_dm_mo_beta_generators_restart
 END_PROVIDER

-BEGIN_PROVIDER [ double precision, one_body_spin_density_mo_generators_restart, (mo_tot_num_align,mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, one_body_spin_density_mo_generators_restart, (mo_tot_num,mo_tot_num) ]
 implicit none
 BEGIN_DOC
 ! rho(alpha) - rho(beta)
@ -115,16 +115,16 @@ BEGIN_PROVIDER [ double precision, one_body_spin_density_mo_generators_restart,
 END_PROVIDER


- BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_osoci, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_osoci, (mo_tot_num_align,mo_tot_num) ]
+ BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_osoci, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_osoci, (mo_tot_num,mo_tot_num) ]
   implicit none
   BEGIN_DOC
   ! Alpha and beta one-body density matrix that will be used for the OSOCI approach
   END_DOC
 END_PROVIDER

- BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_1h1p, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_1h1p, (mo_tot_num_align,mo_tot_num) ]
+ BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_1h1p, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_1h1p, (mo_tot_num,mo_tot_num) ]
   implicit none
   BEGIN_DOC
   ! Alpha and beta one-body density matrix that will be used for the 1h1p approach
--- a/plugins/FOBOCI/diag_fock_inactiv_virt.irp.f
+++ b/plugins/FOBOCI/diag_fock_inactiv_virt.irp.f
@ -1,7 +1,7 @@
 subroutine diag_inactive_virt_and_update_mos
 implicit none
 integer :: i,j,i_inact,j_inact,i_virt,j_virt
- double precision :: tmp(mo_tot_num_align,mo_tot_num)
+ double precision :: tmp(mo_tot_num,mo_tot_num)
 character*(64) :: label
 print*,'Diagonalizing the occ and virt Fock operator'
 tmp = 0.d0
@ -38,7 +38,7 @@ end
 subroutine diag_inactive_virt_new_and_update_mos
 implicit none
 integer :: i,j,i_inact,j_inact,i_virt,j_virt,k,k_act
- double precision :: tmp(mo_tot_num_align,mo_tot_num),accu,get_mo_bielec_integral
+ double precision :: tmp(mo_tot_num,mo_tot_num),accu,get_mo_bielec_integral
 character*(64) :: label
 tmp = 0.d0
 do i = 1, mo_tot_num
--- a/plugins/Hartree_Fock_SlaterDressed/dressing.irp.f
+++ b/plugins/Hartree_Fock_SlaterDressed/dressing.irp.f
@ -1,4 +1,4 @@
-BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral_dressing, (ao_num_align,ao_num) ]
+BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral_dressing, (ao_num,ao_num) ]
  implicit none
  BEGIN_DOC
  ! Dressing of the core hamiltonian in the orthogonal AO basis set
@ -25,7 +25,7 @@ BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral_dressing, (ao_num
 END_PROVIDER


-BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral, (ao_num_align, ao_num) ]
+BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral, (ao_num, ao_num) ]
 implicit none
 BEGIN_DOC
 ! h core in orthogonal AO basis
@ -53,7 +53,7 @@ BEGIN_PROVIDER [ double precision, ao_mono_elec_integral_dressing, (ao_num,ao_nu
    ao_mono_elec_integral_dressing,size(ao_mono_elec_integral_dressing,1))
 END_PROVIDER

-BEGIN_PROVIDER [ double precision, mo_mono_elec_integral_dressing, (mo_tot_num_align,mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, mo_mono_elec_integral_dressing, (mo_tot_num,mo_tot_num) ]
  implicit none
  BEGIN_DOC
  ! Dressing of the core hamiltonian in the MO basis set
@ -73,14 +73,14 @@ BEGIN_PROVIDER [ integer, idx_dressing ]
 END_PROVIDER


-BEGIN_PROVIDER [ double precision, cusp_corrected_mos, (ao_num_align,mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, cusp_corrected_mos, (ao_num,mo_tot_num) ]
  implicit none
  BEGIN_DOC
  ! Dressing core hamiltonian in the AO basis set
  END_DOC
  integer :: i,j
  double precision, allocatable :: F(:,:), M(:,:)
-  allocate(F(mo_tot_num_align,mo_tot_num),M(ao_num,mo_tot_num))
+  allocate(F(mo_tot_num,mo_tot_num),M(ao_num,mo_tot_num))

  logical :: oneshot

--- a/plugins/MRCC_Utils/davidson.irp.f
+++ b/plugins/MRCC_Utils/davidson.irp.f
@ -85,7 +85,6 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s
  double precision, intent(inout) :: u_in(dim_in,N_st_diag)
  double precision, intent(out)  :: energies(N_st_diag)
  
-  integer                        :: sze_8
  integer                        :: iter
  integer                        :: i,j,k,l,m
  logical                        :: converged
@ -138,13 +137,10 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s
  enddo
  write(iunit,'(A)') trim(write_buffer)

-  integer, external :: align_double
-  sze_8 = align_double(sze)
-
  allocate(                                                          &
-      W(sze_8,N_st_diag,davidson_sze_max),                           &
-      U(sze_8,N_st_diag,davidson_sze_max),                           &
-      R(sze_8,N_st_diag),                                            &
+      W(sze,N_st_diag,davidson_sze_max),                           &
+      U(sze,N_st_diag,davidson_sze_max),                           &
+      R(sze,N_st_diag),                                            &
      h(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max),      &
      y(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max),      &
      residual_norm(N_st_diag),                                      &
@ -199,7 +195,7 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s
      ! Compute |W_k> = \sum_i |i><i|H|u_k>
      ! -----------------------------------------
      
-      call H_u_0_mrcc_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,istate,N_st_diag,sze_8)
+      call H_u_0_mrcc_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,istate,N_st_diag,sze)
      
      
      ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
@ -320,7 +316,7 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s
 end


-subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze_8)
+subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze)
  use bitmasks
  implicit none
  BEGIN_DOC
@ -329,16 +325,16 @@ subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze_8)
  ! n : number of determinants
  !
  END_DOC
-  integer, intent(in)            :: n,Nint,N_st,sze_8
+  integer, intent(in)            :: n,Nint,N_st,sze
  double precision, intent(out)  :: e_0(N_st)
-  double precision, intent(in)   :: u_0(sze_8,N_st)
+  double precision, intent(in)   :: u_0(sze,N_st)
  integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
  integer,intent(in)             :: istate
  
  double precision, allocatable  :: v_0(:,:), H_jj(:)
  double precision               :: u_dot_u,u_dot_v,diag_H_mat_elem
  integer :: i,j
-  allocate(H_jj(n), v_0(sze_8,N_st))
+  allocate(H_jj(n), v_0(sze,N_st))
  do i = 1, n
   H_jj(i) = diag_H_mat_elem(keys_tmp(1,1,i),Nint)
  enddo
@ -347,7 +343,7 @@ subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze_8)
    H_jj(idx_ref(i)) +=  delta_ii(istate,i)
  enddo
  
-  call H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate,N_st,sze_8)
+  call H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate,N_st,sze)
  do i=1,N_st
    e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/u_dot_u(u_0(1,i),n)
  enddo
@ -355,7 +351,7 @@ subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze_8)
 end


-subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8)
+subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze)
  use bitmasks
  implicit none
  BEGIN_DOC
@ -365,9 +361,9 @@ subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8)
  !     
  ! H_jj : array of <j|H|j>
  END_DOC
-  integer, intent(in)            :: n,Nint,istate_in,N_st,sze_8
-  double precision, intent(out)  :: v_0(sze_8,N_st)
-  double precision, intent(in)   :: u_0(sze_8,N_st)
+  integer, intent(in)            :: n,Nint,istate_in,N_st,sze
+  double precision, intent(out)  :: v_0(sze,N_st)
+  double precision, intent(in)   :: u_0(sze,N_st)
  double precision, intent(in)   :: H_jj(n)
  integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
  double precision               :: hij
@ -396,9 +392,9 @@ subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8)
  
  !$OMP PARALLEL DEFAULT(NONE)                                       &
      !$OMP PRIVATE(i,hij,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
-      !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,sorted,shortcut,sort_idx,version,N_st,sze_8,&
+      !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,sorted,shortcut,sort_idx,version,N_st,sze,&
      !$OMP   istate_in,delta_ij,N_det_ref,N_det_non_ref,idx_ref,idx_non_ref)
-  allocate(vt(sze_8,N_st))
+  allocate(vt(sze,N_st))
  Vt = 0.d0
  
  !$OMP DO SCHEDULE(static,1)
@ -590,7 +586,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
  double precision, intent(inout) :: u_in(dim_in,N_st_diag)
  double precision, intent(out)  :: energies(N_st_diag)
  
-  integer                        :: sze_8
+  integer                        :: sze
  integer                        :: iter
  integer                        :: i,j,k,l,m
  logical                        :: converged
@ -649,14 +645,11 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
  enddo
  write(iunit,'(A)') trim(write_buffer)

-  integer, external :: align_double
-  sze_8 = align_double(sze)
-
  itermax = min(davidson_sze_max, sze/N_st_diag)
  allocate(                                                          &
-      W(sze_8,N_st_diag*itermax),                                    &
-      U(sze_8,N_st_diag*itermax),                                    &
-      S(sze_8,N_st_diag*itermax),                                    &
+      W(sze,N_st_diag*itermax),                                    &
+      U(sze,N_st_diag*itermax),                                    &
+      S(sze,N_st_diag*itermax),                                    &
      h(N_st_diag*itermax,N_st_diag*itermax),                        &
      y(N_st_diag*itermax,N_st_diag*itermax),                        &
      s_(N_st_diag*itermax,N_st_diag*itermax),                       &
@ -722,7 +715,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
      ! -----------------------------------------
       
      call H_S2_u_0_mrcc_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,&
-        istate,N_st_diag,sze_8)
+        istate,N_st_diag,sze)
      
      
      ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
@ -960,7 +953,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
 end


-subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8)
+subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_in,N_st,sze)
  use bitmasks
  implicit none
  BEGIN_DOC
@ -972,9 +965,9 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i
  !
  ! S2_jj : array of <j|S^2|j>
  END_DOC
-  integer, intent(in)            :: N_st,n,Nint, sze_8, istate_in
-  double precision, intent(out)  :: v_0(sze_8,N_st), s_0(sze_8,N_st)
-  double precision, intent(in)   :: u_0(sze_8,N_st)
+  integer, intent(in)            :: N_st,n,Nint, sze, istate_in
+  double precision, intent(out)  :: v_0(sze,N_st), s_0(sze,N_st)
+  double precision, intent(in)   :: u_0(sze,N_st)
  double precision, intent(in)   :: H_jj(n), S2_jj(n)
  integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
  double precision               :: hij,s2
@ -987,20 +980,16 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i
  integer(bit_kind)              :: sorted_i(Nint)

  integer                        :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
-  integer                        :: N_st_8

-  integer, external              :: align_double
  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut

-  N_st_8 = align_double(N_st)
-
  ASSERT (Nint > 0)
  ASSERT (Nint == N_int)
  ASSERT (n>0)
  PROVIDE ref_bitmask_energy

  allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
-  allocate(ut(N_st_8,n))
+  allocate(ut(N_st,n))

  v_0 = 0.d0
  s_0 = 0.d0
@ -1017,9 +1006,9 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i
  PROVIDE delta_ij_s2 
  !$OMP PARALLEL DEFAULT(NONE)                                       &
      !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
-      !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8, &
+      !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st, &
      !$OMP  N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij, delta_ij_s2,istate_in)
-  allocate(vt(N_st_8,n),st(N_st_8,n))
+  allocate(vt(N_st,n),st(N_st,n))
  Vt = 0.d0
  St = 0.d0

--- a/plugins/Perturbation/pt2_equations.irp.f
+++ b/plugins/Perturbation/pt2_equations.irp.f
@ -314,7 +314,7 @@ subroutine pt2_epstein_nesbet_SC2_projected ($arguments)

  degree = popcnt(xor( ref_bitmask(1,1), det_pert(1,1))) +                      &
           popcnt(xor( ref_bitmask(1,2), det_pert(1,2)))
-  !DEC$ NOUNROLL
+  !DIR$ NOUNROLL
  do l=2,Nint
    degree = degree+ popcnt(xor( ref_bitmask(l,1), det_pert(l,1))) +            &
                     popcnt(xor( ref_bitmask(l,2), det_pert(l,2)))
--- a/plugins/Properties/average.irp.f
+++ b/plugins/Properties/average.irp.f
@ -1,7 +1,7 @@
 subroutine get_average(array,density,average)
 implicit none
- double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num)
- double precision, intent(in) :: density(mo_tot_num_align,mo_tot_num)
+ double precision, intent(in) :: array(mo_tot_num,mo_tot_num)
+ double precision, intent(in) :: density(mo_tot_num,mo_tot_num)
 double precision, intent(out):: average
 integer :: i,j
 BEGIN_DOC
--- a/plugins/Properties/delta_rho.irp.f
+++ b/plugins/Properties/delta_rho.irp.f
@ -73,7 +73,7 @@ END_PROVIDER



-BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_all_points, (ao_num_align, ao_num, N_z_pts)]
+BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_all_points, (ao_num, ao_num, N_z_pts)]
 BEGIN_DOC
 !  array of the overlap in x,y between the AO function and integrated between [z,z+dz] in the z axis
 !  for all the z points that are given (N_z_pts)
@ -148,7 +148,7 @@ BEGIN_PROVIDER [integer, i_unit_integrated_delta_rho]

 END_PROVIDER

-BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_one_point, (ao_num_align, ao_num )]
+BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_one_point, (ao_num, ao_num )]
 BEGIN_DOC
 !  array of the overlap in x,y between the AO function and integrated between [z,z+dz] in the z axis
 !  for one specific z point
@ -209,7 +209,7 @@ BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_one_point, (ao_num_al
 !$OMP END PARALLEL DO
 END_PROVIDER

-BEGIN_PROVIDER [double precision, mo_integrated_delta_rho_one_point, (mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [double precision, mo_integrated_delta_rho_one_point, (mo_tot_num,mo_tot_num)]
 BEGIN_DOC
 ! 
 ! array of the integrals needed of integrated_rho(alpha,z) - integrated_rho(beta,z) for z = z_one_point
--- a/plugins/Properties/mulliken.irp.f
+++ b/plugins/Properties/mulliken.irp.f
@ -1,5 +1,5 @@

-BEGIN_PROVIDER [double precision, spin_population, (ao_num_align,ao_num)]
+BEGIN_PROVIDER [double precision, spin_population, (ao_num,ao_num)]
 implicit none
 integer :: i,j
 BEGIN_DOC
@ -57,8 +57,8 @@ BEGIN_PROVIDER [double precision, mulliken_spin_densities, (nucl_num)]

 END_PROVIDER

- BEGIN_PROVIDER [double precision, electronic_population_alpha, (ao_num_align,ao_num)]
-&BEGIN_PROVIDER [double precision, electronic_population_beta, (ao_num_align,ao_num)]
+ BEGIN_PROVIDER [double precision, electronic_population_alpha, (ao_num,ao_num)]
+&BEGIN_PROVIDER [double precision, electronic_population_beta, (ao_num,ao_num)]
 implicit none
 integer :: i,j
 BEGIN_DOC
--- a/plugins/Properties/routines_test.irp.f
+++ b/plugins/Properties/routines_test.irp.f
@ -2,7 +2,7 @@

 subroutine test_average_value(array,value)
 implicit none
- double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num)
+ double precision, intent(in) :: array(mo_tot_num,mo_tot_num)
 double precision, intent(in) :: value
 double precision :: tmp,hij
 integer :: i,j
@ -24,7 +24,7 @@ end

 subroutine test_average_value_alpha_beta(array,value)
 implicit none
- double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num)
+ double precision, intent(in) :: array(mo_tot_num,mo_tot_num)
 double precision, intent(in) :: value
 double precision :: tmp,hij
 integer :: i,j
--- a/plugins/Properties/slater_rules_mono_electronic.irp.f
+++ b/plugins/Properties/slater_rules_mono_electronic.irp.f
@ -10,7 +10,7 @@ subroutine i_O1_j(array,key_i,key_j,Nint,hij)
  integer, intent(in)            :: Nint
  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
  double precision, intent(out)  :: hij
-  double precision, intent(in)   :: array(mo_tot_num_align,mo_tot_num)
+  double precision, intent(in)   :: array(mo_tot_num,mo_tot_num)
  
  integer                        :: exc(0:2,2,2)
  integer                        :: degree
@ -25,7 +25,7 @@ subroutine i_O1_j(array,key_i,key_j,Nint,hij)
  ASSERT (sum(popcnt(key_j(:,2))) == elec_beta_num)
  
  hij = 0.d0
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call get_excitation_degree(key_i,key_j,degree,Nint)
  select case (degree)
    case (2)
@ -53,7 +53,7 @@ subroutine i_O1_psi(array,key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array)
  use bitmasks
  implicit none
  integer, intent(in)            :: Nint, Ndet,Ndet_max,Nstate
-  double precision, intent(in)   :: array(mo_tot_num_align,mo_tot_num)
+  double precision, intent(in)   :: array(mo_tot_num,mo_tot_num)
  integer(bit_kind), intent(in)  :: keys(Nint,2,Ndet)
  integer(bit_kind), intent(in)  :: key(Nint,2)
  double precision, intent(in)   :: coef(Ndet_max,Nstate)
@ -80,7 +80,7 @@ subroutine i_O1_psi(array,key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array)
  call filter_connected_mono(keys,key,Nint,Ndet,idx)
  do ii=1,idx(0)
    i = idx(ii)
-    !DEC$ FORCEINLINE
+    !DIR$ FORCEINLINE
    call i_O1_j(array,keys(1,1,i),key,Nint,hij)
    do j = 1, Nstate
      i_H_psi_array(j) = i_H_psi_array(j) + coef(i,j)*hij
@ -96,7 +96,7 @@ double precision function diag_O1_mat_elem(array,det_in,Nint)
  END_DOC
  integer,intent(in)             :: Nint
  integer(bit_kind),intent(in)   :: det_in(Nint,2)
-  double precision, intent(in)   :: array(mo_tot_num_align,mo_tot_num)
+  double precision, intent(in)   :: array(mo_tot_num,mo_tot_num)
  
  integer                        :: i, ispin,tmp
  integer                        :: occ_det(Nint*bit_kind_size,2)
@ -120,7 +120,7 @@ subroutine i_O1_psi_alpha_beta(array,key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H
  use bitmasks
  implicit none
  integer, intent(in)            :: Nint, Ndet,Ndet_max,Nstate
-  double precision, intent(in)   :: array(mo_tot_num_align,mo_tot_num)
+  double precision, intent(in)   :: array(mo_tot_num,mo_tot_num)
  integer(bit_kind), intent(in)  :: keys(Nint,2,Ndet)
  integer(bit_kind), intent(in)  :: key(Nint,2)
  double precision, intent(in)   :: coef(Ndet_max,Nstate)
@ -147,7 +147,7 @@ subroutine i_O1_psi_alpha_beta(array,key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H
  call filter_connected_mono(keys,key,Nint,Ndet,idx)
  do ii=1,idx(0)
    i = idx(ii)
-    !DEC$ FORCEINLINE
+    !DIR$ FORCEINLINE
    call i_O1_j_alpha_beta(array,keys(1,1,i),key,Nint,hij)
    do j = 1, Nstate
      i_H_psi_array(j) = i_H_psi_array(j) + coef(i,j)*hij
@ -167,7 +167,7 @@ subroutine i_O1_j_alpha_beta(array,key_i,key_j,Nint,hij)
  integer, intent(in)            :: Nint
  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
  double precision, intent(out)  :: hij
-  double precision, intent(in)   :: array(mo_tot_num_align,mo_tot_num)
+  double precision, intent(in)   :: array(mo_tot_num,mo_tot_num)
  
  integer                        :: exc(0:2,2,2)
  integer                        :: degree
@ -182,7 +182,7 @@ subroutine i_O1_j_alpha_beta(array,key_i,key_j,Nint,hij)
  ASSERT (sum(popcnt(key_j(:,2))) == elec_beta_num)
  
  hij = 0.d0
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call get_excitation_degree(key_i,key_j,degree,Nint)
  select case (degree)
    case (2)
@ -215,7 +215,7 @@ double precision function diag_O1_mat_elem_alpha_beta(array,det_in,Nint)
  END_DOC
  integer,intent(in)             :: Nint
  integer(bit_kind),intent(in)   :: det_in(Nint,2)
-  double precision, intent(in)   :: array(mo_tot_num_align,mo_tot_num)
+  double precision, intent(in)   :: array(mo_tot_num,mo_tot_num)
  
  integer                        :: i, ispin,tmp
  integer                        :: occ_det(Nint*bit_kind_size,2)
@ -319,7 +319,7 @@ subroutine filter_connected_mono(key1,key2,Nint,sze,idx)
    !DIR$ LOOP COUNT (1000)
    do i=1,sze
      degree_x2 = 0
-      !DEC$ LOOP COUNT MIN(4)
+      !DIR$ LOOP COUNT MIN(4)
      do j=1,Nint
        degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +&
            popcnt(xor( key1(j,2,i), key2(j,2)))
--- a/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES
+++ b/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES
@ -1 +1 @@
-
+Selectors_Utils
--- a/plugins/Selectors_CASSD/selectors.irp.f
+++ b/plugins/Selectors_CASSD/selectors.irp.f
@ -1,10 +1,5 @@
 use bitmasks

-BEGIN_PROVIDER [ integer, psi_selectors_size ]
- implicit none
- psi_selectors_size = psi_det_size
-END_PROVIDER
-
 BEGIN_PROVIDER [ integer, N_det_selectors]
  implicit none
  BEGIN_DOC
@ -66,30 +61,4 @@ END_PROVIDER
  endif
 END_PROVIDER

-BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ]
-  implicit none
-  BEGIN_DOC
-  ! Transposed psi_selectors
-  END_DOC
-  integer                        :: i,k
-
-  do i=1,N_det_selectors
-    do k=1,N_states
-      psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k) 
-    enddo
-  enddo
-END_PROVIDER
-
-BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ]
-  implicit none
-  BEGIN_DOC
-  ! Diagonal elements of the H matrix for each selectors 
-  END_DOC
-  integer                        :: i
-  double precision :: diag_H_mat_elem
-  do i = 1, N_det_selectors
-   psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int)
-  enddo
-END_PROVIDER
-

--- a/plugins/Selectors_CASSD/zmq.irp.f
+++ b/plugins/Selectors_CASSD/zmq.irp.f
@ -1,121 +0,0 @@
-subroutine zmq_put_psi(zmq_to_qp_run_socket,worker_id, energy, size_energy)
-  use f77_zmq
-  implicit none
-  BEGIN_DOC
-! Put the wave function on the qp_run scheduler
-  END_DOC
-  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
-  integer, intent(in)            :: worker_id
-  integer, intent(in)            :: size_energy
-  double precision, intent(out)  :: energy(size_energy)
-  integer                        :: rc
-  integer*8                      :: rc8
-  character*(256)                :: msg
-
-  write(msg,*) 'put_psi ', worker_id, N_states, N_det, psi_det_size, n_det_generators, n_det_selectors
-
-  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
-  if (rc /= len(trim(msg))) then
-    print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)'
-    stop 'error'
-  endif
-
-  rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_det,N_int*2_8*N_det*bit_kind,ZMQ_SNDMORE)
-  if (rc8 /= N_int*2_8*N_det*bit_kind) then
-    print *, 'f77_zmq_send8(zmq_to_qp_run_socket,psi_det,N_int*2_8*N_det*bit_kind,ZMQ_SNDMORE)'
-    stop 'error'
-  endif
-
-  rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8_8,ZMQ_SNDMORE)
-  if (rc8 /= psi_det_size*N_states*8_8) then
-    print *, 'f77_zmq_send8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8_8,ZMQ_SNDMORE)'
-    stop 'error'
-  endif
-
-  rc = f77_zmq_send(zmq_to_qp_run_socket,energy,size_energy*8,0)
-  if (rc /= size_energy*8) then
-    print *, 'f77_zmq_send(zmq_to_qp_run_socket,energy,size_energy*8,0)'
-    stop 'error'
-  endif
-
-  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
-  if (msg(1:rc) /= 'put_psi_reply 1') then
-    print *,  rc, trim(msg)
-    print *,  'Error in put_psi_reply'
-    stop 'error'
-  endif
-
-end
-
-
-
-subroutine zmq_get_psi(zmq_to_qp_run_socket, worker_id, energy, size_energy)
-  use f77_zmq
-  implicit none
-  BEGIN_DOC
-! Get the wave function from the qp_run scheduler
-  END_DOC
-  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
-  integer, intent(in)            :: worker_id
-  integer, intent(in)            :: size_energy
-  double precision, intent(out)  :: energy(size_energy)
-  integer                        :: rc
-  integer*8                      :: rc8
-  character*(64)                 :: msg
-
-  write(msg,*) 'get_psi ', worker_id
-
-  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
-  if (rc /= len(trim(msg))) then
-    print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)'
-    stop 'error'
-  endif
-
-  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
-  if (msg(1:13) /= 'get_psi_reply') then
-    print *,  rc, trim(msg)
-    print *,  'Error in get_psi_reply'
-    stop 'error'
-  endif
-  
-  integer :: N_states_read, N_det_read, psi_det_size_read
-  integer :: N_det_selectors_read, N_det_generators_read
-  read(msg(14:rc),*) N_states_read, N_det_read, psi_det_size_read, &
-    N_det_generators_read, N_det_selectors_read
-
-  N_states = N_states_read
-  N_det    = N_det_read
-  psi_det_size = psi_det_size_read
-  TOUCH psi_det_size N_det N_states
-
-  rc8 = f77_zmq_recv8(zmq_to_qp_run_socket,psi_det,N_int*2_8*N_det*bit_kind,0)
-  if (rc8 /= N_int*2_8*N_det*bit_kind) then
-    print *, 'f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)'
-    stop 'error'
-  endif
-
-  rc8 = f77_zmq_recv8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8_8,0)
-  if (rc8 /= psi_det_size*N_states*8_8) then
-    print *, '77_zmq_recv8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8_8,ZMQ_SNDMORE)'
-    stop 'error'
-  endif
-  TOUCH psi_det psi_coef
-
-  rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,size_energy*8,0)
-  if (rc /= size_energy*8) then
-    print *, '77_zmq_recv(zmq_to_qp_run_socket,energy,size_energy*8,0)'
-    stop 'error'
-  endif
-
-  if (N_det_generators_read > 0) then
-    N_det_generators = N_det_generators_read
-    TOUCH N_det_generators
-  endif
-  if (N_det_selectors_read > 0) then
-    N_det_selectors  = N_det_selectors_read
-    TOUCH N_det_selectors
-  endif
-
-end
-
-
--- a/plugins/Selectors_full/NEEDED_CHILDREN_MODULES
+++ b/plugins/Selectors_full/NEEDED_CHILDREN_MODULES
@ -1 +1 @@
-Determinants Hartree_Fock
+Determinants Hartree_Fock Selectors_Utils
--- a/plugins/Selectors_full/selectors.irp.f
+++ b/plugins/Selectors_full/selectors.irp.f
@ -1,10 +1,5 @@
 use bitmasks

-BEGIN_PROVIDER [ integer, psi_selectors_size ]
- implicit none
- psi_selectors_size = psi_det_size
-END_PROVIDER
-
 BEGIN_PROVIDER [ integer, N_det_selectors]
  implicit none
  BEGIN_DOC
@ -50,30 +45,4 @@ END_PROVIDER
  enddo
 END_PROVIDER

-BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ]
-  implicit none
-  BEGIN_DOC
-  ! Transposed psi_selectors
-  END_DOC
-  integer                        :: i,k
-
-  do i=1,N_det_selectors
-    do k=1,N_states
-      psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k) 
-    enddo
-  enddo
-END_PROVIDER
-
-BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ]
-  implicit none
-  BEGIN_DOC
-  ! Diagonal elements of the H matrix for each selectors 
-  END_DOC
-  integer                        :: i
-  double precision :: diag_H_mat_elem
-  do i = 1, N_det_selectors
-   psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int)
-  enddo
-END_PROVIDER
-

--- a/plugins/Selectors_no_sorted/NEEDED_CHILDREN_MODULES
+++ b/plugins/Selectors_no_sorted/NEEDED_CHILDREN_MODULES
@ -1 +1 @@
-Determinants
+Determinants Selectors_Utils
--- a/plugins/Selectors_no_sorted/e_corr_selectors.irp.f
+++ b/plugins/Selectors_no_sorted/e_corr_selectors.irp.f
@ -1,79 +0,0 @@
-
-use bitmasks
- BEGIN_PROVIDER [integer, exc_degree_per_selectors, (N_det_selectors)]
-&BEGIN_PROVIDER [integer, double_index_selectors, (N_det_selectors)]
-&BEGIN_PROVIDER [integer, n_double_selectors]
- implicit none
- BEGIN_DOC
- ! degree of excitation respect to Hartree Fock for the wave function
- !
- ! for the all the selectors determinants
- !
- ! double_index_selectors = list of the index of the double excitations
- ! 
- ! n_double_selectors = number of double excitations in the selectors determinants
- END_DOC
- integer :: i,degree
- n_double_selectors = 0
- do i = 1, N_det_selectors
-  call get_excitation_degree(psi_selectors(1,1,i),ref_bitmask,degree,N_int)
-  exc_degree_per_selectors(i) = degree
-  if(degree==2)then
-   n_double_selectors += 1
-   double_index_selectors(n_double_selectors) =i
-  endif
- enddo
-END_PROVIDER
-
-  BEGIN_PROVIDER[double precision, coef_hf_selector]
- &BEGIN_PROVIDER[double precision, inv_selectors_coef_hf]
- &BEGIN_PROVIDER[double precision, inv_selectors_coef_hf_squared]
- &BEGIN_PROVIDER[double precision, E_corr_per_selectors, (N_det_selectors)]
- &BEGIN_PROVIDER[double precision, i_H_HF_per_selectors, (N_det_selectors)]
- &BEGIN_PROVIDER[double precision, Delta_E_per_selector, (N_det_selectors)]
- &BEGIN_PROVIDER[double precision, E_corr_double_only ]
- &BEGIN_PROVIDER[double precision, E_corr_second_order ]
- implicit none
- BEGIN_DOC
- ! energy of correlation per determinant respect to the Hartree Fock determinant
- !
- ! for the all the double excitations in the selectors determinants
- !
- ! E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
- ! 
- ! E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation 
- !
- ! coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
- END_DOC
- PROVIDE  ref_bitmask_energy psi_selectors ref_bitmask N_int psi_selectors
- integer :: i,degree
- double precision :: hij,diag_H_mat_elem
- E_corr_double_only = 0.d0
- E_corr_second_order = 0.d0
- do i = 1, N_det_selectors
-  if(exc_degree_per_selectors(i)==2)then
-   call i_H_j(ref_bitmask,psi_selectors(1,1,i),N_int,hij)
-   i_H_HF_per_selectors(i) = hij
-   E_corr_per_selectors(i) = psi_selectors_coef(i,1) * hij
-   E_corr_double_only += E_corr_per_selectors(i)
-   E_corr_second_order += hij * hij /(ref_bitmask_energy - diag_H_mat_elem(psi_selectors(1,1,i),N_int))
-  elseif(exc_degree_per_selectors(i) == 0)then
-   coef_hf_selector = psi_selectors_coef(i,1)
-   E_corr_per_selectors(i) = -1000.d0
-   Delta_E_per_selector(i) = 0.d0
-  else
-   E_corr_per_selectors(i) = -1000.d0
-  endif
- enddo
- if (dabs(coef_hf_selector) > 1.d-8) then
-   inv_selectors_coef_hf = 1.d0/coef_hf_selector
-   inv_selectors_coef_hf_squared = inv_selectors_coef_hf * inv_selectors_coef_hf
- else
-   inv_selectors_coef_hf = 0.d0
-   inv_selectors_coef_hf_squared = 0.d0
- endif
- do i = 1,n_double_selectors
-  E_corr_per_selectors(double_index_selectors(i)) *=inv_selectors_coef_hf
- enddo
- E_corr_double_only = E_corr_double_only * inv_selectors_coef_hf
- END_PROVIDER
--- a/plugins/Selectors_no_sorted/selectors.irp.f
+++ b/plugins/Selectors_no_sorted/selectors.irp.f
@ -1,12 +1,5 @@
-
 use bitmasks

-
-BEGIN_PROVIDER [ integer, psi_selectors_size ]
- implicit none
- psi_selectors_size = psi_det_size
-END_PROVIDER
-
 BEGIN_PROVIDER [ integer, N_det_selectors]
 implicit none
 BEGIN_DOC
@ -45,16 +38,3 @@ END_PROVIDER
  enddo
 END_PROVIDER

- BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ]
-  implicit none
-  BEGIN_DOC
-  ! Diagonal elements of the H matrix for each selectors 
-  END_DOC
-  integer                        :: i
-  double precision :: diag_H_mat_elem
-  do i = 1, N_det_selectors
-   psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int)
-  enddo
- END_PROVIDER
-
-
--- a/plugins/read_integral/read_integrals_mo.irp.f
+++ b/plugins/read_integral/read_integrals_mo.irp.f
@ -28,7 +28,7 @@ subroutine run
   
  call ezfio_get_mo_basis_mo_tot_num(mo_tot_num)

-  allocate (A(mo_tot_num_align,mo_tot_num))
+  allocate (A(mo_tot_num,mo_tot_num))
  A = 0.d0
  
  iunit = getunitandopen('kinetic_mo','r')
--- a/src/AO_Basis/ao_overlap.irp.f
+++ b/src/AO_Basis/ao_overlap.irp.f
@ -1,7 +1,7 @@
- BEGIN_PROVIDER [ double precision, ao_overlap,(ao_num_align,ao_num) ]
-&BEGIN_PROVIDER [ double precision, ao_overlap_x,(ao_num_align,ao_num) ]
-&BEGIN_PROVIDER [ double precision, ao_overlap_y,(ao_num_align,ao_num) ]
-&BEGIN_PROVIDER [ double precision, ao_overlap_z,(ao_num_align,ao_num) ]
+ BEGIN_PROVIDER [ double precision, ao_overlap,(ao_num,ao_num) ]
+&BEGIN_PROVIDER [ double precision, ao_overlap_x,(ao_num,ao_num) ]
+&BEGIN_PROVIDER [ double precision, ao_overlap_y,(ao_num,ao_num) ]
+&BEGIN_PROVIDER [ double precision, ao_overlap_z,(ao_num,ao_num) ]
  implicit none
  BEGIN_DOC  
 ! Overlap between atomic basis functions:
@ -34,8 +34,6 @@
    power_A(1)  = ao_power( j, 1 )
    power_A(2)  = ao_power( j, 2 )
    power_A(3)  = ao_power( j, 3 )
-    !DEC$ VECTOR ALIGNED
-    !DEC$ VECTOR ALWAYS
    do i= 1,ao_num
      ao_overlap(i,j)= 0.d0
      ao_overlap_x(i,j)= 0.d0
@ -49,7 +47,6 @@
      power_B(3)  = ao_power( i, 3 )
      do n = 1,ao_prim_num(j)
      alpha = ao_expo_ordered_transp(n,j)
-      !DEC$ VECTOR ALIGNED
      do l = 1, ao_prim_num(i)
        beta = ao_expo_ordered_transp(l,i)
        call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
@ -72,7 +69,7 @@
 END_PROVIDER


-BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num_align,ao_num) ]
+BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num,ao_num) ]
  implicit none
  BEGIN_DOC  
 ! Overlap between absolute value of atomic basis functions:
@ -103,8 +100,6 @@ BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num_align,ao_num) ]
   power_A(1)  = ao_power( j, 1 )
   power_A(2)  = ao_power( j, 2 )
   power_A(3)  = ao_power( j, 3 )
-   !DEC$ VECTOR ALIGNED
-   !DEC$ VECTOR ALWAYS
   do i= 1,ao_num
    ao_overlap_abs(i,j)= 0.d0
    B_center(1) = nucl_coord( ao_nucl(i), 1 )
@ -115,7 +110,6 @@ BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num_align,ao_num) ]
    power_B(3)  = ao_power( i, 3 )
    do n = 1,ao_prim_num(j)
     alpha = ao_expo_ordered_transp(n,j)
-     !DEC$ VECTOR ALIGNED
     do l = 1, ao_prim_num(i)
      beta = ao_expo_ordered_transp(l,i)
      call overlap_x_abs(A_center(1),B_center(1),alpha,beta,power_A(1),power_B(1),overlap_x,lower_exp_val,dx,dim1)
--- a/src/AO_Basis/aos.irp.f
+++ b/src/AO_Basis/aos.irp.f
@ -1,14 +1,3 @@
-BEGIN_PROVIDER [ integer, ao_num_align ]
-   implicit none
-   
-   BEGIN_DOC
-   ! Number of atomic orbitals align
-   END_DOC
-   
-   integer                        :: align_double
-   ao_num_align = align_double(ao_num)
-END_PROVIDER 
-
 BEGIN_PROVIDER [ integer, ao_prim_num_max ]
 implicit none
 ao_prim_num_max = 0
@ -16,7 +5,7 @@ BEGIN_PROVIDER [ integer, ao_prim_num_max ]
 call ezfio_get_ao_basis_ao_prim_num_max(ao_prim_num_max)
 END_PROVIDER

- BEGIN_PROVIDER [ double precision, ao_coef_normalized, (ao_num_align,ao_prim_num_max) ]
+ BEGIN_PROVIDER [ double precision, ao_coef_normalized, (ao_num,ao_prim_num_max) ]
 &BEGIN_PROVIDER [ double precision, ao_coef_normalization_factor, (ao_num) ]
  implicit none
  BEGIN_DOC
@ -86,8 +75,8 @@ BEGIN_PROVIDER [ double precision, ao_coef_normalization_libint_factor, (ao_num)
 END_PROVIDER


- BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered, (ao_num_align,ao_prim_num_max) ]
-&BEGIN_PROVIDER [ double precision, ao_expo_ordered, (ao_num_align,ao_prim_num_max) ]
+ BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered, (ao_num,ao_prim_num_max) ]
+&BEGIN_PROVIDER [ double precision, ao_expo_ordered, (ao_num,ao_prim_num_max) ]
   implicit none
   BEGIN_DOC
   ! Sorted primitives to accelerate 4 index MO transformation
@ -112,7 +101,7 @@ END_PROVIDER
 END_PROVIDER


-BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered_transp, (ao_prim_num_max_align,ao_num) ]
+BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered_transp, (ao_prim_num_max,ao_num) ]
  implicit none
  BEGIN_DOC
  ! Transposed ao_coef_normalized_ordered
@ -126,7 +115,7 @@ BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered_transp, (ao_prim_n
  
 END_PROVIDER

-BEGIN_PROVIDER [ double precision, ao_expo_ordered_transp, (ao_prim_num_max_align,ao_num) ]
+BEGIN_PROVIDER [ double precision, ao_expo_ordered_transp, (ao_prim_num_max,ao_num) ]
  implicit none
  BEGIN_DOC
  ! Transposed ao_expo_ordered
@ -155,16 +144,6 @@ END_PROVIDER
 ao_l_max = maxval(ao_l)
 END_PROVIDER

-BEGIN_PROVIDER [ integer, ao_prim_num_max_align ]
- implicit none
- BEGIN_DOC
-! Number of primitives per atomic orbital aligned
- END_DOC
-
- integer :: align_double
- ao_prim_num_max_align = align_double(ao_prim_num_max)
-END_PROVIDER
-
 integer function ao_power_index(nx,ny,nz)
  implicit none
  integer, intent(in)            :: nx, ny, nz
--- a/src/Bitmask/mpi.irp.f
+++ b/src/Bitmask/mpi.irp.f
@ -0,0 +1,44 @@
+BEGIN_PROVIDER [ integer, mpi_bit_kind ]
+ use bitmasks
+ implicit none
+ BEGIN_DOC
+ ! MPI bit kind type
+ END_DOC
+ IRP_IF MPI
+  include 'mpif.h'
+  if (bit_kind == 4) then
+    mpi_bit_kind = MPI_INTEGER4
+  else if (bit_kind == 8) then
+    mpi_bit_kind = MPI_INTEGER8
+  else
+    stop 'Wrong bit kind in mpi_bit_kind'
+  endif
+ IRP_ELSE
+  mpi_bit_kind = -1
+ IRP_ENDIF
+END_PROVIDER
+
+subroutine broadcast_chunks_bit_kind(A, LDA)
+  use bitmasks
+  implicit none
+  integer, intent(in)             :: LDA
+  integer(bit_kind), intent(inout) :: A(LDA)
+  BEGIN_DOC
+! Broadcast with chunks of ~2GB
+  END_DOC
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: i, sze, ierr
+    do i=1,LDA,200000000/bit_kind_size
+      sze = min(LDA-i+1, 200000000/bit_kind_size)
+      call MPI_BCAST (A(i), sze, MPI_BIT_KIND, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        print *,  irp_here//': Unable to broadcast chunks bit_kind', i
+        stop -1
+      endif
+    enddo
+  IRP_ENDIF
+end
+
+
+
--- a/src/Davidson/davidson_parallel.irp.f
+++ b/src/Davidson/davidson_parallel.irp.f
@ -371,3 +371,69 @@ BEGIN_PROVIDER [ integer, nthreads_davidson ]
 call write_int(6,nthreads_davidson,'Number of threads for Diagonalization')
 END_PROVIDER

+
+subroutine zmq_put_N_states_diag(zmq_to_qp_run_socket,worker_id)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Put N_states_diag on the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer                        :: rc
+  character*(256)                :: msg
+
+  write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, 'N_states_diag'
+  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
+  if (rc /= len(trim(msg))) then
+    print *,  irp_here, ': Error sending N_states_diag'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_send(zmq_to_qp_run_socket,N_states_diag,4,0)
+  if (rc /= 4) then
+    print *,  irp_here, ': Error sending N_states_diag'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+  if (msg(1:rc) /= 'put_data_reply ok') then
+    print *,  rc, trim(msg)
+    print *,  irp_here, ': Error in put_data_reply'
+    stop 'error'
+  endif
+
+end
+
+subroutine zmq_get_N_states_diag(zmq_to_qp_run_socket, worker_id)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Get N_states_diag from the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer                        :: rc
+  character*(64)                 :: msg
+
+  write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, 'N_states_diag'
+  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
+  if (rc /= len(trim(msg))) then
+    print *,  irp_here, ': Error getting N_states_diag'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) 
+  if (msg(1:14) /= 'get_data_reply') then 
+    print *,  rc, trim(msg) 
+    print *,  irp_here, ': Error in get_data_reply' 
+    stop 'error' 
+  endif 
+ 
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,N_states_diag,4,0) 
+  if (rc /= 4) then 
+    print *, irp_here, ': Error getting N_states_diag'
+    stop 'error' 
+  endif 
+
+end
--- a/src/Davidson/diagonalization.irp.f
+++ b/src/Davidson/diagonalization.irp.f
@ -363,8 +363,6 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia
  enddo
  write(iunit,'(A)') trim(write_buffer)

-  integer, external :: align_double
-
  allocate(                                                          &
      kl_pairs(2,N_st_diag*(N_st_diag+1)/2),                         &
      W(sze,N_st_diag,davidson_sze_max),                           &
--- a/src/Davidson/diagonalization_hs2.irp.f
+++ b/src/Davidson/diagonalization_hs2.irp.f
@ -113,7 +113,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
    stop -1
  endif
  
-  integer, external              :: align_double
  itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
  
  PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse
--- a/src/Davidson/u0Hu0_old.irp.f
+++ b/src/Davidson/u0Hu0_old.irp.f
@ -1,518 +0,0 @@
-
-subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze)
-  use bitmasks
-  implicit none
-  BEGIN_DOC
-  ! Computes v_0 = H|u_0> 
-  !
-  ! n : number of determinants
-  !
-  ! H_jj : array of <j|H|j>
-  !
-  END_DOC
-  integer, intent(in)            :: N_st,n,Nint, sze
-  double precision, intent(out)  :: v_0(sze,N_st)
-  double precision, intent(in)   :: u_0(sze,N_st)
-  double precision, intent(in)   :: H_jj(n)
-  integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
-  double precision               :: hij,s2 
-  double precision, allocatable  :: vt(:,:), ut(:,:), st(:,:)
-  integer                        :: i,j,k,l, jj,ii
-  integer                        :: i0, j0
-  
-  integer, allocatable           :: shortcut(:,:), sort_idx(:,:)
-  integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
-  integer(bit_kind)              :: sorted_i(Nint)
-  
-  integer                        :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
-  integer                        :: N_st_8
-  
-  integer, external              :: align_double
-  
-  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
-
-  N_st_8 = align_double(N_st)
-
-  ASSERT (Nint > 0)
-  ASSERT (Nint == N_int)
-  ASSERT (n>0)
-  PROVIDE ref_bitmask_energy
-
-  allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
-  allocate( ut(N_st_8,n))
-
-  v_0 = 0.d0
-
-  call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
-  call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
-
-  !$OMP PARALLEL DEFAULT(NONE)                                       &
-      !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
-      !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) 
-  allocate(vt(N_st_8,n),st(N_st_8,n))
-  Vt = 0.d0
-  St = 0.d0
-
-  !$OMP DO
-  do i=1,n
-    do istate=1,N_st
-      ut(istate,i) = u_0(sort_idx(i,2),istate)
-    enddo
-  enddo
-  !$OMP END DO
-
-  !$OMP DO SCHEDULE(static,1)
-  do sh=1,shortcut(0,2)
-    do i=shortcut(sh,2),shortcut(sh+1,2)-1
-      org_i = sort_idx(i,2)
-      do j=shortcut(sh,2),shortcut(sh+1,2)-1
-        org_j = sort_idx(j,2)
-        ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
-        if (ext > 4) cycle
-        do ni=2,Nint
-          ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
-          if (ext > 4) exit
-        end do
-        if(ext == 4) then
-            call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-            do istate=1,n_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-              st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-            enddo
-        end if
-      end do
-    end do
-  enddo
-  !$OMP END DO
-
-  !$OMP DO
-  do i=1,n
-    do istate=1,N_st
-      ut(istate,i) = u_0(sort_idx(i,1),istate)
-    enddo
-  enddo
-  !$OMP END DO
-
-  !$OMP DO SCHEDULE(static,1)
-  do sh=1,shortcut(0,1)
-    do sh2=1,shortcut(0,1)
-      if (sh==sh2) cycle
-
-      exa = 0
-      do ni=1,Nint
-        exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
-      end do
-      if(exa > 2) then
-        cycle
-      end if
-
-      do i=shortcut(sh,1),shortcut(sh+1,1)-1
-        org_i = sort_idx(i,1)
-        do ni=1,Nint
-          sorted_i(ni) = sorted(ni,i,1)
-        enddo
-
-        do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
-          ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
-          if (ext > 4) cycle
-          do ni=2,Nint
-            ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
-            if (ext > 4) exit
-          end do
-          if(ext <= 4) then
-            org_j = sort_idx(j,1)
-            call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-            if (hij /= 0.d0) then
-              do istate=1,n_st
-                vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-              enddo
-            endif
-            if (ext /= 2) then
-              call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-              if (s2 /= 0.d0) then
-                do istate=1,n_st
-                  st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-                enddo
-              endif
-            endif
-          endif
-        enddo
-        
-      enddo
-    enddo
-
-    exa = 0
-
-    do i=shortcut(sh,1),shortcut(sh+1,1)-1
-      org_i = sort_idx(i,1)
-      do ni=1,Nint
-        sorted_i(ni) = sorted(ni,i,1)
-      enddo
-
-      do j=shortcut(sh,1),i-1
-        ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
-        if (ext > 4) cycle
-        do ni=2,Nint
-          ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
-          if (ext > 4) exit
-        end do
-        if(ext <= 4) then
-          org_j = sort_idx(j,1)
-          call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-          if (hij /= 0.d0) then
-            do istate=1,n_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-            enddo
-          endif
-          if (ext /= 2) then
-            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-            if (s2 /= 0.d0) then
-              do istate=1,n_st
-                st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-              enddo
-            endif
-          endif
-        endif
-      enddo
-      
-      do j=i+1,shortcut(sh+1,1)-1
-        if (i==j) cycle
-        ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
-        if (ext > 4) cycle
-        do ni=2,Nint
-          ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
-          if (ext > 4) exit
-        end do
-        if(ext <= 4) then
-          org_j = sort_idx(j,1)
-          call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-          if (hij /= 0.d0) then
-            do istate=1,n_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-            enddo
-          endif
-          if (ext /= 2) then
-            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-            if (s2 /= 0.d0) then
-              do istate=1,n_st
-                st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-              enddo
-            endif
-          endif
-        endif
-      enddo
-    enddo
-  enddo
-  !$OMP END DO 
-
-  do istate=1,N_st
-    do i=1,n
-      !$OMP ATOMIC
-      v_0(i,istate) = v_0(i,istate) + vt(istate,i)
-    enddo
-  enddo
-
-  deallocate(vt,st)
-  !$OMP END PARALLEL
-
-  do istate=1,N_st
-    do i=1,n
-      v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
-    enddo
-  enddo
-  deallocate (shortcut, sort_idx, sorted, version, ut)
-end
-
-
-
-
-
-subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze)
-  use bitmasks
-  implicit none
-  BEGIN_DOC
-  ! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
-  !
-  ! n : number of determinants
-  !
-  ! H_jj : array of <j|H|j>
-  !
-  ! S2_jj : array of <j|S^2|j>
-  END_DOC
-  integer, intent(in)            :: N_st,n,Nint, sze
-  double precision, intent(out)  :: v_0(sze,N_st), s_0(sze,N_st)
-  double precision, intent(in)   :: u_0(sze,N_st)
-  double precision, intent(in)   :: H_jj(n), S2_jj(n)
-  integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
-  double precision               :: hij,s2 
-  double precision, allocatable  :: vt(:,:), ut(:,:), st(:,:)
-  integer                        :: i,j,k,l, jj,ii
-  integer                        :: i0, j0
-  
-  integer, allocatable           :: shortcut(:,:), sort_idx(:,:)
-  integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
-  integer(bit_kind)              :: sorted_i(Nint)
-  
-  integer                        :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
-  integer                        :: N_st_8
-  
-  integer, external              :: align_double
-  
-  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
-
-  N_st_8 = align_double(N_st)
-
-  ASSERT (Nint > 0)
-  ASSERT (Nint == N_int)
-  ASSERT (n>0)
-  PROVIDE ref_bitmask_energy
-
-  allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
-  allocate( ut(N_st_8,n))
-
-  v_0 = 0.d0
-  s_0 = 0.d0
-
-  call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
-  call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
-
-  !$OMP PARALLEL DEFAULT(NONE)                                       &
-      !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
-      !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) 
-  allocate(vt(N_st_8,n),st(N_st_8,n))
-  Vt = 0.d0
-  St = 0.d0
-
-  !$OMP DO
-  do i=1,n
-    do istate=1,N_st
-      ut(istate,i) = u_0(sort_idx(i,2),istate)
-    enddo
-  enddo
-  !$OMP END DO
-
-  !$OMP DO SCHEDULE(static,4)
-  do sh=1,shortcut(0,2)
-    do i=shortcut(sh,2),shortcut(sh+1,2)-1
-      org_i = sort_idx(i,2)
-      do j=shortcut(sh,2),shortcut(sh+1,2)-1
-        org_j = sort_idx(j,2)
-        ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
-        if (ext > 4) cycle
-        do ni=2,Nint
-          ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
-          if (ext > 4) exit
-        end do
-        if(ext == 4) then
-            call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-            do istate=1,n_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-              st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-            enddo
-        end if
-      end do
-    end do
-  enddo
-  !$OMP END DO
-
-  !$OMP DO
-  do i=1,n
-    do istate=1,N_st
-      ut(istate,i) = u_0(sort_idx(i,1),istate)
-    enddo
-  enddo
-  !$OMP END DO
-
-  !$OMP DO SCHEDULE(static,4)
-  do sh=1,shortcut(0,1)
-    do sh2=1,shortcut(0,1)
-      if (sh==sh2) cycle
-
-      exa = 0
-      do ni=1,Nint
-        exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
-      end do
-      if(exa > 2) then
-        cycle
-      end if
-
-      do i=shortcut(sh,1),shortcut(sh+1,1)-1
-        org_i = sort_idx(i,1)
-        do ni=1,Nint
-          sorted_i(ni) = sorted(ni,i,1)
-        enddo
-
-        do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
-          ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
-          if (ext > 4) cycle
-          do ni=2,Nint
-            ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
-            if (ext > 4) exit
-          end do
-          if(ext <= 4) then
-            org_j = sort_idx(j,1)
-            call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-            if (hij /= 0.d0) then
-              do istate=1,n_st
-                vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-              enddo
-            endif
-            if (ext /= 2) then
-              call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-              if (s2 /= 0.d0) then
-                do istate=1,n_st
-                  st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-                enddo
-              endif
-            endif
-          endif
-        enddo
-        
-      enddo
-    enddo
-
-    exa = 0
-
-    do i=shortcut(sh,1),shortcut(sh+1,1)-1
-      org_i = sort_idx(i,1)
-      do ni=1,Nint
-        sorted_i(ni) = sorted(ni,i,1)
-      enddo
-
-      do j=shortcut(sh,1),i-1
-        ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
-        if (ext > 4) cycle
-        do ni=2,Nint
-          ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
-          if (ext > 4) exit
-        end do
-        if(ext <= 4) then
-          org_j = sort_idx(j,1)
-          call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-          if (hij /= 0.d0) then
-            do istate=1,n_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-            enddo
-          endif
-          if (ext /= 2) then
-            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-            if (s2 /= 0.d0) then
-              do istate=1,n_st
-                st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-              enddo
-            endif
-          endif
-        endif
-      enddo
-      
-      do j=i+1,shortcut(sh+1,1)-1
-        ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
-        if (ext > 4) cycle
-        do ni=2,Nint
-          ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
-          if (ext > 4) exit
-        end do
-        if(ext <= 4) then
-          org_j = sort_idx(j,1)
-          call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-          if (hij /= 0.d0) then
-            do istate=1,n_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-            enddo
-          endif
-          if (ext /= 2) then
-            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-            if (s2 /= 0.d0) then
-              do istate=1,n_st
-                st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-              enddo
-            endif
-          endif
-        endif
-      enddo
-    enddo
-  enddo
-  !$OMP END DO 
-
-  do istate=1,N_st
-    do i=1,n
-      !$OMP ATOMIC
-      v_0(i,istate) = v_0(i,istate) + vt(istate,i)
-      !$OMP ATOMIC
-      s_0(i,istate) = s_0(i,istate) + st(istate,i)
-    enddo
-  enddo
-
-  deallocate(vt,st)
-  !$OMP END PARALLEL
-
-  do istate=1,N_st
-    do i=1,n
-      v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
-      s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
-    enddo
-  enddo
-  deallocate (shortcut, sort_idx, sorted, version, ut)
-end
-
-subroutine H_S2_u_0_nstates_test(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze)
-  use bitmasks
-  implicit none
-  integer, intent(in)            :: N_st,n,Nint, sze
-  integer(bit_kind), intent(in)  :: keys_tmp(Nint,2,n)
-  double precision, intent(inout)   :: v_0(sze,N_st), s_0(sze,N_st)
-  double precision, intent(in)   :: u_0(sze,N_st)
-  double precision, intent(in)   :: H_jj(n), S2_jj(n)
-  
-  PROVIDE ref_bitmask_energy
-
-  double precision, allocatable :: vt(:,:)
-  integer, allocatable :: idx(:)
-  integer                        :: i,j, jj, l
-  double precision               :: hij
-
-  do i=1,n
-    v_0(i,:) = H_jj(i) * u_0(i,:)
-  enddo
-
-  allocate(idx(0:n), vt(N_st,n))
-  Vt = 0.d0
-  !$OMP PARALLEL DO DEFAULT(shared) PRIVATE(i,idx,jj,j,degree,exc,phase,hij,l) SCHEDULE(static,1)
-  do i=2,n
-    idx(0) = i
-    call filter_connected(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx)
-    do jj=1,idx(0)
-      j = idx(jj)
-      double precision :: phase
-      integer :: degree
-      integer :: exc(0:2,2,2)
-      call get_excitation(keys_tmp(1,1,j),keys_tmp(1,1,i),exc,degree,phase,Nint)
-!       if ((degree == 2).and.(exc(0,1,1)==1)) then
-!         continue
-!       else
-!         cycle
-!       endif
-!       if ((degree == 2).and.(exc(0,1,1)==1)) cycle
-!      if ((degree > 1)) cycle
-!      if ((degree == 1)) cycle
-!      if (exc(0,1,2) /= 0) cycle
-!      if (exc(0,1,1) == 2) cycle
-!      if (exc(0,1,2) == 2) cycle
-!      if ((degree==1).and.(exc(0,1,1) == 1)) cycle
-      call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij)
-      do l=1,N_st
-        !$OMP ATOMIC
-        vt (l,i) = vt (l,i) + hij*u_0(j,l)
-        !$OMP ATOMIC
-        vt (l,j) = vt (l,j) + hij*u_0(i,l)
-      enddo
-    enddo
-  enddo
-  !$OMP END PARALLEL DO
-  do i=1,n
-    v_0(i,:) = v_0(i,:) + vt(:,i)
-  enddo
-end
-
--- a/src/Determinants/filter_connected.irp.f
+++ b/src/Determinants/filter_connected.irp.f
@ -78,7 +78,7 @@ subroutine filter_not_connected(key1,key2,Nint,sze,idx)
    !DIR$ LOOP COUNT (1000)
    do i=1,sze
      degree_x2 = 0
-      !DEC$ LOOP COUNT MIN(4)
+      !DIR$ LOOP COUNT MIN(4)
      do j=1,Nint
        degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +&
            popcnt(xor( key1(j,2,i), key2(j,2)))
@ -177,7 +177,7 @@ subroutine filter_connected(key1,key2,Nint,sze,idx)
    !DIR$ LOOP COUNT (1000)
    do i=1,sze
      degree_x2 = 0
-      !DEC$ LOOP COUNT MIN(4)
+      !DIR$ LOOP COUNT MIN(4)
      do j=1,Nint
        degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +&
            popcnt(xor( key1(j,2,i), key2(j,2)))
@ -404,7 +404,7 @@ subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx)
    !DIR$ LOOP COUNT (1000)
    outer: do i=1,sze
      degree_x2 = 0
-      !DEC$ LOOP COUNT MIN(4)
+      !DIR$ LOOP COUNT MIN(4)
      do m=1,Nint
        if ( key1(m,1,i) /=  key2(m,1)) then
          degree_x2 = degree_x2+ popcnt(xor( key1(m,1,i), key2(m,1))) 
@ -454,7 +454,7 @@ subroutine filter_connected_i_H_psi0_SC2(key1,key2,Nint,sze,idx,idx_repeat)
  integer :: degree
  degree = popcnt(xor( ref_bitmask(1,1), key2(1,1))) +                      &
      popcnt(xor( ref_bitmask(1,2), key2(1,2)))
-  !DEC$ NOUNROLL
+  !DIR$ NOUNROLL
  do m=2,Nint
    degree = degree+ popcnt(xor( ref_bitmask(m,1), key2(m,1))) +            &
        popcnt(xor( ref_bitmask(m,2), key2(m,2)))
@ -526,7 +526,7 @@ subroutine filter_connected_i_H_psi0_SC2(key1,key2,Nint,sze,idx,idx_repeat)
     !DIR$ LOOP COUNT (1000)
     do i=1,sze
       degree_x2 = 0
-       !DEC$ LOOP COUNT MIN(4)
+       !DIR$ LOOP COUNT MIN(4)
       do m=1,Nint
         degree_x2 = degree_x2+ popcnt(xor( key1(m,1,i), key2(m,1))) +&
             popcnt(xor( key1(m,2,i), key2(m,2)))
@ -610,7 +610,7 @@ subroutine filter_connected_i_H_psi0_SC2(key1,key2,Nint,sze,idx,idx_repeat)
     !DIR$ LOOP COUNT (1000)
     do i=1,sze
       degree_x2 = 0
-       !DEC$ LOOP COUNT MIN(4)
+       !DIR$ LOOP COUNT MIN(4)
       do m=1,Nint
         degree_x2 = degree_x2+ popcnt(xor( key1(m,1,i), key2(m,1))) +&
             popcnt(xor( key1(m,2,i), key2(m,2)))
--- a/src/Determinants/two_body_dm_map.irp.f
+++ b/src/Determinants/two_body_dm_map.irp.f
@ -141,8 +141,7 @@ subroutine add_values_to_two_body_dm_map(mask_ijkl)
     n_elements += 1
     contrib = psi_coef(i,1) * psi_coef(j,1) * phase
     buffer_value(n_elements) = contrib
-     !DEC$ FORCEINLINE
-!    call mo_bielec_integrals_index(h1,p1,h2,p2,buffer_i(n_elements))
+     !DIR$ FORCEINLINE
     call mo_bielec_integrals_index(h1,h2,p1,p2,buffer_i(n_elements))
 !    if (n_elements == size_buffer) then
 !      call insert_into_two_body_dm_ab_map(n_elements,buffer_i,buffer_value,&
--- a/src/Integrals_Bielec/ao_bi_integrals.irp.f
+++ b/src/Integrals_Bielec/ao_bi_integrals.irp.f
@ -483,7 +483,6 @@ double precision function general_primitive_integral(dim,            &
  
  accu = 0.d0
  iorder = iorder_p(1)+iorder_q(1)+iorder_p(1)+iorder_q(1)
-  !DIR$ VECTOR ALIGNED
  do ix=0,iorder
    Ix_pol(ix) = 0.d0
  enddo
@ -494,9 +493,9 @@ double precision function general_primitive_integral(dim,            &
    do jx = 0, iorder_q(1)
      d = a*Q_new(jx,1)
      if (abs(d) < thresh) cycle
-      !DEC$ FORCEINLINE
+      !DIR$ FORCEINLINE
      call give_polynom_mult_center_x(P_center(1),Q_center(1),ix,jx,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dx,nx)
-      !DEC$ FORCEINLINE
+      !DIR$ FORCEINLINE
      call add_poly_multiply(dx,nx,d,Ix_pol,n_Ix)
    enddo
  enddo
@ -504,7 +503,6 @@ double precision function general_primitive_integral(dim,            &
    return
  endif
  iorder = iorder_p(2)+iorder_q(2)+iorder_p(2)+iorder_q(2)
-  !DIR$ VECTOR ALIGNED
  do ix=0, iorder
    Iy_pol(ix) = 0.d0
  enddo
@ -515,9 +513,9 @@ double precision function general_primitive_integral(dim,            &
      do jy = 0, iorder_q(2)
        e = b*Q_new(jy,2)
        if (abs(e) < thresh) cycle
-        !DEC$ FORCEINLINE
+        !DIR$ FORCEINLINE
        call   give_polynom_mult_center_x(P_center(2),Q_center(2),iy,jy,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dy,ny)
-        !DEC$ FORCEINLINE
+        !DIR$ FORCEINLINE
        call add_poly_multiply(dy,ny,e,Iy_pol,n_Iy)
      enddo
    endif
@ -537,9 +535,9 @@ double precision function general_primitive_integral(dim,            &
      do jz = 0, iorder_q(3)
        f = c*Q_new(jz,3)
        if (abs(f) < thresh) cycle
-        !DEC$ FORCEINLINE
+        !DIR$ FORCEINLINE
        call   give_polynom_mult_center_x(P_center(3),Q_center(3),iz,jz,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dz,nz)
-        !DEC$ FORCEINLINE
+        !DIR$ FORCEINLINE
        call add_poly_multiply(dz,nz,f,Iz_pol,n_Iz)
      enddo
    endif
@ -559,7 +557,7 @@ double precision function general_primitive_integral(dim,            &
    d_poly(i)=0.d0
  enddo
  
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp)
  if (n_pt_tmp == -1) then
    return
@ -569,7 +567,7 @@ double precision function general_primitive_integral(dim,            &
    d1(i)=0.d0
  enddo
  
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out)
  double precision               :: rint_sum
  accu = accu + rint_sum(n_pt_out,const,d1)
@ -673,7 +671,6 @@ subroutine integrale_new(I_f,a_x,b_x,c_x,d_x,a_y,b_y,c_y,d_y,a_z,b_z,c_z,d_z,p,q
  sy = iy+jy
  sz = iz+jz

-  !DIR$ VECTOR ALIGNED
  do i = 1,n_pt
    B10(i)  = p10_1 -  gauleg_t2(i,j)* p10_2
    B01(i)  = p01_1 -  gauleg_t2(i,j)* p01_2
@ -682,27 +679,23 @@ subroutine integrale_new(I_f,a_x,b_x,c_x,d_x,a_y,b_y,c_y,d_y,a_z,b_z,c_z,d_z,p,q
  if (sx > 0) then
    call I_x1_new(ix,jx,B10,B01,B00,t1,n_pt)
  else
-    !DIR$ VECTOR ALIGNED
    do i = 1,n_pt
      t1(i) = 1.d0
    enddo
  endif
  if (sy > 0) then
    call I_x1_new(iy,jy,B10,B01,B00,t2,n_pt)
-    !DIR$ VECTOR ALIGNED
    do i = 1,n_pt
      t1(i) = t1(i)*t2(i)
    enddo
  endif
  if (sz > 0) then
    call I_x1_new(iz,jz,B10,B01,B00,t2,n_pt)
-    !DIR$ VECTOR ALIGNED
    do i = 1,n_pt
      t1(i) = t1(i)*t2(i)
    enddo
  endif
  I_f= 0.d0
-  !DIR$ VECTOR ALIGNED
  do i = 1,n_pt
    I_f += gauleg_w(i,j)*t1(i)
  enddo
@ -724,7 +717,6 @@ recursive subroutine I_x1_new(a,c,B_10,B_01,B_00,res,n_pt)
  integer                        :: i
  
  if(c<0)then
-    !DIR$ VECTOR ALIGNED
    do i=1,n_pt
      res(i) = 0.d0
    enddo
@ -732,14 +724,12 @@ recursive subroutine I_x1_new(a,c,B_10,B_01,B_00,res,n_pt)
    call I_x2_new(c,B_10,B_01,B_00,res,n_pt)
  else if (a==1) then
    call I_x2_new(c-1,B_10,B_01,B_00,res,n_pt)
-    !DIR$ VECTOR ALIGNED
    do i=1,n_pt
      res(i) = c * B_00(i) * res(i)
    enddo
  else
    call I_x1_new(a-2,c,B_10,B_01,B_00,res,n_pt)
    call I_x1_new(a-1,c-1,B_10,B_01,B_00,res2,n_pt)
-    !DIR$ VECTOR ALIGNED
    do i=1,n_pt
      res(i) = (a-1) * B_10(i) * res(i) &
               + c * B_00(i) * res2(i)
@ -759,18 +749,15 @@ recursive subroutine I_x2_new(c,B_10,B_01,B_00,res,n_pt)
  integer                        :: i
  
  if(c==1)then
-    !DIR$ VECTOR ALIGNED
    do i=1,n_pt
      res(i) = 0.d0
    enddo
  elseif(c==0) then
-    !DIR$ VECTOR ALIGNED
    do i=1,n_pt
      res(i) = 1.d0
    enddo
  else
    call I_x1_new(0,c-2,B_10,B_01,B_00,res,n_pt)
-    !DIR$ VECTOR ALIGNED
    do i=1,n_pt
      res(i) =  (c-1) * B_01(i) * res(i)
    enddo
@ -906,7 +893,6 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
  integer                        :: nx, ix,iy,ny
  
  ASSERT (a>2)
-  !DIR$ VECTOR ALIGNED
  !DIR$ LOOP COUNT(8)
  do ix=0,n_pt_in
    X(ix) = 0.d0
@ -921,17 +907,15 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
    call I_x1_pol_mult_recurs(a-2,c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in)
  endif
  
-  !DIR$ VECTOR ALIGNED
  !DIR$ LOOP COUNT(8)
  do ix=0,nx
    X(ix) *= dble(a-1)
  enddo
  
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call multiply_poly(X,nx,B_10,2,d,nd)
  
  nx = nd
-  !DIR$ VECTOR ALIGNED
  !DIR$ LOOP COUNT(8)
  do ix=0,n_pt_in
    X(ix) = 0.d0
@ -945,19 +929,17 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
      call I_x1_pol_mult_recurs(a-1,c-1,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in)
    endif
    if (c>1) then
-      !DIR$ VECTOR ALIGNED
      !DIR$ LOOP COUNT(8)
      do ix=0,nx
        X(ix) *= c
      enddo
    endif
-    !DEC$ FORCEINLINE
+    !DIR$ FORCEINLINE
    call multiply_poly(X,nx,B_00,2,d,nd)
  endif
  
  ny=0
  
-  !DIR$ VECTOR ALIGNED
  !DIR$ LOOP COUNT(8)
  do ix=0,n_pt_in
    Y(ix) = 0.d0
@ -970,7 +952,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
    call I_x1_pol_mult_recurs(a-1,c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in)
  endif
  
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call multiply_poly(Y,ny,C_00,2,d,nd)
  
 end
@ -997,7 +979,6 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
  endif
  
  nx = nd
-  !DIR$ VECTOR ALIGNED
  !DIR$ LOOP COUNT(8)
  do ix=0,n_pt_in
    X(ix) = 0.d0
@ -1005,26 +986,24 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
  call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in)
  
  if (c>1) then
-    !DIR$ VECTOR ALIGNED
    !DIR$ LOOP COUNT(8)
    do ix=0,nx
      X(ix) *= dble(c)
    enddo
  endif
  
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call multiply_poly(X,nx,B_00,2,d,nd)
  
  ny=0
  
-  !DIR$ VECTOR ALIGNED
  !DIR$ LOOP COUNT(8)
  do ix=0,n_pt_in
    Y(ix) = 0.d0
  enddo
  call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in)
  
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call multiply_poly(Y,ny,C_00,2,d,nd)
  
 end
@ -1045,7 +1024,6 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y
  integer                        :: nx, ix,iy,ny
  
-  !DIR$ VECTOR ALIGNED
  !DIR$ LOOP COUNT(8)
  do ix=0,n_pt_in
    X(ix) = 0.d0
@ -1053,11 +1031,10 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
  nx = 0
  call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in)
  
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call multiply_poly(X,nx,B_10,2,d,nd)
  
  nx = nd
-  !DIR$ VECTOR ALIGNED
  !DIR$ LOOP COUNT(8)
  do ix=0,n_pt_in
    X(ix) = 0.d0
@ -1067,26 +1044,24 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
  call I_x1_pol_mult_a1(c-1,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in)
  
  if (c>1) then
-    !DIR$ VECTOR ALIGNED
    !DIR$ LOOP COUNT(8)
    do ix=0,nx
      X(ix) *= dble(c)
    enddo
  endif
  
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call multiply_poly(X,nx,B_00,2,d,nd)
  
  ny=0
-  !DIR$ VECTOR ALIGNED
  !DIR$ LOOP COUNT(8)
  do ix=0,n_pt_in
    Y(ix) = 0.d0
  enddo
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in)
  
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call multiply_poly(Y,ny,C_00,2,d,nd)
  
 end
@ -1104,7 +1079,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
  double precision, intent(in)   :: B_10(0:2),B_01(0:2),B_00(0:2),C_00(0:2),D_00(0:2)
  integer                        :: nx, ix,ny
  double precision               :: X(0:max_dim),Y(0:max_dim)
-  !DEC$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y
  integer                        :: i
  
  select case (c)
@ -1135,13 +1110,12 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
      Y(1) = D_00(1)
      Y(2) = D_00(2)
      
-      !DEC$ FORCEINLINE
+      !DIR$ FORCEINLINE
      call multiply_poly(Y,ny,D_00,2,d,nd)
      return
      
      case default
      
-      !DIR$ VECTOR ALIGNED
      !DIR$ LOOP COUNT(6)
      do ix=0,c+c
        X(ix) = 0.d0
@ -1149,24 +1123,22 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
      nx = 0
      call I_x2_pol_mult(c-2,B_10,B_01,B_00,C_00,D_00,X,nx,dim)
      
-      !DIR$ VECTOR ALIGNED
      !DIR$ LOOP COUNT(6)
      do ix=0,nx
        X(ix) *= dble(c-1)
      enddo
      
-      !DEC$ FORCEINLINE
+      !DIR$ FORCEINLINE
      call multiply_poly(X,nx,B_01,2,d,nd)
      
      ny = 0
-      !DIR$ VECTOR ALIGNED
      !DIR$ LOOP COUNT(6)
      do ix=0,c+c
        Y(ix) = 0.d0
      enddo
      call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim)
      
-      !DEC$ FORCEINLINE
+      !DIR$ FORCEINLINE
      call multiply_poly(Y,ny,D_00,2,d,nd)
      
  end select
--- a/src/Integrals_Bielec/gauss_legendre.irp.f
+++ b/src/Integrals_Bielec/gauss_legendre.irp.f
@ -1,14 +1,5 @@
-BEGIN_PROVIDER [ integer, n_pt_max_integrals_16 ]
- implicit none
- BEGIN_DOC
- ! Aligned n_pt_max_integrals
- END_DOC
- integer, external :: align_double
- n_pt_max_integrals_16 = align_double(n_pt_max_integrals)
-END_PROVIDER
-
- BEGIN_PROVIDER [ double precision, gauleg_t2, (n_pt_max_integrals_16,n_pt_max_integrals/2) ]
-&BEGIN_PROVIDER [ double precision, gauleg_w, (n_pt_max_integrals_16,n_pt_max_integrals/2) ]
+ BEGIN_PROVIDER [ double precision, gauleg_t2, (n_pt_max_integrals,n_pt_max_integrals/2) ]
+&BEGIN_PROVIDER [ double precision, gauleg_w, (n_pt_max_integrals,n_pt_max_integrals/2) ]
   implicit none
   BEGIN_DOC
   ! t_w(i,1,k) = w(i)
--- a/src/Integrals_Bielec/integrals_3_index.irp.f
+++ b/src/Integrals_Bielec/integrals_3_index.irp.f
@ -1,5 +1,5 @@
- BEGIN_PROVIDER [double precision, big_array_coulomb_integrals, (mo_tot_num_align,mo_tot_num, mo_tot_num)]
-&BEGIN_PROVIDER [double precision, big_array_exchange_integrals,(mo_tot_num_align,mo_tot_num, mo_tot_num)]
+ BEGIN_PROVIDER [double precision, big_array_coulomb_integrals, (mo_tot_num,mo_tot_num, mo_tot_num)]
+&BEGIN_PROVIDER [double precision, big_array_exchange_integrals,(mo_tot_num,mo_tot_num, mo_tot_num)]
 implicit none
 integer :: i,j,k,l
 double precision :: get_mo_bielec_integral
--- a/src/Integrals_Bielec/mo_bi_integrals.irp.f
+++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f
@ -207,7 +207,7 @@ subroutine add_integrals_to_map(mask_ijkl)
  double precision, allocatable  :: bielec_tmp_1(:)
  double precision, allocatable  :: bielec_tmp_2(:,:)
  double precision, allocatable  :: bielec_tmp_3(:,:,:)
-  !DEC$ ATTRIBUTES ALIGN : 64    :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3
+  !DIR$ ATTRIBUTES ALIGN : 64    :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3
  
  integer                        :: n_integrals
  integer                        :: size_buffer
@ -276,7 +276,7 @@ subroutine add_integrals_to_map(mask_ijkl)
  
  size_buffer = min(ao_num*ao_num*ao_num,16000000)
  print*, 'Providing the molecular integrals '
-  print*, 'Buffers : ', 8.*(mo_tot_num_align*(n_j)*(n_k+1) + mo_tot_num_align +&
+  print*, 'Buffers : ', 8.*(mo_tot_num*(n_j)*(n_k+1) + mo_tot_num+&
      ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
  
  call wall_time(wall_1)
@ -289,18 +289,18 @@ subroutine add_integrals_to_map(mask_ijkl)
      !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
      !$OMP  wall_0,thread_num,accu_bis)                             &
      !$OMP  DEFAULT(NONE)                                           &
-      !$OMP  SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,n_l,mo_tot_num_align,&
+      !$OMP  SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,n_l,   &
      !$OMP  mo_coef_transp,                                         &
      !$OMP  mo_coef_transp_is_built, list_ijkl,                     &
      !$OMP  mo_coef_is_built, wall_1,                               &
      !$OMP  mo_coef,mo_integrals_threshold,mo_integrals_map)
  n_integrals = 0
  wall_0 = wall_1
-  allocate(bielec_tmp_3(mo_tot_num_align, n_j, n_k),                 &
-      bielec_tmp_1(mo_tot_num_align),                                &
+  allocate(bielec_tmp_3(mo_tot_num, n_j, n_k),                 &
+      bielec_tmp_1(mo_tot_num),                                &
      bielec_tmp_0(ao_num,ao_num),                                   &
      bielec_tmp_0_idx(ao_num),                                      &
-      bielec_tmp_2(mo_tot_num_align, n_j),                           &
+      bielec_tmp_2(mo_tot_num, n_j),                           &
      buffer_i(size_buffer),                                         &
      buffer_value(size_buffer) )
  
@ -308,10 +308,8 @@ subroutine add_integrals_to_map(mask_ijkl)
  !$  thread_num = omp_get_thread_num()
  !$OMP DO SCHEDULE(guided)
  do l1 = 1,ao_num
-    !DEC$ VECTOR ALIGNED
    bielec_tmp_3 = 0.d0
    do k1 = 1,ao_num
-      !DEC$ VECTOR ALIGNED
      bielec_tmp_2 = 0.d0
      do j1 = 1,ao_num
        call get_ao_bielec_integrals(j1,k1,l1,ao_num,bielec_tmp_0(1,j1))
@ -333,7 +331,6 @@ subroutine add_integrals_to_map(mask_ijkl)
          cycle
        endif
        
-        !DEC$ VECTOR ALIGNED
        bielec_tmp_1 = 0.d0
        ii1=1
        do ii1 = 1,kmax-4,4
@ -443,7 +440,7 @@ subroutine add_integrals_to_map(mask_ijkl)
            endif
            n_integrals += 1
            buffer_value(n_integrals) = bielec_tmp_1(i)
-            !DEC$ FORCEINLINE
+            !DIR$ FORCEINLINE
            call mo_bielec_integrals_index(i,j,k,l,buffer_i(n_integrals))
            if (n_integrals == size_buffer) then
              call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
@ -514,7 +511,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
  double precision, allocatable  :: bielec_tmp_1(:)
  double precision, allocatable  :: bielec_tmp_2(:,:)
  double precision, allocatable  :: bielec_tmp_3(:,:,:)
-  !DEC$ ATTRIBUTES ALIGN : 64    :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3
+  !DIR$ ATTRIBUTES ALIGN : 64    :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3
  
  integer                        :: n_integrals
  integer                        :: size_buffer
@ -571,7 +568,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
  
  size_buffer = min(ao_num*ao_num*ao_num,16000000)
  print*, 'Providing the molecular integrals '
-  print*, 'Buffers : ', 8.*(mo_tot_num_align*(n_j)*(n_k+1) + mo_tot_num_align +&
+  print*, 'Buffers : ', 8.*(mo_tot_num*(n_j)*(n_k+1) + mo_tot_num+&
      ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
  
  call wall_time(wall_1)
@ -583,18 +580,18 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
      !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
      !$OMP  wall_0,thread_num,accu_bis)                             &
      !$OMP  DEFAULT(NONE)                                           &
-      !$OMP  SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,mo_tot_num_align,&
+      !$OMP  SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,       &
      !$OMP  mo_coef_transp,                                         &
      !$OMP  mo_coef_transp_is_built, list_ijkl,                     &
      !$OMP  mo_coef_is_built, wall_1,                               &
      !$OMP  mo_coef,mo_integrals_threshold,mo_integrals_map)
  n_integrals = 0
  wall_0 = wall_1
-  allocate(bielec_tmp_3(mo_tot_num_align, n_j, n_k),                 &
-      bielec_tmp_1(mo_tot_num_align),                                &
+  allocate(bielec_tmp_3(mo_tot_num, n_j, n_k),                 &
+      bielec_tmp_1(mo_tot_num),                                &
      bielec_tmp_0(ao_num,ao_num),                                   &
      bielec_tmp_0_idx(ao_num),                                      &
-      bielec_tmp_2(mo_tot_num_align, n_j),                           &
+      bielec_tmp_2(mo_tot_num, n_j),                           &
      buffer_i(size_buffer),                                         &
      buffer_value(size_buffer) )
  
@ -602,10 +599,8 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
  !$  thread_num = omp_get_thread_num()
  !$OMP DO SCHEDULE(guided)
  do l1 = 1,ao_num
-    !DEC$ VECTOR ALIGNED
    bielec_tmp_3 = 0.d0
    do k1 = 1,ao_num
-      !DEC$ VECTOR ALIGNED
      bielec_tmp_2 = 0.d0
      do j1 = 1,ao_num
        call get_ao_bielec_integrals(j1,k1,l1,ao_num,bielec_tmp_0(1,j1))
@ -626,7 +621,6 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
          cycle
        endif
        
-        !DEC$ VECTOR ALIGNED
        bielec_tmp_1 = 0.d0
        ii1=1
        do ii1 = 1,kmax-4,4
@ -728,7 +722,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
          if(i==k .and. j==l .and. i.ne.j)then
            buffer_value(n_integrals) = buffer_value(n_integrals) *0.5d0
          endif
-          !DEC$ FORCEINLINE
+          !DIR$ FORCEINLINE
          call mo_bielec_integrals_index(i,j,k,l,buffer_i(n_integrals))
          if (n_integrals == size_buffer) then
            call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
@ -760,7 +754,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
        
        n_integrals += 1
        buffer_value(n_integrals) = bielec_tmp_1(i)
-        !DEC$ FORCEINLINE
+        !DIR$ FORCEINLINE
        call mo_bielec_integrals_index(i,j,k,l,buffer_i(n_integrals))
        if (n_integrals == size_buffer) then
          call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
@ -828,7 +822,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
  double precision, allocatable  :: bielec_tmp_1(:)
  double precision, allocatable  :: bielec_tmp_2(:,:)
  double precision, allocatable  :: bielec_tmp_3(:,:,:)
-  !DEC$ ATTRIBUTES ALIGN : 64    :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3
+  !DIR$ ATTRIBUTES ALIGN : 64    :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3
  
  integer                        :: n_integrals
  integer                        :: size_buffer
@ -853,7 +847,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
  
  size_buffer = min(ao_num*ao_num*ao_num,16000000)
  print*, 'Providing the molecular integrals '
-  print*, 'Buffers : ', 8.*(mo_tot_num_align*(n_j)*(n_k+1) + mo_tot_num_align +&
+  print*, 'Buffers : ', 8.*(mo_tot_num*(n_j)*(n_k+1) + mo_tot_num+&
      ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
  
  call wall_time(wall_1)
@ -864,18 +858,18 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
      !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
      !$OMP  wall_0,thread_num)                                      &
      !$OMP  DEFAULT(NONE)                                           &
-      !$OMP  SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,n_l,mo_tot_num_align,&
+      !$OMP  SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,n_l,   &
      !$OMP  mo_coef_transp,                                         &
      !$OMP  mo_coef_transp_is_built, list_ijkl,                     &
      !$OMP  mo_coef_is_built, wall_1,                               &
      !$OMP  mo_coef,mo_integrals_threshold,mo_integrals_map)
  n_integrals = 0
  wall_0 = wall_1
-  allocate(bielec_tmp_3(mo_tot_num_align, n_j, n_k),                 &
-      bielec_tmp_1(mo_tot_num_align),                                &
+  allocate(bielec_tmp_3(mo_tot_num, n_j, n_k),                 &
+      bielec_tmp_1(mo_tot_num),                                &
      bielec_tmp_0(ao_num,ao_num),                                   &
      bielec_tmp_0_idx(ao_num),                                      &
-      bielec_tmp_2(mo_tot_num_align, n_j),                           &
+      bielec_tmp_2(mo_tot_num, n_j),                           &
      buffer_i(size_buffer),                                         &
      buffer_value(size_buffer) )
  
@ -888,10 +882,8 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
    !      cycle
    !    endif
    !IRP_ENDIF
-    !DEC$ VECTOR ALIGNED
    bielec_tmp_3 = 0.d0
    do k1 = 1,ao_num
-      !DEC$ VECTOR ALIGNED
      bielec_tmp_2 = 0.d0
      do j1 = 1,ao_num
        call get_ao_bielec_integrals(j1,k1,l1,ao_num,bielec_tmp_0(1,j1))
@ -913,7 +905,6 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
          cycle
        endif
        
-        !DEC$ VECTOR ALIGNED
        bielec_tmp_1 = 0.d0
        ii1=1
        do ii1 = 1,kmax-4,4
@ -1018,7 +1009,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
            endif
            n_integrals += 1
            buffer_value(n_integrals) = bielec_tmp_1(i)
-            !DEC$ FORCEINLINE
+            !DIR$ FORCEINLINE
            call mo_bielec_integrals_index(i,j,k,l,buffer_i(n_integrals))
            if (n_integrals == size_buffer) then
              call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
@ -1071,9 +1062,9 @@ end



- BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_from_ao, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_exchange_from_ao, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_anti_from_ao, (mo_tot_num_align,mo_tot_num) ]
+ BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_from_ao, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_exchange_from_ao, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_anti_from_ao, (mo_tot_num,mo_tot_num) ]
  implicit none
  BEGIN_DOC
  ! mo_bielec_integral_jj_from_ao(i,j) = J_ij
@ -1103,20 +1094,19 @@ end
  !$OMP PARALLEL DEFAULT(NONE)                                       &
      !$OMP PRIVATE (i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx,  &
      !$OMP  iqrs, iqsr,iqri,iqis)                                   &
-      !$OMP SHARED(mo_tot_num,mo_coef_transp,mo_tot_num_align,ao_num,&
+      !$OMP SHARED(mo_tot_num,mo_coef_transp,ao_num,                 &
      !$OMP  ao_integrals_threshold,do_direct_integrals)             &
      !$OMP REDUCTION(+:mo_bielec_integral_jj_from_ao,mo_bielec_integral_jj_exchange_from_ao)
  
  allocate( int_value(ao_num), int_idx(ao_num),                      &
-      iqrs(mo_tot_num_align,ao_num), iqis(mo_tot_num), iqri(mo_tot_num),&
-      iqsr(mo_tot_num_align,ao_num) )
+      iqrs(mo_tot_num,ao_num), iqis(mo_tot_num), iqri(mo_tot_num),&
+      iqsr(mo_tot_num,ao_num) )
  
  !$OMP DO SCHEDULE (guided)
  do s=1,ao_num
    do q=1,ao_num
      
      do j=1,ao_num
-        !DIR$ VECTOR ALIGNED
        do i=1,mo_tot_num
          iqrs(i,j) = 0.d0
          iqsr(i,j) = 0.d0
@ -1130,7 +1120,6 @@ end
          do p=1,ao_num
            integral = int_value(p)
            if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
              do i=1,mo_tot_num
                iqrs(i,r) += mo_coef_transp(i,p) * integral
              enddo
@ -1140,7 +1129,6 @@ end
          do p=1,ao_num
            integral = int_value(p)
            if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
              do i=1,mo_tot_num
                iqsr(i,r) += mo_coef_transp(i,p) * integral
              enddo
@ -1156,7 +1144,6 @@ end
            p = int_idx(pp)
            integral = int_value(pp)
            if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
              do i=1,mo_tot_num
                iqrs(i,r) += mo_coef_transp(i,p) * integral
              enddo
@ -1167,7 +1154,6 @@ end
            p = int_idx(pp)
            integral = int_value(pp)
            if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
              do i=1,mo_tot_num
                iqsr(i,r) += mo_coef_transp(i,p) * integral
              enddo
@ -1178,14 +1164,12 @@ end
      iqis = 0.d0
      iqri = 0.d0
      do r=1,ao_num
-        !DIR$ VECTOR ALIGNED
        do i=1,mo_tot_num
          iqis(i) += mo_coef_transp(i,r) * iqrs(i,r)
          iqri(i) += mo_coef_transp(i,r) * iqsr(i,r)
        enddo
      enddo
      do i=1,mo_tot_num
-        !DIR$ VECTOR ALIGNED
        do j=1,mo_tot_num
          c = mo_coef_transp(j,q)*mo_coef_transp(j,s)
          mo_bielec_integral_jj_from_ao(j,i) += c * iqis(i)
@ -1204,9 +1188,9 @@ end
  
 END_PROVIDER

- BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_from_ao, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_exchange_from_ao, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_anti_from_ao, (mo_tot_num_align,mo_tot_num) ]
+ BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_from_ao, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_exchange_from_ao, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_anti_from_ao, (mo_tot_num,mo_tot_num) ]
  implicit none
  BEGIN_DOC
  ! mo_bielec_integral_vv_from_ao(i,j) = J_ij
@ -1238,20 +1222,19 @@ END_PROVIDER
  !$OMP PARALLEL DEFAULT(NONE)                                       &
      !$OMP PRIVATE (i0,j0,i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx,&
      !$OMP  iqrs, iqsr,iqri,iqis)                                   &
-      !$OMP SHARED(n_virt_orb,mo_tot_num,list_virt,mo_coef_transp,mo_tot_num_align,ao_num,&
+      !$OMP SHARED(n_virt_orb,mo_tot_num,list_virt,mo_coef_transp,ao_num,&
      !$OMP  ao_integrals_threshold,do_direct_integrals)             &
      !$OMP REDUCTION(+:mo_bielec_integral_vv_from_ao,mo_bielec_integral_vv_exchange_from_ao)
  
  allocate( int_value(ao_num), int_idx(ao_num),                      &
-      iqrs(mo_tot_num_align,ao_num), iqis(mo_tot_num), iqri(mo_tot_num),&
-      iqsr(mo_tot_num_align,ao_num) )
+      iqrs(mo_tot_num,ao_num), iqis(mo_tot_num), iqri(mo_tot_num),&
+      iqsr(mo_tot_num,ao_num) )
  
  !$OMP DO SCHEDULE (guided)
  do s=1,ao_num
    do q=1,ao_num
      
      do j=1,ao_num
-        !DIR$ VECTOR ALIGNED
        do i0=1,n_virt_orb
          i = list_virt(i0)
          iqrs(i,j) = 0.d0
@ -1266,7 +1249,6 @@ END_PROVIDER
          do p=1,ao_num
            integral = int_value(p)
            if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
              do i0=1,n_virt_orb
                i = list_virt(i0)
                iqrs(i,r) += mo_coef_transp(i,p) * integral
@ -1277,7 +1259,6 @@ END_PROVIDER
          do p=1,ao_num
            integral = int_value(p)
            if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
              do i0=1,n_virt_orb
                i =list_virt(i0)
                iqsr(i,r) += mo_coef_transp(i,p) * integral
@ -1294,7 +1275,6 @@ END_PROVIDER
            p = int_idx(pp)
            integral = int_value(pp)
            if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
              do i0=1,n_virt_orb
                i =list_virt(i0)
                iqrs(i,r) += mo_coef_transp(i,p) * integral
@ -1306,7 +1286,6 @@ END_PROVIDER
            p = int_idx(pp)
            integral = int_value(pp)
            if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
              do i0=1,n_virt_orb
                i = list_virt(i0)
                iqsr(i,r) += mo_coef_transp(i,p) * integral
@ -1318,7 +1297,6 @@ END_PROVIDER
      iqis = 0.d0
      iqri = 0.d0
      do r=1,ao_num
-        !DIR$ VECTOR ALIGNED
        do i0=1,n_virt_orb
          i = list_virt(i0)
          iqis(i) += mo_coef_transp(i,r) * iqrs(i,r)
@ -1327,7 +1305,6 @@ END_PROVIDER
      enddo
      do i0=1,n_virt_orb
        i= list_virt(i0)
-        !DIR$ VECTOR ALIGNED
        do j0=1,n_virt_orb
          j = list_virt(j0)
          c = mo_coef_transp(j,q)*mo_coef_transp(j,s)
@ -1354,9 +1331,9 @@ END_PROVIDER
 END_PROVIDER


- BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_exchange, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_anti, (mo_tot_num_align,mo_tot_num) ]
+ BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_exchange, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_anti, (mo_tot_num,mo_tot_num) ]
  implicit none
  BEGIN_DOC
  ! mo_bielec_integral_jj(i,j) = J_ij
--- a/src/Integrals_Monoelec/ao_mono_ints.irp.f
+++ b/src/Integrals_Monoelec/ao_mono_ints.irp.f
@ -1,4 +1,4 @@
- BEGIN_PROVIDER [ double precision, ao_mono_elec_integral,(ao_num_align,ao_num)]
+ BEGIN_PROVIDER [ double precision, ao_mono_elec_integral,(ao_num,ao_num)]
 &BEGIN_PROVIDER [ double precision, ao_mono_elec_integral_diag,(ao_num)]
  implicit none
  integer :: i,j,n,l
@ -7,7 +7,6 @@
 ! : sum of the kinetic and nuclear electronic potential 
  END_DOC
  do j = 1, ao_num
-   !DIR$ VECTOR ALIGNED
   do i = 1, ao_num
    ao_mono_elec_integral(i,j) = ao_nucl_elec_integral(i,j) + ao_kinetic_integral(i,j) + ao_pseudo_integral(i,j)
   enddo
--- a/src/Integrals_Monoelec/kin_mo_ints.irp.f
+++ b/src/Integrals_Monoelec/kin_mo_ints.irp.f
@ -1,4 +1,4 @@
-BEGIN_PROVIDER [double precision, mo_kinetic_integral, (mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [double precision, mo_kinetic_integral, (mo_tot_num,mo_tot_num)]
  implicit none
  BEGIN_DOC
  !  Kinetic energy integrals in the MO basis
--- a/src/Integrals_Monoelec/mo_mono_ints.irp.f
+++ b/src/Integrals_Monoelec/mo_mono_ints.irp.f
@ -1,4 +1,4 @@
-BEGIN_PROVIDER [ double precision, mo_mono_elec_integral,(mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [ double precision, mo_mono_elec_integral,(mo_tot_num,mo_tot_num)]
  implicit none
  integer                        :: i,j,n,l
  BEGIN_DOC
--- a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f
+++ b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f
@ -1,4 +1,4 @@
-BEGIN_PROVIDER [ double precision, ao_pseudo_integral, (ao_num_align,ao_num)]
+BEGIN_PROVIDER [ double precision, ao_pseudo_integral, (ao_num,ao_num)]
  implicit none
  BEGIN_DOC
  ! Pseudo-potential integrals
@ -29,7 +29,7 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral, (ao_num_align,ao_num)]
  
 END_PROVIDER

-BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_num)]
+BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num,ao_num)]
  implicit none
  BEGIN_DOC
  ! Local pseudo-potential
@ -128,7 +128,7 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu
 END_PROVIDER


- BEGIN_PROVIDER [ double precision, ao_pseudo_integral_non_local, (ao_num_align,ao_num)]
+ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_non_local, (ao_num,ao_num)]
  implicit none
  BEGIN_DOC
  ! Local pseudo-potential
--- a/src/Integrals_Monoelec/pot_mo_ints.irp.f
+++ b/src/Integrals_Monoelec/pot_mo_ints.irp.f
@ -1,4 +1,4 @@
-BEGIN_PROVIDER [double precision, mo_nucl_elec_integral, (mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [double precision, mo_nucl_elec_integral, (mo_tot_num,mo_tot_num)]
 implicit none
 BEGIN_DOC
 ! interaction nuclear electron on the MO basis
@ -25,7 +25,7 @@ BEGIN_PROVIDER [double precision, mo_nucl_elec_integral, (mo_tot_num_align,mo_to
 END_PROVIDER


-BEGIN_PROVIDER [double precision, mo_nucl_elec_integral_per_atom, (mo_tot_num_align,mo_tot_num,nucl_num)]
+BEGIN_PROVIDER [double precision, mo_nucl_elec_integral_per_atom, (mo_tot_num,mo_tot_num,nucl_num)]
 implicit none
 BEGIN_DOC
 ! mo_nucl_elec_integral_per_atom(i,j,k) = -<MO(i)|1/|r-Rk|MO(j)> 
--- a/src/Integrals_Monoelec/pot_mo_pseudo_ints.irp.f
+++ b/src/Integrals_Monoelec/pot_mo_pseudo_ints.irp.f
@ -1,4 +1,4 @@
-BEGIN_PROVIDER [double precision, mo_pseudo_integral, (mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [double precision, mo_pseudo_integral, (mo_tot_num,mo_tot_num)]
  implicit none
  BEGIN_DOC
  ! interaction nuclear electron on the MO basis
--- a/src/Integrals_Monoelec/spread_dipole_ao.irp.f
+++ b/src/Integrals_Monoelec/spread_dipole_ao.irp.f
@ -1,6 +1,6 @@
-  BEGIN_PROVIDER [ double precision, ao_spread_x, (ao_num_align,ao_num)]
- &BEGIN_PROVIDER [ double precision, ao_spread_y, (ao_num_align,ao_num)]
- &BEGIN_PROVIDER [ double precision, ao_spread_z, (ao_num_align,ao_num)]
+  BEGIN_PROVIDER [ double precision, ao_spread_x, (ao_num,ao_num)]
+ &BEGIN_PROVIDER [ double precision, ao_spread_y, (ao_num,ao_num)]
+ &BEGIN_PROVIDER [ double precision, ao_spread_z, (ao_num,ao_num)]
 BEGIN_DOC
 ! array of the integrals of AO_i * x^2 AO_j
 ! array of the integrals of AO_i * y^2 AO_j
@ -35,8 +35,6 @@
   power_A(1)  = ao_power( j, 1 )
   power_A(2)  = ao_power( j, 2 )
   power_A(3)  = ao_power( j, 3 )
-   !DEC$ VECTOR ALIGNED
-   !DEC$ VECTOR ALWAYS
   do i= 1,ao_num
    B_center(1) = nucl_coord( ao_nucl(i), 1 )
    B_center(2) = nucl_coord( ao_nucl(i), 2 )
@ -49,7 +47,6 @@
    accu_z = 0.d0
    do n = 1,ao_prim_num(j)
     alpha = ao_expo_ordered_transp(n,j)
-     !DEC$ VECTOR ALIGNED
     do l = 1, ao_prim_num(i)
      c = ao_coef_normalized_ordered_transp(n,j)*ao_coef_normalized_ordered_transp(l,i)
      beta = ao_expo_ordered_transp(l,i)
@ -72,9 +69,9 @@



-  BEGIN_PROVIDER [ double precision, ao_dipole_x, (ao_num_align,ao_num)]
- &BEGIN_PROVIDER [ double precision, ao_dipole_y, (ao_num_align,ao_num)]
- &BEGIN_PROVIDER [ double precision, ao_dipole_z, (ao_num_align,ao_num)]
+  BEGIN_PROVIDER [ double precision, ao_dipole_x, (ao_num,ao_num)]
+ &BEGIN_PROVIDER [ double precision, ao_dipole_y, (ao_num,ao_num)]
+ &BEGIN_PROVIDER [ double precision, ao_dipole_z, (ao_num,ao_num)]
 BEGIN_DOC
 ! array of the integrals of AO_i * x AO_j
 ! array of the integrals of AO_i * y AO_j
@ -109,8 +106,6 @@
   power_A(1)  = ao_power( j, 1 )
   power_A(2)  = ao_power( j, 2 )
   power_A(3)  = ao_power( j, 3 )
-   !DEC$ VECTOR ALIGNED
-   !DEC$ VECTOR ALWAYS
   do i= 1,ao_num
    B_center(1) = nucl_coord( ao_nucl(i), 1 )
    B_center(2) = nucl_coord( ao_nucl(i), 2 )
@ -123,7 +118,6 @@
    accu_z = 0.d0
    do n = 1,ao_prim_num(j)
     alpha = ao_expo_ordered_transp(n,j)
-     !DEC$ VECTOR ALIGNED
     do l = 1, ao_prim_num(i)
      beta = ao_expo_ordered_transp(l,i)
      c = ao_coef_normalized_ordered_transp(l,i)*ao_coef_normalized_ordered_transp(n,j)
@ -145,9 +139,9 @@
  !$OMP END PARALLEL DO
 END_PROVIDER

-  BEGIN_PROVIDER [ double precision, ao_deriv_1_x, (ao_num_align,ao_num)]
- &BEGIN_PROVIDER [ double precision, ao_deriv_1_y, (ao_num_align,ao_num)]
- &BEGIN_PROVIDER [ double precision, ao_deriv_1_z, (ao_num_align,ao_num)]
+  BEGIN_PROVIDER [ double precision, ao_deriv_1_x, (ao_num,ao_num)]
+ &BEGIN_PROVIDER [ double precision, ao_deriv_1_y, (ao_num,ao_num)]
+ &BEGIN_PROVIDER [ double precision, ao_deriv_1_z, (ao_num,ao_num)]
 BEGIN_DOC
 ! array of the integrals of AO_i * d/dx  AO_j
 ! array of the integrals of AO_i * d/dy  AO_j
@ -183,8 +177,6 @@
   power_A(1)  = ao_power( j, 1 )
   power_A(2)  = ao_power( j, 2 )
   power_A(3)  = ao_power( j, 3 )
-   !DEC$ VECTOR ALIGNED
-   !DEC$ VECTOR ALWAYS
   do i= 1,ao_num
    B_center(1) = nucl_coord( ao_nucl(i), 1 )
    B_center(2) = nucl_coord( ao_nucl(i), 2 )
@ -197,7 +189,6 @@
    accu_z = 0.d0
    do n = 1,ao_prim_num(j)
     alpha = ao_expo_ordered_transp(n,j)
-     !DEC$ VECTOR ALIGNED
     do l = 1, ao_prim_num(i)
      beta = ao_expo_ordered_transp(l,i)
      call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
--- a/src/Integrals_Monoelec/spread_dipole_mo.irp.f
+++ b/src/Integrals_Monoelec/spread_dipole_mo.irp.f
@ -1,6 +1,6 @@
- BEGIN_PROVIDER [double precision, mo_dipole_x , (mo_tot_num_align,mo_tot_num)]
-&BEGIN_PROVIDER [double precision, mo_dipole_y , (mo_tot_num_align,mo_tot_num)]
-&BEGIN_PROVIDER [double precision, mo_dipole_z , (mo_tot_num_align,mo_tot_num)]
+ BEGIN_PROVIDER [double precision, mo_dipole_x , (mo_tot_num,mo_tot_num)]
+&BEGIN_PROVIDER [double precision, mo_dipole_y , (mo_tot_num,mo_tot_num)]
+&BEGIN_PROVIDER [double precision, mo_dipole_z , (mo_tot_num,mo_tot_num)]
 BEGIN_DOC
 ! array of the integrals of MO_i * x MO_j
 ! array of the integrals of MO_i * y MO_j
@ -29,9 +29,9 @@

 END_PROVIDER

- BEGIN_PROVIDER [double precision, mo_spread_x , (mo_tot_num_align,mo_tot_num)]
-&BEGIN_PROVIDER [double precision, mo_spread_y , (mo_tot_num_align,mo_tot_num)]
-&BEGIN_PROVIDER [double precision, mo_spread_z , (mo_tot_num_align,mo_tot_num)]
+ BEGIN_PROVIDER [double precision, mo_spread_x , (mo_tot_num,mo_tot_num)]
+&BEGIN_PROVIDER [double precision, mo_spread_y , (mo_tot_num,mo_tot_num)]
+&BEGIN_PROVIDER [double precision, mo_spread_z , (mo_tot_num,mo_tot_num)]
 BEGIN_DOC
 ! array of the integrals of MO_i * x^2 MO_j
 ! array of the integrals of MO_i * y^2 MO_j
--- a/src/MOGuess/mo_ortho_lowdin.irp.f
+++ b/src/MOGuess/mo_ortho_lowdin.irp.f
@ -1,4 +1,4 @@
-BEGIN_PROVIDER [double precision, ao_ortho_lowdin_coef, (ao_num_align,ao_num)]
+BEGIN_PROVIDER [double precision, ao_ortho_lowdin_coef, (ao_num,ao_num)]
  implicit none
  BEGIN_DOC
 ! matrix of the coefficients of the mos generated by the 
@ -8,12 +8,12 @@ BEGIN_PROVIDER [double precision, ao_ortho_lowdin_coef, (ao_num_align,ao_num)]
  integer                        :: i,j,k,l
  double precision               :: accu
  double precision, allocatable  :: tmp_matrix(:,:)
-  allocate (tmp_matrix(ao_num_align,ao_num))
+  allocate (tmp_matrix(ao_num,ao_num))
  tmp_matrix(:,:) = 0.d0
  do j=1, ao_num
    tmp_matrix(j,j) = 1.d0
  enddo
-  call ortho_lowdin(ao_overlap,ao_num_align,ao_num,tmp_matrix,ao_num_align,ao_num)
+  call ortho_lowdin(ao_overlap,ao_num,ao_num,tmp_matrix,ao_num,ao_num)
  do i=1, ao_num
    do j=1, ao_num
      ao_ortho_lowdin_coef(j,i) = tmp_matrix(i,j)
@ -22,7 +22,7 @@ BEGIN_PROVIDER [double precision, ao_ortho_lowdin_coef, (ao_num_align,ao_num)]
  deallocate(tmp_matrix)
 END_PROVIDER

-BEGIN_PROVIDER [double precision, ao_ortho_lowdin_overlap, (ao_num_align,ao_num)]
+BEGIN_PROVIDER [double precision, ao_ortho_lowdin_overlap, (ao_num,ao_num)]
  implicit none
  BEGIN_DOC
 ! overlap matrix of the ao_ortho_lowdin
--- a/src/MOGuess/pot_mo_ortho_canonical_ints.irp.f
+++ b/src/MOGuess/pot_mo_ortho_canonical_ints.irp.f
@ -1,4 +1,4 @@
-BEGIN_PROVIDER [double precision, ao_ortho_canonical_nucl_elec_integral, (mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [double precision, ao_ortho_canonical_nucl_elec_integral, (mo_tot_num,mo_tot_num)]
 implicit none
 integer :: i1,j1,i,j
 double precision :: c_i1,c_j1
--- a/src/MOGuess/pot_mo_ortho_lowdin_ints.irp.f
+++ b/src/MOGuess/pot_mo_ortho_lowdin_ints.irp.f
@ -1,4 +1,4 @@
-BEGIN_PROVIDER [double precision, ao_ortho_lowdin_nucl_elec_integral, (mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [double precision, ao_ortho_lowdin_nucl_elec_integral, (mo_tot_num,mo_tot_num)]
 implicit none
 integer :: i1,j1,i,j
 double precision :: c_i1,c_j1
--- a/src/MO_Basis/ao_ortho_canonical.irp.f
+++ b/src/MO_Basis/ao_ortho_canonical.irp.f
@ -1,4 +1,4 @@
- BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_coef, (ao_num_align,ao_num)]
+ BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_coef, (ao_num,ao_num)]
 &BEGIN_PROVIDER [ integer, ao_cart_to_sphe_num ]
  implicit none
  BEGIN_DOC
@ -83,7 +83,7 @@ END_PROVIDER



-BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef_inv, (ao_num_align,ao_num)]
+BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef_inv, (ao_num,ao_num)]
 implicit none
 BEGIN_DOC
 ! ao_ortho_canonical_coef^(-1)
@ -92,7 +92,7 @@ BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef_inv, (ao_num_align,ao
     ao_num, ao_ortho_canonical_coef_inv, size(ao_ortho_canonical_coef_inv,1))
 END_PROVIDER

- BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef, (ao_num_align,ao_num)]
+ BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef, (ao_num,ao_num)]
 &BEGIN_PROVIDER [ integer, ao_ortho_canonical_num ]
  implicit none
  BEGIN_DOC
--- a/src/MO_Basis/mo_overlap.irp.f
+++ b/src/MO_Basis/mo_overlap.irp.f
@ -1,5 +1,5 @@

-BEGIN_PROVIDER [ double precision, mo_overlap,(mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [ double precision, mo_overlap,(mo_tot_num,mo_tot_num)]
  implicit none
  integer :: i,j,n,l
  double precision :: f
@ -13,7 +13,6 @@ BEGIN_PROVIDER [ double precision, mo_overlap,(mo_tot_num_align,mo_tot_num)]
   do i= 1,mo_tot_num
    mo_overlap(i,j) = 0.d0
    do n = 1, lmax,4
-     !DIR$ VECTOR ALIGNED
     do l = 1, ao_num
      mo_overlap(i,j) = mo_overlap(i,j) + mo_coef(l,i) * &
           ( mo_coef(n  ,j) * ao_overlap(l,n  )  &
@ -23,7 +22,6 @@ BEGIN_PROVIDER [ double precision, mo_overlap,(mo_tot_num_align,mo_tot_num)]
     enddo
    enddo
    do n = lmax+1, ao_num
-     !DIR$ VECTOR ALIGNED
     do l = 1, ao_num
      mo_overlap(i,j) = mo_overlap(i,j) + mo_coef(n,j) * mo_coef(l,i) * ao_overlap(l,n)
     enddo
--- a/src/MO_Basis/mo_permutation.irp.f
+++ b/src/MO_Basis/mo_permutation.irp.f
@ -2,7 +2,7 @@ program permut_mos
 implicit none
 integer :: mo1,mo2
 integer :: i,j,k,l
- double precision :: mo_coef_tmp(ao_num_align,2)
+ double precision :: mo_coef_tmp(ao_num,2)
 print*,'Which MOs would you like to change ?'
 read(5,*)mo1,mo2
 print*,''
--- a/src/MO_Basis/mos.irp.f
+++ b/src/MO_Basis/mos.irp.f
@ -16,17 +16,7 @@ BEGIN_PROVIDER [ integer, mo_tot_num ]
 END_PROVIDER


-BEGIN_PROVIDER [ integer, mo_tot_num_align ]
-  implicit none
-  BEGIN_DOC
-  ! Aligned variable for dimensioning of arrays
-  END_DOC
-  integer                        :: align_double
-  mo_tot_num_align = align_double(mo_tot_num)
-END_PROVIDER
-
-
- BEGIN_PROVIDER [ double precision, mo_coef, (ao_num_align,mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, mo_coef, (ao_num,mo_tot_num) ]
  implicit none
  BEGIN_DOC
  ! Molecular orbital coefficients on AO basis set
@ -42,32 +32,18 @@ END_PROVIDER
  ! Coefs
  call ezfio_has_mo_basis_mo_coef(exists)
  if (exists) then
-    allocate(buffer(ao_num,mo_tot_num))
-    buffer = 0.d0
-    call ezfio_get_mo_basis_mo_coef(buffer)
-    do i=1,mo_tot_num
-      do j=1,ao_num
-        mo_coef(j,i) = buffer(j,i)
-      enddo
-      do j=ao_num+1,ao_num_align
-        mo_coef(j,i) = 0.d0
-      enddo
-    enddo
-    deallocate(buffer)
+    call ezfio_get_mo_basis_mo_coef(mo_coef)
  else
    ! Orthonormalized AO basis
    do i=1,mo_tot_num
      do j=1,ao_num
        mo_coef(j,i) = ao_ortho_canonical_coef(j,i)
      enddo
-      do j=ao_num+1,ao_num_align
-        mo_coef(j,i) = 0.d0
-      enddo
    enddo
  endif
 END_PROVIDER

-BEGIN_PROVIDER [ double precision, mo_coef_in_ao_ortho_basis, (ao_num_align, mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, mo_coef_in_ao_ortho_basis, (ao_num, mo_tot_num) ]
 implicit none
 BEGIN_DOC
 ! MO coefficients in orthogonalized AO basis
@ -99,7 +75,7 @@ BEGIN_PROVIDER [ character*(64), mo_label ]
  endif
 END_PROVIDER

-BEGIN_PROVIDER [ double precision, mo_coef_transp, (mo_tot_num_align,ao_num) ]
+BEGIN_PROVIDER [ double precision, mo_coef_transp, (mo_tot_num,ao_num) ]
  implicit none
  BEGIN_DOC
  ! Molecular orbital coefficients on AO basis set
@ -110,14 +86,11 @@ BEGIN_PROVIDER [ double precision, mo_coef_transp, (mo_tot_num_align,ao_num) ]
    do i=1,mo_tot_num
      mo_coef_transp(i,j) = mo_coef(j,i)
    enddo
-    do i=mo_tot_num+1,mo_tot_num_align
-      mo_coef_transp(i,j) = 0.d0
-    enddo
  enddo
  
 END_PROVIDER

-BEGIN_PROVIDER [ double precision, S_mo_coef, (ao_num_align, mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, S_mo_coef, (ao_num, mo_tot_num) ]
 implicit none
 BEGIN_DOC
 ! Product S.C where S is the overlap matrix in the AO basis and C the mo_coef matrix.
@ -165,18 +138,18 @@ subroutine ao_to_mo(A_ao,LDA_ao,A_mo,LDA_mo)
  double precision, intent(out)  :: A_mo(LDA_mo,mo_tot_num)
  double precision, allocatable  :: T(:,:)
  
-  allocate ( T(ao_num_align,mo_tot_num) )
+  allocate ( T(ao_num,mo_tot_num) )
  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
  
  call dgemm('N','N', ao_num, mo_tot_num, ao_num,                    &
      1.d0, A_ao,LDA_ao,                                             &
      mo_coef, size(mo_coef,1),                                      &
-      0.d0, T, ao_num_align)
+      0.d0, T, size(T,1))
  
  call dgemm('T','N', mo_tot_num, mo_tot_num, ao_num,                &
      1.d0, mo_coef,size(mo_coef,1),                                 &
-      T, ao_num_align,                                               &
-      0.d0, A_mo, LDA_mo)
+      T, ao_num,                                                     &   
+      0.d0, A_mo, size(A_mo,1))
  
  deallocate(T)
 end
@ -193,7 +166,7 @@ subroutine mo_to_ao(A_mo,LDA_mo,A_ao,LDA_ao)
  double precision, intent(out)  :: A_ao(LDA_ao,ao_num)
  double precision, allocatable  :: T(:,:)
  
-  allocate ( T(mo_tot_num_align,ao_num) )
+  allocate ( T(mo_tot_num,ao_num) )
  
  call dgemm('N','T', mo_tot_num, ao_num, mo_tot_num,                &
      1.d0, A_mo,size(A_mo,1),                                       &
@ -219,7 +192,7 @@ subroutine mo_to_ao_no_overlap(A_mo,LDA_mo,A_ao,LDA_ao)
  double precision, intent(out)  :: A_ao(LDA_ao,ao_num)
  double precision, allocatable  :: T(:,:)
  
-  allocate ( T(mo_tot_num_align,ao_num) )
+  allocate ( T(mo_tot_num,ao_num) )
  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
  
  call dgemm('N','T', mo_tot_num, ao_num, mo_tot_num,                &
@ -281,7 +254,7 @@ subroutine ao_ortho_cano_to_ao(A_ao,LDA_ao,A,LDA)
  double precision, intent(out)  :: A(LDA,*)
  double precision, allocatable  :: T(:,:)
  
-  allocate ( T(ao_num_align,ao_num) )
+  allocate ( T(ao_num,ao_num) )
  
  call dgemm('T','N', ao_num, ao_num, ao_num,                        &
      1.d0,                                                          &
--- a/src/MO_Basis/swap_mos.irp.f
+++ b/src/MO_Basis/swap_mos.irp.f
@ -4,7 +4,7 @@ program swap_mos
  double precision :: x
  print *,  'MOs to swap?'
  read(*,*) i1, i2
-  do i=1,ao_num_align
+  do i=1,ao_num
    x = mo_coef(i,i1)
    mo_coef(i,i1) = mo_coef(i,i2)
    mo_coef(i,i2) = x
--- a/src/MO_Basis/utils.irp.f
+++ b/src/MO_Basis/utils.irp.f
@ -60,7 +60,7 @@ subroutine mo_as_eigvectors_of_mo_matrix(matrix,n,m,label,sign,output)
    print *, irp_here, ': Error : m/= mo_tot_num'
    stop 1
  endif
-  allocate(A(n,m),R(n,m),mo_coef_new(ao_num_align,m),eigvalues(m))
+  allocate(A(n,m),R(n,m),mo_coef_new(ao_num,m),eigvalues(m))
  if (sign == -1) then
    do j=1,m
      do i=1,n
@ -121,7 +121,7 @@ subroutine mo_as_svd_vectors_of_mo_matrix(matrix,lda,m,n,label)
    stop 1
  endif

-  allocate(A(lda,n),U(lda,n),mo_coef_new(ao_num_align,m),D(m),Vt(lda,n))
+  allocate(A(lda,n),U(lda,n),mo_coef_new(ao_num,m),D(m),Vt(lda,n))

  do j=1,n
    do i=1,m
@ -167,7 +167,7 @@ subroutine mo_as_eigvectors_of_mo_matrix_sort_by_observable(matrix,observable,n,
    print *, irp_here, ': Error : m/= mo_tot_num'
    stop 1
  endif
-  allocate(R(n,m),mo_coef_new(ao_num_align,m),eigvalues(m),value(m),iorder(m))
+  allocate(R(n,m),mo_coef_new(ao_num,m),eigvalues(m),value(m),iorder(m))
  mo_coef_new = mo_coef
  
  call lapack_diag(eigvalues,R,matrix,size(matrix,1),size(matrix,2))
@ -242,7 +242,7 @@ subroutine mo_sort_by_observable(observable,label)
  double precision, allocatable  :: mo_coef_new(:,:),value(:)
  integer,allocatable :: iorder(:)
  
-  allocate(mo_coef_new(ao_num_align,mo_tot_num),value(mo_tot_num),iorder(mo_tot_num))
+  allocate(mo_coef_new(ao_num,mo_tot_num),value(mo_tot_num),iorder(mo_tot_num))
  print*,'allocate !'
  mo_coef_new = mo_coef
  
@ -283,7 +283,7 @@ end
 subroutine give_specific_mos_at_r(r,mos_array, mo_coef_specific)
 implicit none
 double precision, intent(in) :: r(3)
- double precision, intent(in)  :: mo_coef_specific(ao_num_align, mo_tot_num)
+ double precision, intent(in)  :: mo_coef_specific(ao_num, mo_tot_num)
 double precision, intent(out) :: mos_array(mo_tot_num)
 double precision :: aos_array(ao_num),accu
 integer :: i,j
--- a/plugins/MPI/.gitignore
+++ b/plugins/MPI/.gitignore
--- a/plugins/MPI/NEEDED_CHILDREN_MODULES
+++ b/plugins/MPI/NEEDED_CHILDREN_MODULES
--- a/plugins/MPI/README.rst
+++ b/plugins/MPI/README.rst
--- a/plugins/MPI/mpi.irp.f
+++ b/plugins/MPI/mpi.irp.f
@ -1,23 +1,3 @@
-BEGIN_PROVIDER [ integer, mpi_bit_kind ]
- use bitmasks
- implicit none
- BEGIN_DOC
- ! MPI bit kind type
- END_DOC
- IRP_IF MPI
-  include 'mpif.h'
-  if (bit_kind == 4) then
-    mpi_bit_kind = MPI_INTEGER4
-  else if (bit_kind == 8) then
-    mpi_bit_kind = MPI_INTEGER8
-  else
-    stop 'Wrong bit kind in mpi_bit_kind'
-  endif
- IRP_ELSE
-  mpi_bit_kind = -1
- IRP_ENDIF
-END_PROVIDER
-
 BEGIN_PROVIDER [ logical, mpi_initialized ]
 implicit none
 BEGIN_DOC
@ -88,7 +68,6 @@ subroutine broadcast_chunks_$double(A, LDA)
  implicit none
  integer, intent(in)             :: LDA
  $type, intent(inout) :: A(LDA)
-  use bitmasks
  BEGIN_DOC
 ! Broadcast with chunks of ~2GB
  END_DOC
@ -99,7 +78,7 @@ subroutine broadcast_chunks_$double(A, LDA)
      sze = min(LDA-i+1, 200000000/$8)
      call MPI_BCAST (A(i), sze, MPI_$DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      if (ierr /= MPI_SUCCESS) then
-        print *,  irp_here//': Unable to broadcast chuks $double ', i
+        print *,  irp_here//': Unable to broadcast chunks $double ', i
        stop -1
      endif
    enddo
@ -107,10 +86,9 @@ subroutine broadcast_chunks_$double(A, LDA)
 end

 SUBST [ double, type, 8, DOUBLE_PRECISION ]
-double    ; double precision  ; 8             ; DOUBLE_PRECISION ;;
-bit_kind  ; integer(bit_kind) ; bit_kind_size ; BIT_KIND ;;
-integer   ; integer          ; 4             ; INTEGER4 ;;
-integer8  ; integer*8        ; 8             ; INTEGER8 ;;
+double    ; double precision  ; 8 ; DOUBLE_PRECISION ;;
+integer   ; integer           ; 4  ; INTEGER4 ;;
+integer8  ; integer*8         ; 8  ; INTEGER8 ;;

 END_TEMPLATE

--- a/src/Nuclei/nuclei.irp.f
+++ b/src/Nuclei/nuclei.irp.f
@ -1,15 +1,4 @@
-BEGIN_PROVIDER [ integer, nucl_num_aligned ]
-   implicit none
-   BEGIN_DOC
-   ! Number of nuclei algined
-   END_DOC
-   
-   PROVIDE ezfio_filename
-   integer                        :: align_double
-   nucl_num_aligned = align_double(nucl_num)
-END_PROVIDER
- 
-BEGIN_PROVIDER [ double precision, nucl_coord,  (nucl_num_aligned,3) ]
+BEGIN_PROVIDER [ double precision, nucl_coord,  (nucl_num,3) ]
   implicit none
   
   BEGIN_DOC
@ -79,11 +68,11 @@ BEGIN_PROVIDER [ double precision, nucl_coord_transp, (3,nucl_num) ]
   enddo
 END_PROVIDER
 
- BEGIN_PROVIDER [ double precision, nucl_dist_2, (nucl_num_aligned,nucl_num) ]
-&BEGIN_PROVIDER [ double precision, nucl_dist_vec_x, (nucl_num_aligned,nucl_num) ]
-&BEGIN_PROVIDER [ double precision, nucl_dist_vec_y, (nucl_num_aligned,nucl_num) ]
-&BEGIN_PROVIDER [ double precision, nucl_dist_vec_z, (nucl_num_aligned,nucl_num) ]
-&BEGIN_PROVIDER [ double precision, nucl_dist, (nucl_num_aligned,nucl_num) ]
+ BEGIN_PROVIDER [ double precision, nucl_dist_2, (nucl_num,nucl_num) ]
+&BEGIN_PROVIDER [ double precision, nucl_dist_vec_x, (nucl_num,nucl_num) ]
+&BEGIN_PROVIDER [ double precision, nucl_dist_vec_y, (nucl_num,nucl_num) ]
+&BEGIN_PROVIDER [ double precision, nucl_dist_vec_z, (nucl_num,nucl_num) ]
+&BEGIN_PROVIDER [ double precision, nucl_dist, (nucl_num,nucl_num) ]
   implicit none
   BEGIN_DOC
   ! nucl_dist     : Nucleus-nucleus distances
@ -105,16 +94,12 @@ END_PROVIDER
   endif
   
   do ie2 = 1,nucl_num
-     !DEC$ VECTOR ALWAYS
-     !DEC$ VECTOR ALIGNED
-     do ie1 = 1,nucl_num_aligned
+     do ie1 = 1,nucl_num
       nucl_dist_vec_x(ie1,ie2) = nucl_coord(ie1,1) - nucl_coord(ie2,1)
       nucl_dist_vec_y(ie1,ie2) = nucl_coord(ie1,2) - nucl_coord(ie2,2)
       nucl_dist_vec_z(ie1,ie2) = nucl_coord(ie1,3) - nucl_coord(ie2,3)
     enddo
-     !DEC$ VECTOR ALWAYS
-     !DEC$ VECTOR ALIGNED
-     do ie1 = 1,nucl_num_aligned
+     do ie1 = 1,nucl_num
       nucl_dist_2(ie1,ie2) = nucl_dist_vec_x(ie1,ie2)*nucl_dist_vec_x(ie1,ie2) +&
           nucl_dist_vec_y(ie1,ie2)*nucl_dist_vec_y(ie1,ie2) +       &
           nucl_dist_vec_z(ie1,ie2)*nucl_dist_vec_z(ie1,ie2)
--- a/src/Selectors_Utils/NEEDED_CHILDREN_MODULES
+++ b/src/Selectors_Utils/NEEDED_CHILDREN_MODULES
@ -0,0 +1 @@
+Determinants 
--- a/src/Selectors_Utils/README.rst
+++ b/src/Selectors_Utils/README.rst
@ -0,0 +1,190 @@
+=====================
+Selectors_full Module
+=====================
+
+Needed Modules
+==============
+
+.. Do not edit this section It was auto-generated
+.. by the `update_README.py` script.
+
+.. image:: tree_dependency.png
+
+* `Determinants <http://github.com/LCPQ/quantum_package/tree/master/src/Determinants>`_
+* `Hartree_Fock <http://github.com/LCPQ/quantum_package/tree/master/src/Hartree_Fock>`_
+
+Needed Modules
+==============
+.. Do not edit this section It was auto-generated
+.. by the `update_README.py` script.
+
+
+.. image:: tree_dependency.png
+
+* `Determinants <http://github.com/LCPQ/quantum_package/tree/master/src/Determinants>`_
+* `Hartree_Fock <http://github.com/LCPQ/quantum_package/tree/master/plugins/Hartree_Fock>`_
+
+Documentation
+=============
+.. Do not edit this section It was auto-generated
+.. by the `update_README.py` script.
+
+
+`coef_hf_selector <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L28>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`delta_e_per_selector <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L33>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`double_index_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L4>`_
+  degree of excitation respect to Hartree Fock for the wave function
+  .br
+  for the all the selectors determinants
+  .br
+  double_index_selectors = list of the index of the double excitations
+  .br
+  n_double_selectors = number of double excitations in the selectors determinants
+
+
+`e_corr_double_only <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L34>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`e_corr_per_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L31>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`e_corr_second_order <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L35>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`exc_degree_per_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L3>`_
+  degree of excitation respect to Hartree Fock for the wave function
+  .br
+  for the all the selectors determinants
+  .br
+  double_index_selectors = list of the index of the double excitations
+  .br
+  n_double_selectors = number of double excitations in the selectors determinants
+
+
+`i_h_hf_per_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L32>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`inv_selectors_coef_hf <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L29>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`inv_selectors_coef_hf_squared <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L30>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`n_det_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/selectors.irp.f#L8>`_
+  For Single reference wave functions, the number of selectors is 1 : the
+  Hartree-Fock determinant
+
+
+`n_double_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L5>`_
+  degree of excitation respect to Hartree Fock for the wave function
+  .br
+  for the all the selectors determinants
+  .br
+  double_index_selectors = list of the index of the double excitations
+  .br
+  n_double_selectors = number of double excitations in the selectors determinants
+
+
+`psi_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/selectors.irp.f#L32>`_
+  Determinants on which we apply <i|H|psi> for perturbation.
+
+
+`psi_selectors_coef <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/selectors.irp.f#L33>`_
+  Determinants on which we apply <i|H|psi> for perturbation.
+
+
+`psi_selectors_coef_transp <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/selectors.irp.f#L53>`_
+  Transposed psi_selectors
+
+
+`psi_selectors_diag_h_mat <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/selectors.irp.f#L67>`_
+  Diagonal elements of the H matrix for each selectors
+
+
+`psi_selectors_size <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/selectors.irp.f#L3>`_
+  Undocumented
+
+
+`zmq_get_psi <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/zmq.irp.f#L51>`_
+  Get the wave function from the qp_run scheduler
+
+
+`zmq_put_psi <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/zmq.irp.f#L1>`_
+  Put the wave function on the qp_run scheduler
+
--- a/plugins/Selectors_full/e_corr_selectors.irp.f
+++ b/plugins/Selectors_full/e_corr_selectors.irp.f
@ -1,4 +1,3 @@
-
 use bitmasks
 BEGIN_PROVIDER [integer, exc_degree_per_selectors, (N_det_selectors)]
 &BEGIN_PROVIDER [integer, double_index_selectors, (N_det_selectors)]
--- a/src/Selectors_Utils/selectors.irp.f
+++ b/src/Selectors_Utils/selectors.irp.f
@ -0,0 +1,34 @@
+use bitmasks
+
+BEGIN_PROVIDER [ integer, psi_selectors_size ]
+ implicit none
+ psi_selectors_size = psi_det_size
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ]
+  implicit none
+  BEGIN_DOC
+  ! Transposed psi_selectors
+  END_DOC
+  integer                        :: i,k
+
+  do i=1,N_det_selectors
+    do k=1,N_states
+      psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k) 
+    enddo
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ]
+  implicit none
+  BEGIN_DOC
+  ! Diagonal elements of the H matrix for each selectors 
+  END_DOC
+  integer                        :: i
+  double precision :: diag_H_mat_elem
+  do i = 1, N_det_selectors
+   psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int)
+  enddo
+END_PROVIDER
+
+
--- a/plugins/Selectors_full/zmq.irp.f
+++ b/plugins/Selectors_full/zmq.irp.f
@ -19,43 +19,6 @@ subroutine zmq_put_psi(zmq_to_qp_run_socket,worker_id)
 end


-subroutine zmq_put_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x)
-  use f77_zmq
-  implicit none
-  BEGIN_DOC
-! Put the X vector on the qp_run scheduler
-  END_DOC
-  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
-  integer, intent(in)            :: worker_id
-  character*(*)                  :: name
-  integer, intent(in)            :: size_x
-  double precision, intent(out)  :: x(size_x)
-  integer                        :: rc
-  character*(256)                :: msg
-
-
-  write(msg,'(A,X,I,X,A)') 'put_data', worker_id, name
-  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
-  if (rc /= len(trim(msg))) then
-    print *,  irp_here, ': Error sending '//name
-    stop 'error'
-  endif
-
-  rc = f77_zmq_send(zmq_to_qp_run_socket,x,size_x*8,0)
-  if (rc /= size_x*8) then
-    print *,  irp_here, ': Error sending '//name
-    stop 'error'
-  endif
-
-  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
-  if (msg(1:rc) /= 'put_data_reply ok') then
-    print *,  rc, trim(msg)
-    print *,  irp_here, ': Error in put_data_reply'
-    stop 'error'
-  endif
-
-end
-

 BEGIN_TEMPLATE 

@ -70,7 +33,7 @@ subroutine zmq_put_$X(zmq_to_qp_run_socket,worker_id)
  integer                        :: rc
  character*(256)                :: msg

-  write(msg,'(A,X,I,X,A)') 'put_data', worker_id, '$X'
+  write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, '$X'
  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
  if (rc /= len(trim(msg))) then
    print *,  irp_here, ': Error sending $X'
@ -103,7 +66,7 @@ subroutine zmq_get_$X(zmq_to_qp_run_socket, worker_id)
  integer                        :: rc
  character*(64)                 :: msg

-  write(msg,'(A,X,I,X,A)') 'get_data', worker_id, '$X'
+  write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, '$X'
  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
  if (rc /= len(trim(msg))) then
    print *,  irp_here, ': Error getting $X'
@ -132,7 +95,6 @@ N_det ;;
 psi_det_size ;;
 N_det_generators ;;
 N_det_selectors ;;
-N_states_diag ;;

 END_TEMPLATE

@ -147,7 +109,7 @@ subroutine zmq_put_psi_det(zmq_to_qp_run_socket,worker_id)
  integer                        :: rc, rc8
  character*(256)                :: msg

-  write(msg,'(A,X,I,X,A)') 'put_data', worker_id, 'psi_det'
+  write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, 'psi_det'
  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
  if (rc /= len(trim(msg))) then
    print *,  irp_here, ': Error sending psi_det'
@ -179,7 +141,7 @@ subroutine zmq_put_psi_coef(zmq_to_qp_run_socket,worker_id)
  integer                        :: rc, rc8
  character*(256)                :: msg

-  write(msg,'(A,X,I,X,A)') 'put_data', worker_id, 'psi_coef'
+  write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, 'psi_coef'
  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
  if (rc /= len(trim(msg))) then
    print *,  irp_here, ': Error sending psi_coef'
@ -244,7 +206,7 @@ subroutine zmq_get_psi_det(zmq_to_qp_run_socket, worker_id)
  character*(64)                 :: msg

  
-  write(msg,'(A,X,I,X,A)') 'get_data', worker_id, 'psi_det' 
+  write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, 'psi_det' 
  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
  if (rc /= len(trim(msg))) then
    print *,  irp_here, ': Error getting psi_det'
@ -279,7 +241,7 @@ subroutine zmq_get_psi_coef(zmq_to_qp_run_socket, worker_id)
  character*(64)                 :: msg


-  write(msg,'(A,X,I,X,A)') 'get_data', worker_id, 'psi_coef'
+  write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, 'psi_coef'
  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
  if (rc /= len(trim(msg))) then
    print *,  irp_here, ': Error getting psi_coef'
@ -302,41 +264,3 @@ subroutine zmq_get_psi_coef(zmq_to_qp_run_socket, worker_id)
 end


-subroutine zmq_get_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x)
-  use f77_zmq
-  implicit none
-  BEGIN_DOC
-! Get psi_coef from the qp_run scheduler
-  END_DOC
-  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
-  integer, intent(in)            :: worker_id
-  integer, intent(in)            :: size_x
-  character*(*), intent(in)      :: name
-  double precision, intent(out)  :: x(size_x)
-  integer                        :: rc
-  integer*8                      :: rc8
-  character*(64)                 :: msg
-
-  write(msg,'(A,X,I,X,A)') 'get_data', worker_id, name
-  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
-  if (rc /= len(trim(msg))) then
-    print *,  irp_here, ': Error getting '//name
-    stop 'error'
-  endif
-
-  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
-  if (msg(1:14) /= 'get_data_reply') then
-    print *,  rc, trim(msg)
-    print *,  irp_here, ': Error in get_data_reply'
-    stop 'error'
-  endif
-
-  rc = f77_zmq_recv(zmq_to_qp_run_socket,x,size_x*8,0)
-  if (rc /= size_x*8) then
-    print *, irp_here, ': Error getting '//name
-    stop 'error'
-  endif
-end
-
-
-
--- a/src/Utils/LinearAlgebra.irp.f
+++ b/src/Utils/LinearAlgebra.irp.f
@ -70,7 +70,7 @@ subroutine ortho_canonical(overlap,LDA,N,C,LDC,m)
  double precision, allocatable  :: Vt(:,:)
  double precision, allocatable  :: D(:)
  double precision, allocatable  :: S(:,:)
-  !DEC$ ATTRIBUTES ALIGN : 64    :: U, Vt, D
+  !DIR$ ATTRIBUTES ALIGN : 64    :: U, Vt, D
  integer                        :: info, i, j
  
  if (n < 2) then
--- a/src/Utils/integration.irp.f
+++ b/src/Utils/integration.irp.f
@ -37,7 +37,7 @@ subroutine give_explicit_poly_and_gaussian_x(P_new,P_center,p,fact_k,iorder,alph
  call recentered_poly2(P_a(0),A_center,P_center,a,P_b(0),B_center,P_center,b)
  n_new = 0

-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call multiply_poly(P_a(0),a,P_b(0),b,P_new(0),n_new)
  iorder = a + b
 end
@ -76,44 +76,41 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,
  P_new(0,2) = 0.d0
  P_new(0,3) = 0.d0
  
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center)
  if (fact_k < thresh) then
    fact_k = 0.d0
    return
  endif
  
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call recentered_poly2(P_a(0,1),A_center(1),P_center(1),a(1),P_b(0,1),B_center(1),P_center(1),b(1))
  iorder(1) = a(1) + b(1)
-  !DIR$ VECTOR ALIGNED
  do i=0,iorder(1)
    P_new(i,1) = 0.d0
  enddo
  n_new=0
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call multiply_poly(P_a(0,1),a(1),P_b(0,1),b(1),P_new(0,1),n_new)
  
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call recentered_poly2(P_a(0,2),A_center(2),P_center(2),a(2),P_b(0,2),B_center(2),P_center(2),b(2))
  iorder(2) = a(2) + b(2)
-  !DIR$ VECTOR ALIGNED
  do i=0,iorder(2)
    P_new(i,2) = 0.d0
  enddo
  n_new=0
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call multiply_poly(P_a(0,2),a(2),P_b(0,2),b(2),P_new(0,2),n_new)
  
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call recentered_poly2(P_a(0,3),A_center(3),P_center(3),a(3),P_b(0,3),B_center(3),P_center(3),b(3))
  iorder(3) = a(3) + b(3)
-  !DIR$ VECTOR ALIGNED
  do i=0,iorder(3)
    P_new(i,3) = 0.d0
  enddo
  n_new=0
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
  call multiply_poly(P_a(0,3),a(3),P_b(0,3),b(3),P_new(0,3),n_new)
  
 end
@ -200,7 +197,7 @@ subroutine gaussian_product(a,xa,b,xb,k,p,xp)
  ASSERT (b>0.)
  
  double precision               :: xab(3), ab
-  !DEC$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab
  
  p = a+b
  p_inv = 1.d0/(a+b)
@ -282,7 +279,6 @@ subroutine multiply_poly(b,nb,c,nc,d,nd)
  endif
  ndtmp = nb+nc
  
-  !DIR$ VECTOR ALIGNED
  do ic = 0,nc
    d(ic) = d(ic) + c(ic) * b(0)
  enddo
--- a/src/Utils/transpose.irp.f
+++ b/src/Utils/transpose.irp.f
@ -8,7 +8,7 @@ recursive subroutine transpose(A,LDA,B,LDB,d1,d2)
 real, intent(in) :: A(LDA,d2)
 real, intent(out) :: B(LDB,d1)

- integer :: i,j,k, mod_align
+ integer :: i,j,k
 if ( d2 < 32 ) then
   do j=1,d1
     !DIR$ LOOP COUNT (16)
@ -55,7 +55,7 @@ recursive subroutine dtranspose(A,LDA,B,LDB,d1,d2)
 ! enddo
 ! return

- integer :: i,j,k, mod_align
+ integer :: i,j,k
 if ( d2 < 32 ) then
   do j=1,d1
     !DIR$ LOOP COUNT (16)
--- a/src/Utils/util.irp.f
+++ b/src/Utils/util.irp.f
@ -10,7 +10,7 @@ double precision function binom_func(i,j)
  double precision               :: logfact
  integer, save                  :: ifirst
  double precision, save         :: memo(0:15,0:15)
-  !DEC$ ATTRIBUTES ALIGN : $IRP_ALIGN :: memo
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: memo
  integer                        :: k,l
  if (ifirst == 0) then
    ifirst = 1
@ -45,20 +45,6 @@ end
 END_PROVIDER


-integer function align_double(n)
-  implicit none
-  BEGIN_DOC
-  ! Compute 1st dimension such that it is aligned for vectorization.
-  END_DOC
-  integer                        :: n
-  include 'constants.include.F'
-  if (mod(n,SIMD_vector/4) /= 0) then
-    align_double= n + SIMD_vector/4 - mod(n,SIMD_vector/4)
-  else
-    align_double= n
-  endif
-end
-

 double precision function fact(n)
  implicit none
@ -333,7 +319,6 @@ subroutine normalize(u,sze)
  implicit none
  BEGIN_DOC
  ! Normalizes vector u
-  ! u is expected to be aligned in memory.
  END_DOC
  integer, intent(in)            :: sze
  double precision, intent(inout):: u(sze)
--- a/src/ZMQ/put_get.irp.f
+++ b/src/ZMQ/put_get.irp.f
@ -0,0 +1,76 @@
+subroutine zmq_put_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Put the X vector on the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  character*(*)                  :: name
+  integer, intent(in)            :: size_x
+  double precision, intent(out)  :: x(size_x)
+  integer                        :: rc
+  character*(256)                :: msg
+
+
+  write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, name
+  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
+  if (rc /= len(trim(msg))) then
+    print *,  irp_here, ': Error sending '//name
+    stop 'error'
+  endif
+
+  rc = f77_zmq_send(zmq_to_qp_run_socket,x,size_x*8,0)
+  if (rc /= size_x*8) then
+    print *,  irp_here, ': Error sending '//name
+    stop 'error'
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+  if (msg(1:rc) /= 'put_data_reply ok') then
+    print *,  rc, trim(msg)
+    print *,  irp_here, ': Error in put_data_reply'
+    stop 'error'
+  endif
+
+end
+
+
+subroutine zmq_get_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Get psi_coef from the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer, intent(in)            :: size_x
+  character*(*), intent(in)      :: name
+  double precision, intent(out)  :: x(size_x)
+  integer                        :: rc
+  integer*8                      :: rc8
+  character*(64)                 :: msg
+
+  write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, name
+  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
+  if (rc /= len(trim(msg))) then
+    print *,  irp_here, ': Error getting '//name
+    stop 'error'
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+  if (msg(1:14) /= 'get_data_reply') then
+    print *,  rc, trim(msg)
+    print *,  irp_here, ': Error in get_data_reply'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,x,size_x*8,0)
+  if (rc /= size_x*8) then
+    print *, irp_here, ': Error getting '//name
+    stop 'error'
+  endif
+end
+
+
+