From 12295ce7c038bdec7575c23676a7cc5bb9e0734d Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 27 Nov 2017 10:58:32 +0100
Subject: [PATCH] Removed aligns

---
 plugins/DensityMatrix/density_matrix.irp.f    |   6 +-
 plugins/FOBOCI/SC2_1h1p.irp.f                 |   4 +-
 plugins/FOBOCI/density_matrix.irp.f           |  20 +-
 plugins/FOBOCI/diag_fock_inactiv_virt.irp.f   |   4 +-
 .../Hartree_Fock_SlaterDressed/dressing.irp.f |  10 +-
 plugins/MRCC_Utils/davidson.irp.f             |  65 +--
 plugins/Perturbation/pt2_equations.irp.f      |   2 +-
 plugins/Properties/average.irp.f              |   4 +-
 plugins/Properties/delta_rho.irp.f            |   6 +-
 plugins/Properties/mulliken.irp.f             |   6 +-
 plugins/Properties/routines_test.irp.f        |   4 +-
 .../slater_rules_mono_electronic.irp.f        |  22 +-
 .../Selectors_CASSD/NEEDED_CHILDREN_MODULES   |   2 +-
 plugins/Selectors_CASSD/selectors.irp.f       |  31 --
 plugins/Selectors_CASSD/zmq.irp.f             | 121 ----
 .../Selectors_full/NEEDED_CHILDREN_MODULES    |   2 +-
 plugins/Selectors_full/selectors.irp.f        |  31 --
 .../NEEDED_CHILDREN_MODULES                   |   2 +-
 .../e_corr_selectors.irp.f                    |  79 ---
 plugins/Selectors_no_sorted/selectors.irp.f   |  20 -
 plugins/read_integral/read_integrals_mo.irp.f |   2 +-
 src/AO_Basis/ao_overlap.irp.f                 |  16 +-
 src/AO_Basis/aos.irp.f                        |  31 +-
 src/Bitmask/mpi.irp.f                         |  44 ++
 src/Davidson/davidson_parallel.irp.f          |  66 +++
 src/Davidson/diagonalization.irp.f            |   2 -
 src/Davidson/diagonalization_hs2.irp.f        |   1 -
 src/Davidson/u0Hu0_old.irp.f                  | 518 ------------------
 src/Determinants/filter_connected.irp.f       |  12 +-
 src/Determinants/two_body_dm_map.irp.f        |   3 +-
 src/Integrals_Bielec/ao_bi_integrals.irp.f    |  70 +--
 src/Integrals_Bielec/gauss_legendre.irp.f     |  13 +-
 src/Integrals_Bielec/integrals_3_index.irp.f  |   4 +-
 src/Integrals_Bielec/mo_bi_integrals.irp.f    |  97 ++--
 src/Integrals_Monoelec/ao_mono_ints.irp.f     |   3 +-
 src/Integrals_Monoelec/kin_mo_ints.irp.f      |   2 +-
 src/Integrals_Monoelec/mo_mono_ints.irp.f     |   2 +-
 .../pot_ao_pseudo_ints.irp.f                  |   6 +-
 src/Integrals_Monoelec/pot_mo_ints.irp.f      |   4 +-
 .../pot_mo_pseudo_ints.irp.f                  |   2 +-
 src/Integrals_Monoelec/spread_dipole_ao.irp.f |  27 +-
 src/Integrals_Monoelec/spread_dipole_mo.irp.f |  12 +-
 src/MOGuess/mo_ortho_lowdin.irp.f             |   8 +-
 src/MOGuess/pot_mo_ortho_canonical_ints.irp.f |   2 +-
 src/MOGuess/pot_mo_ortho_lowdin_ints.irp.f    |   2 +-
 src/MO_Basis/ao_ortho_canonical.irp.f         |   6 +-
 src/MO_Basis/mo_overlap.irp.f                 |   4 +-
 src/MO_Basis/mo_permutation.irp.f             |   2 +-
 src/MO_Basis/mos.irp.f                        |  51 +-
 src/MO_Basis/swap_mos.irp.f                   |   2 +-
 src/MO_Basis/utils.irp.f                      |  10 +-
 {plugins => src}/MPI/.gitignore               |   0
 {plugins => src}/MPI/NEEDED_CHILDREN_MODULES  |   0
 {plugins => src}/MPI/README.rst               |   0
 {plugins => src}/MPI/mpi.irp.f                |  30 +-
 src/Nuclei/nuclei.irp.f                       |  31 +-
 src/Selectors_Utils/NEEDED_CHILDREN_MODULES   |   1 +
 src/Selectors_Utils/README.rst                | 190 +++++++
 .../Selectors_Utils}/e_corr_selectors.irp.f   |   1 -
 src/Selectors_Utils/selectors.irp.f           |  34 ++
 .../Selectors_Utils}/zmq.irp.f                |  88 +--
 src/Utils/LinearAlgebra.irp.f                 |   2 +-
 src/Utils/integration.irp.f                   |  22 +-
 src/Utils/transpose.irp.f                     |   4 +-
 src/Utils/util.irp.f                          |  17 +-
 src/ZMQ/put_get.irp.f                         |  76 +++
 66 files changed, 649 insertions(+), 1312 deletions(-)
 delete mode 100644 plugins/Selectors_CASSD/zmq.irp.f
 delete mode 100644 plugins/Selectors_no_sorted/e_corr_selectors.irp.f
 create mode 100644 src/Bitmask/mpi.irp.f
 delete mode 100644 src/Davidson/u0Hu0_old.irp.f
 rename {plugins => src}/MPI/.gitignore (100%)
 rename {plugins => src}/MPI/NEEDED_CHILDREN_MODULES (100%)
 rename {plugins => src}/MPI/README.rst (100%)
 rename {plugins => src}/MPI/mpi.irp.f (72%)
 create mode 100644 src/Selectors_Utils/NEEDED_CHILDREN_MODULES
 create mode 100644 src/Selectors_Utils/README.rst
 rename {plugins/Selectors_full => src/Selectors_Utils}/e_corr_selectors.irp.f (99%)
 create mode 100644 src/Selectors_Utils/selectors.irp.f
 rename {plugins/Selectors_full => src/Selectors_Utils}/zmq.irp.f (73%)
 create mode 100644 src/ZMQ/put_get.irp.f

diff --git a/plugins/DensityMatrix/density_matrix.irp.f b/plugins/DensityMatrix/density_matrix.irp.f
index 3fc1d6f9..5e7d7cec 100644
--- a/plugins/DensityMatrix/density_matrix.irp.f
+++ b/plugins/DensityMatrix/density_matrix.irp.f
@@ -167,9 +167,9 @@ END_PROVIDER
  END_TEMPLATE
  
  
- BEGIN_PROVIDER [ double precision, two_body_dm_diag_aa, (mo_tot_num_align,mo_tot_num)]
-&BEGIN_PROVIDER [ double precision, two_body_dm_diag_bb, (mo_tot_num_align,mo_tot_num)]
-&BEGIN_PROVIDER [ double precision, two_body_dm_diag_ab, (mo_tot_num_align,mo_tot_num)]
+ BEGIN_PROVIDER [ double precision, two_body_dm_diag_aa, (mo_tot_num,mo_tot_num)]
+&BEGIN_PROVIDER [ double precision, two_body_dm_diag_bb, (mo_tot_num,mo_tot_num)]
+&BEGIN_PROVIDER [ double precision, two_body_dm_diag_ab, (mo_tot_num,mo_tot_num)]
    implicit none
    use bitmasks
    BEGIN_DOC
diff --git a/plugins/FOBOCI/SC2_1h1p.irp.f b/plugins/FOBOCI/SC2_1h1p.irp.f
index 7733831c..58f47552 100644
--- a/plugins/FOBOCI/SC2_1h1p.irp.f
+++ b/plugins/FOBOCI/SC2_1h1p.irp.f
@@ -725,8 +725,8 @@ subroutine density_matrix_1h1p(dets_in,u_in,density_matrix_alpha,density_matrix_
   integer, intent(in)            :: dim_in, sze, N_st, Nint
   integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
   double precision, intent(inout) :: u_in(dim_in,N_st)
-  double precision, intent(inout)  :: density_matrix_alpha(mo_tot_num_align,mo_tot_num)
-  double precision, intent(inout)  :: density_matrix_beta(mo_tot_num_align,mo_tot_num)
+  double precision, intent(inout)  :: density_matrix_alpha(mo_tot_num,mo_tot_num)
+  double precision, intent(inout)  :: density_matrix_beta(mo_tot_num,mo_tot_num)
   double precision, intent(inout)  :: norm
   
   integer :: i,j,k,l
diff --git a/plugins/FOBOCI/density_matrix.irp.f b/plugins/FOBOCI/density_matrix.irp.f
index aaf80c4f..0e668de5 100644
--- a/plugins/FOBOCI/density_matrix.irp.f
+++ b/plugins/FOBOCI/density_matrix.irp.f
@@ -1,5 +1,5 @@
- BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_generators_restart, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_generators_restart, (mo_tot_num_align,mo_tot_num) ]
+ BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_generators_restart, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_generators_restart, (mo_tot_num,mo_tot_num) ]
 &BEGIN_PROVIDER [ double precision, norm_generators_restart]
    implicit none
    BEGIN_DOC
@@ -40,9 +40,9 @@
         !$OMP PRIVATE(j,k,l,m,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc, &
         !$OMP  tmp_a, tmp_b, n_occ_alpha)&
         !$OMP SHARED(psi_det_generators_restart,psi_coef_generators_restart,N_int,elec_alpha_num,&
-        !$OMP  elec_beta_num,one_body_dm_mo_alpha_generators_restart,one_body_dm_mo_beta_generators_restart,N_det_generators_restart,mo_tot_num_align,&
+        !$OMP  elec_beta_num,one_body_dm_mo_alpha_generators_restart,one_body_dm_mo_beta_generators_restart,N_det_generators_restart,&
         !$OMP  mo_tot_num,N_states, state_average_weight)
-     allocate(tmp_a(mo_tot_num_align,mo_tot_num), tmp_b(mo_tot_num_align,mo_tot_num) )
+     allocate(tmp_a(mo_tot_num,mo_tot_num), tmp_b(mo_tot_num,mo_tot_num) )
      tmp_a = 0.d0
      tmp_b = 0.d0
      !$OMP DO SCHEDULE(dynamic)
@@ -98,7 +98,7 @@ END_PROVIDER
 
 
 
-BEGIN_PROVIDER [ double precision, one_body_dm_mo_generators_restart, (mo_tot_num_align,mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, one_body_dm_mo_generators_restart, (mo_tot_num,mo_tot_num) ]
  implicit none
  BEGIN_DOC
  ! One-body density matrix for the generators_restart
@@ -106,7 +106,7 @@ BEGIN_PROVIDER [ double precision, one_body_dm_mo_generators_restart, (mo_tot_nu
  one_body_dm_mo_generators_restart = one_body_dm_mo_alpha_generators_restart + one_body_dm_mo_beta_generators_restart
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, one_body_spin_density_mo_generators_restart, (mo_tot_num_align,mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, one_body_spin_density_mo_generators_restart, (mo_tot_num,mo_tot_num) ]
  implicit none
  BEGIN_DOC
  ! rho(alpha) - rho(beta)
@@ -115,16 +115,16 @@ BEGIN_PROVIDER [ double precision, one_body_spin_density_mo_generators_restart,
 END_PROVIDER
 
 
- BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_osoci, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_osoci, (mo_tot_num_align,mo_tot_num) ]
+ BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_osoci, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_osoci, (mo_tot_num,mo_tot_num) ]
    implicit none
    BEGIN_DOC
    ! Alpha and beta one-body density matrix that will be used for the OSOCI approach
    END_DOC
 END_PROVIDER
 
- BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_1h1p, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_1h1p, (mo_tot_num_align,mo_tot_num) ]
+ BEGIN_PROVIDER [ double precision, one_body_dm_mo_alpha_1h1p, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, one_body_dm_mo_beta_1h1p, (mo_tot_num,mo_tot_num) ]
    implicit none
    BEGIN_DOC
    ! Alpha and beta one-body density matrix that will be used for the 1h1p approach
diff --git a/plugins/FOBOCI/diag_fock_inactiv_virt.irp.f b/plugins/FOBOCI/diag_fock_inactiv_virt.irp.f
index 7c30e175..83170b4b 100644
--- a/plugins/FOBOCI/diag_fock_inactiv_virt.irp.f
+++ b/plugins/FOBOCI/diag_fock_inactiv_virt.irp.f
@@ -1,7 +1,7 @@
 subroutine diag_inactive_virt_and_update_mos
  implicit none
  integer :: i,j,i_inact,j_inact,i_virt,j_virt
- double precision :: tmp(mo_tot_num_align,mo_tot_num)
+ double precision :: tmp(mo_tot_num,mo_tot_num)
  character*(64) :: label
  print*,'Diagonalizing the occ and virt Fock operator'
  tmp = 0.d0
@@ -38,7 +38,7 @@ end
 subroutine diag_inactive_virt_new_and_update_mos
  implicit none
  integer :: i,j,i_inact,j_inact,i_virt,j_virt,k,k_act
- double precision :: tmp(mo_tot_num_align,mo_tot_num),accu,get_mo_bielec_integral
+ double precision :: tmp(mo_tot_num,mo_tot_num),accu,get_mo_bielec_integral
  character*(64) :: label
  tmp = 0.d0
  do i = 1, mo_tot_num
diff --git a/plugins/Hartree_Fock_SlaterDressed/dressing.irp.f b/plugins/Hartree_Fock_SlaterDressed/dressing.irp.f
index f42a0b0f..5426badb 100644
--- a/plugins/Hartree_Fock_SlaterDressed/dressing.irp.f
+++ b/plugins/Hartree_Fock_SlaterDressed/dressing.irp.f
@@ -1,4 +1,4 @@
-BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral_dressing, (ao_num_align,ao_num) ]
+BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral_dressing, (ao_num,ao_num) ]
   implicit none
   BEGIN_DOC
   ! Dressing of the core hamiltonian in the orthogonal AO basis set
@@ -25,7 +25,7 @@ BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral_dressing, (ao_num
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral, (ao_num_align, ao_num) ]
+BEGIN_PROVIDER [ double precision, ao_ortho_mono_elec_integral, (ao_num, ao_num) ]
  implicit none
  BEGIN_DOC
  ! h core in orthogonal AO basis
@@ -53,7 +53,7 @@ BEGIN_PROVIDER [ double precision, ao_mono_elec_integral_dressing, (ao_num,ao_nu
     ao_mono_elec_integral_dressing,size(ao_mono_elec_integral_dressing,1))
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, mo_mono_elec_integral_dressing, (mo_tot_num_align,mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, mo_mono_elec_integral_dressing, (mo_tot_num,mo_tot_num) ]
   implicit none
   BEGIN_DOC
   ! Dressing of the core hamiltonian in the MO basis set
@@ -73,14 +73,14 @@ BEGIN_PROVIDER [ integer, idx_dressing ]
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ double precision, cusp_corrected_mos, (ao_num_align,mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, cusp_corrected_mos, (ao_num,mo_tot_num) ]
   implicit none
   BEGIN_DOC
   ! Dressing core hamiltonian in the AO basis set
   END_DOC
   integer :: i,j
   double precision, allocatable :: F(:,:), M(:,:)
-  allocate(F(mo_tot_num_align,mo_tot_num),M(ao_num,mo_tot_num))
+  allocate(F(mo_tot_num,mo_tot_num),M(ao_num,mo_tot_num))
 
   logical :: oneshot
 
diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f
index 436b89a4..5014fcef 100644
--- a/plugins/MRCC_Utils/davidson.irp.f
+++ b/plugins/MRCC_Utils/davidson.irp.f
@@ -85,7 +85,6 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s
   double precision, intent(inout) :: u_in(dim_in,N_st_diag)
   double precision, intent(out)  :: energies(N_st_diag)
   
-  integer                        :: sze_8
   integer                        :: iter
   integer                        :: i,j,k,l,m
   logical                        :: converged
@@ -138,13 +137,10 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s
   enddo
   write(iunit,'(A)') trim(write_buffer)
 
-  integer, external :: align_double
-  sze_8 = align_double(sze)
-
   allocate(                                                          &
-      W(sze_8,N_st_diag,davidson_sze_max),                           &
-      U(sze_8,N_st_diag,davidson_sze_max),                           &
-      R(sze_8,N_st_diag),                                            &
+      W(sze,N_st_diag,davidson_sze_max),                           &
+      U(sze,N_st_diag,davidson_sze_max),                           &
+      R(sze,N_st_diag),                                            &
       h(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max),      &
       y(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max),      &
       residual_norm(N_st_diag),                                      &
@@ -199,7 +195,7 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s
       ! Compute |W_k> = \sum_i |i><i|H|u_k>
       ! -----------------------------------------
       
-      call H_u_0_mrcc_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,istate,N_st_diag,sze_8)
+      call H_u_0_mrcc_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,istate,N_st_diag,sze)
       
       
       ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
@@ -320,7 +316,7 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s
 end
 
 
-subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze_8)
+subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze)
   use bitmasks
   implicit none
   BEGIN_DOC
@@ -329,16 +325,16 @@ subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze_8)
   ! n : number of determinants
   !
   END_DOC
-  integer, intent(in)            :: n,Nint,N_st,sze_8
+  integer, intent(in)            :: n,Nint,N_st,sze
   double precision, intent(out)  :: e_0(N_st)
-  double precision, intent(in)   :: u_0(sze_8,N_st)
+  double precision, intent(in)   :: u_0(sze,N_st)
   integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
   integer,intent(in)             :: istate
   
   double precision, allocatable  :: v_0(:,:), H_jj(:)
   double precision               :: u_dot_u,u_dot_v,diag_H_mat_elem
   integer :: i,j
-  allocate(H_jj(n), v_0(sze_8,N_st))
+  allocate(H_jj(n), v_0(sze,N_st))
   do i = 1, n
    H_jj(i) = diag_H_mat_elem(keys_tmp(1,1,i),Nint)
   enddo
@@ -347,7 +343,7 @@ subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze_8)
     H_jj(idx_ref(i)) +=  delta_ii(istate,i)
   enddo
   
-  call H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate,N_st,sze_8)
+  call H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate,N_st,sze)
   do i=1,N_st
     e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/u_dot_u(u_0(1,i),n)
   enddo
@@ -355,7 +351,7 @@ subroutine u_0_H_u_0_mrcc_nstates(e_0,u_0,n,keys_tmp,Nint,istate,N_st,sze_8)
 end
 
 
-subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8)
+subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze)
   use bitmasks
   implicit none
   BEGIN_DOC
@@ -365,9 +361,9 @@ subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8)
   !     
   ! H_jj : array of <j|H|j>
   END_DOC
-  integer, intent(in)            :: n,Nint,istate_in,N_st,sze_8
-  double precision, intent(out)  :: v_0(sze_8,N_st)
-  double precision, intent(in)   :: u_0(sze_8,N_st)
+  integer, intent(in)            :: n,Nint,istate_in,N_st,sze
+  double precision, intent(out)  :: v_0(sze,N_st)
+  double precision, intent(in)   :: u_0(sze,N_st)
   double precision, intent(in)   :: H_jj(n)
   integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
   double precision               :: hij
@@ -396,9 +392,9 @@ subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8)
   
   !$OMP PARALLEL DEFAULT(NONE)                                       &
       !$OMP PRIVATE(i,hij,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
-      !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,sorted,shortcut,sort_idx,version,N_st,sze_8,&
+      !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,sorted,shortcut,sort_idx,version,N_st,sze,&
       !$OMP   istate_in,delta_ij,N_det_ref,N_det_non_ref,idx_ref,idx_non_ref)
-  allocate(vt(sze_8,N_st))
+  allocate(vt(sze,N_st))
   Vt = 0.d0
   
   !$OMP DO SCHEDULE(static,1)
@@ -590,7 +586,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   double precision, intent(inout) :: u_in(dim_in,N_st_diag)
   double precision, intent(out)  :: energies(N_st_diag)
   
-  integer                        :: sze_8
+  integer                        :: sze
   integer                        :: iter
   integer                        :: i,j,k,l,m
   logical                        :: converged
@@ -649,14 +645,11 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   enddo
   write(iunit,'(A)') trim(write_buffer)
 
-  integer, external :: align_double
-  sze_8 = align_double(sze)
-
   itermax = min(davidson_sze_max, sze/N_st_diag)
   allocate(                                                          &
-      W(sze_8,N_st_diag*itermax),                                    &
-      U(sze_8,N_st_diag*itermax),                                    &
-      S(sze_8,N_st_diag*itermax),                                    &
+      W(sze,N_st_diag*itermax),                                    &
+      U(sze,N_st_diag*itermax),                                    &
+      S(sze,N_st_diag*itermax),                                    &
       h(N_st_diag*itermax,N_st_diag*itermax),                        &
       y(N_st_diag*itermax,N_st_diag*itermax),                        &
       s_(N_st_diag*itermax,N_st_diag*itermax),                       &
@@ -722,7 +715,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
       ! -----------------------------------------
        
       call H_S2_u_0_mrcc_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,&
-        istate,N_st_diag,sze_8)
+        istate,N_st_diag,sze)
       
       
       ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
@@ -960,7 +953,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
 end
 
 
-subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8)
+subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_in,N_st,sze)
   use bitmasks
   implicit none
   BEGIN_DOC
@@ -972,9 +965,9 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i
   !
   ! S2_jj : array of <j|S^2|j>
   END_DOC
-  integer, intent(in)            :: N_st,n,Nint, sze_8, istate_in
-  double precision, intent(out)  :: v_0(sze_8,N_st), s_0(sze_8,N_st)
-  double precision, intent(in)   :: u_0(sze_8,N_st)
+  integer, intent(in)            :: N_st,n,Nint, sze, istate_in
+  double precision, intent(out)  :: v_0(sze,N_st), s_0(sze,N_st)
+  double precision, intent(in)   :: u_0(sze,N_st)
   double precision, intent(in)   :: H_jj(n), S2_jj(n)
   integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
   double precision               :: hij,s2
@@ -987,20 +980,16 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i
   integer(bit_kind)              :: sorted_i(Nint)
 
   integer                        :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
-  integer                        :: N_st_8
 
-  integer, external              :: align_double
   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut
 
-  N_st_8 = align_double(N_st)
-
   ASSERT (Nint > 0)
   ASSERT (Nint == N_int)
   ASSERT (n>0)
   PROVIDE ref_bitmask_energy
 
   allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
-  allocate(ut(N_st_8,n))
+  allocate(ut(N_st,n))
 
   v_0 = 0.d0
   s_0 = 0.d0
@@ -1017,9 +1006,9 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i
   PROVIDE delta_ij_s2 
   !$OMP PARALLEL DEFAULT(NONE)                                       &
       !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
-      !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8, &
+      !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st, &
       !$OMP  N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij, delta_ij_s2,istate_in)
-  allocate(vt(N_st_8,n),st(N_st_8,n))
+  allocate(vt(N_st,n),st(N_st,n))
   Vt = 0.d0
   St = 0.d0
 
diff --git a/plugins/Perturbation/pt2_equations.irp.f b/plugins/Perturbation/pt2_equations.irp.f
index 1cd0d440..57b3a814 100644
--- a/plugins/Perturbation/pt2_equations.irp.f
+++ b/plugins/Perturbation/pt2_equations.irp.f
@@ -314,7 +314,7 @@ subroutine pt2_epstein_nesbet_SC2_projected ($arguments)
 
   degree = popcnt(xor( ref_bitmask(1,1), det_pert(1,1))) +                      &
            popcnt(xor( ref_bitmask(1,2), det_pert(1,2)))
-  !DEC$ NOUNROLL
+  !DIR$ NOUNROLL
   do l=2,Nint
     degree = degree+ popcnt(xor( ref_bitmask(l,1), det_pert(l,1))) +            &
                      popcnt(xor( ref_bitmask(l,2), det_pert(l,2)))
diff --git a/plugins/Properties/average.irp.f b/plugins/Properties/average.irp.f
index b57c8ef2..5672277d 100644
--- a/plugins/Properties/average.irp.f
+++ b/plugins/Properties/average.irp.f
@@ -1,7 +1,7 @@
 subroutine get_average(array,density,average)
  implicit none
- double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num)
- double precision, intent(in) :: density(mo_tot_num_align,mo_tot_num)
+ double precision, intent(in) :: array(mo_tot_num,mo_tot_num)
+ double precision, intent(in) :: density(mo_tot_num,mo_tot_num)
  double precision, intent(out):: average
  integer :: i,j
  BEGIN_DOC
diff --git a/plugins/Properties/delta_rho.irp.f b/plugins/Properties/delta_rho.irp.f
index 11375b87..e701377b 100644
--- a/plugins/Properties/delta_rho.irp.f
+++ b/plugins/Properties/delta_rho.irp.f
@@ -73,7 +73,7 @@ END_PROVIDER
 
 
 
-BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_all_points, (ao_num_align, ao_num, N_z_pts)]
+BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_all_points, (ao_num, ao_num, N_z_pts)]
  BEGIN_DOC
 !  array of the overlap in x,y between the AO function and integrated between [z,z+dz] in the z axis
 !  for all the z points that are given (N_z_pts)
@@ -148,7 +148,7 @@ BEGIN_PROVIDER [integer, i_unit_integrated_delta_rho]
 
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_one_point, (ao_num_align, ao_num )]
+BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_one_point, (ao_num, ao_num )]
  BEGIN_DOC
 !  array of the overlap in x,y between the AO function and integrated between [z,z+dz] in the z axis
 !  for one specific z point
@@ -209,7 +209,7 @@ BEGIN_PROVIDER [ double precision, ao_integrated_delta_rho_one_point, (ao_num_al
  !$OMP END PARALLEL DO
 END_PROVIDER
 
-BEGIN_PROVIDER [double precision, mo_integrated_delta_rho_one_point, (mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [double precision, mo_integrated_delta_rho_one_point, (mo_tot_num,mo_tot_num)]
  BEGIN_DOC
 ! 
 ! array of the integrals needed of integrated_rho(alpha,z) - integrated_rho(beta,z) for z = z_one_point
diff --git a/plugins/Properties/mulliken.irp.f b/plugins/Properties/mulliken.irp.f
index 68b620c5..e27084c3 100644
--- a/plugins/Properties/mulliken.irp.f
+++ b/plugins/Properties/mulliken.irp.f
@@ -1,5 +1,5 @@
 
-BEGIN_PROVIDER [double precision, spin_population, (ao_num_align,ao_num)]
+BEGIN_PROVIDER [double precision, spin_population, (ao_num,ao_num)]
  implicit none
  integer :: i,j
  BEGIN_DOC
@@ -57,8 +57,8 @@ BEGIN_PROVIDER [double precision, mulliken_spin_densities, (nucl_num)]
 
 END_PROVIDER
 
- BEGIN_PROVIDER [double precision, electronic_population_alpha, (ao_num_align,ao_num)]
-&BEGIN_PROVIDER [double precision, electronic_population_beta, (ao_num_align,ao_num)]
+ BEGIN_PROVIDER [double precision, electronic_population_alpha, (ao_num,ao_num)]
+&BEGIN_PROVIDER [double precision, electronic_population_beta, (ao_num,ao_num)]
  implicit none
  integer :: i,j
  BEGIN_DOC
diff --git a/plugins/Properties/routines_test.irp.f b/plugins/Properties/routines_test.irp.f
index 231c6f2d..8e06d29d 100644
--- a/plugins/Properties/routines_test.irp.f
+++ b/plugins/Properties/routines_test.irp.f
@@ -2,7 +2,7 @@
 
 subroutine test_average_value(array,value)
  implicit none
- double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num)
+ double precision, intent(in) :: array(mo_tot_num,mo_tot_num)
  double precision, intent(in) :: value
  double precision :: tmp,hij
  integer :: i,j
@@ -24,7 +24,7 @@ end
 
 subroutine test_average_value_alpha_beta(array,value)
  implicit none
- double precision, intent(in) :: array(mo_tot_num_align,mo_tot_num)
+ double precision, intent(in) :: array(mo_tot_num,mo_tot_num)
  double precision, intent(in) :: value
  double precision :: tmp,hij
  integer :: i,j
diff --git a/plugins/Properties/slater_rules_mono_electronic.irp.f b/plugins/Properties/slater_rules_mono_electronic.irp.f
index 3e4d94bc..1d981a15 100644
--- a/plugins/Properties/slater_rules_mono_electronic.irp.f
+++ b/plugins/Properties/slater_rules_mono_electronic.irp.f
@@ -10,7 +10,7 @@ subroutine i_O1_j(array,key_i,key_j,Nint,hij)
   integer, intent(in)            :: Nint
   integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
   double precision, intent(out)  :: hij
-  double precision, intent(in)   :: array(mo_tot_num_align,mo_tot_num)
+  double precision, intent(in)   :: array(mo_tot_num,mo_tot_num)
   
   integer                        :: exc(0:2,2,2)
   integer                        :: degree
@@ -25,7 +25,7 @@ subroutine i_O1_j(array,key_i,key_j,Nint,hij)
   ASSERT (sum(popcnt(key_j(:,2))) == elec_beta_num)
   
   hij = 0.d0
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call get_excitation_degree(key_i,key_j,degree,Nint)
   select case (degree)
     case (2)
@@ -53,7 +53,7 @@ subroutine i_O1_psi(array,key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array)
   use bitmasks
   implicit none
   integer, intent(in)            :: Nint, Ndet,Ndet_max,Nstate
-  double precision, intent(in)   :: array(mo_tot_num_align,mo_tot_num)
+  double precision, intent(in)   :: array(mo_tot_num,mo_tot_num)
   integer(bit_kind), intent(in)  :: keys(Nint,2,Ndet)
   integer(bit_kind), intent(in)  :: key(Nint,2)
   double precision, intent(in)   :: coef(Ndet_max,Nstate)
@@ -80,7 +80,7 @@ subroutine i_O1_psi(array,key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array)
   call filter_connected_mono(keys,key,Nint,Ndet,idx)
   do ii=1,idx(0)
     i = idx(ii)
-    !DEC$ FORCEINLINE
+    !DIR$ FORCEINLINE
     call i_O1_j(array,keys(1,1,i),key,Nint,hij)
     do j = 1, Nstate
       i_H_psi_array(j) = i_H_psi_array(j) + coef(i,j)*hij
@@ -96,7 +96,7 @@ double precision function diag_O1_mat_elem(array,det_in,Nint)
   END_DOC
   integer,intent(in)             :: Nint
   integer(bit_kind),intent(in)   :: det_in(Nint,2)
-  double precision, intent(in)   :: array(mo_tot_num_align,mo_tot_num)
+  double precision, intent(in)   :: array(mo_tot_num,mo_tot_num)
   
   integer                        :: i, ispin,tmp
   integer                        :: occ_det(Nint*bit_kind_size,2)
@@ -120,7 +120,7 @@ subroutine i_O1_psi_alpha_beta(array,key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H
   use bitmasks
   implicit none
   integer, intent(in)            :: Nint, Ndet,Ndet_max,Nstate
-  double precision, intent(in)   :: array(mo_tot_num_align,mo_tot_num)
+  double precision, intent(in)   :: array(mo_tot_num,mo_tot_num)
   integer(bit_kind), intent(in)  :: keys(Nint,2,Ndet)
   integer(bit_kind), intent(in)  :: key(Nint,2)
   double precision, intent(in)   :: coef(Ndet_max,Nstate)
@@ -147,7 +147,7 @@ subroutine i_O1_psi_alpha_beta(array,key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H
   call filter_connected_mono(keys,key,Nint,Ndet,idx)
   do ii=1,idx(0)
     i = idx(ii)
-    !DEC$ FORCEINLINE
+    !DIR$ FORCEINLINE
     call i_O1_j_alpha_beta(array,keys(1,1,i),key,Nint,hij)
     do j = 1, Nstate
       i_H_psi_array(j) = i_H_psi_array(j) + coef(i,j)*hij
@@ -167,7 +167,7 @@ subroutine i_O1_j_alpha_beta(array,key_i,key_j,Nint,hij)
   integer, intent(in)            :: Nint
   integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
   double precision, intent(out)  :: hij
-  double precision, intent(in)   :: array(mo_tot_num_align,mo_tot_num)
+  double precision, intent(in)   :: array(mo_tot_num,mo_tot_num)
   
   integer                        :: exc(0:2,2,2)
   integer                        :: degree
@@ -182,7 +182,7 @@ subroutine i_O1_j_alpha_beta(array,key_i,key_j,Nint,hij)
   ASSERT (sum(popcnt(key_j(:,2))) == elec_beta_num)
   
   hij = 0.d0
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call get_excitation_degree(key_i,key_j,degree,Nint)
   select case (degree)
     case (2)
@@ -215,7 +215,7 @@ double precision function diag_O1_mat_elem_alpha_beta(array,det_in,Nint)
   END_DOC
   integer,intent(in)             :: Nint
   integer(bit_kind),intent(in)   :: det_in(Nint,2)
-  double precision, intent(in)   :: array(mo_tot_num_align,mo_tot_num)
+  double precision, intent(in)   :: array(mo_tot_num,mo_tot_num)
   
   integer                        :: i, ispin,tmp
   integer                        :: occ_det(Nint*bit_kind_size,2)
@@ -319,7 +319,7 @@ subroutine filter_connected_mono(key1,key2,Nint,sze,idx)
     !DIR$ LOOP COUNT (1000)
     do i=1,sze
       degree_x2 = 0
-      !DEC$ LOOP COUNT MIN(4)
+      !DIR$ LOOP COUNT MIN(4)
       do j=1,Nint
         degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +&
             popcnt(xor( key1(j,2,i), key2(j,2)))
diff --git a/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES b/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES
index 8b137891..1814a42f 100644
--- a/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES
+++ b/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES
@@ -1 +1 @@
-
+Selectors_Utils
diff --git a/plugins/Selectors_CASSD/selectors.irp.f b/plugins/Selectors_CASSD/selectors.irp.f
index 31a8ab4f..167ec66e 100644
--- a/plugins/Selectors_CASSD/selectors.irp.f
+++ b/plugins/Selectors_CASSD/selectors.irp.f
@@ -1,10 +1,5 @@
 use bitmasks
 
-BEGIN_PROVIDER [ integer, psi_selectors_size ]
- implicit none
- psi_selectors_size = psi_det_size
-END_PROVIDER
-
 BEGIN_PROVIDER [ integer, N_det_selectors]
   implicit none
   BEGIN_DOC
@@ -66,30 +61,4 @@ END_PROVIDER
   endif
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ]
-  implicit none
-  BEGIN_DOC
-  ! Transposed psi_selectors
-  END_DOC
-  integer                        :: i,k
-
-  do i=1,N_det_selectors
-    do k=1,N_states
-      psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k) 
-    enddo
-  enddo
-END_PROVIDER
-
-BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ]
-  implicit none
-  BEGIN_DOC
-  ! Diagonal elements of the H matrix for each selectors 
-  END_DOC
-  integer                        :: i
-  double precision :: diag_H_mat_elem
-  do i = 1, N_det_selectors
-   psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int)
-  enddo
-END_PROVIDER
-
 
diff --git a/plugins/Selectors_CASSD/zmq.irp.f b/plugins/Selectors_CASSD/zmq.irp.f
deleted file mode 100644
index 2d4987d3..00000000
--- a/plugins/Selectors_CASSD/zmq.irp.f
+++ /dev/null
@@ -1,121 +0,0 @@
-subroutine zmq_put_psi(zmq_to_qp_run_socket,worker_id, energy, size_energy)
-  use f77_zmq
-  implicit none
-  BEGIN_DOC
-! Put the wave function on the qp_run scheduler
-  END_DOC
-  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
-  integer, intent(in)            :: worker_id
-  integer, intent(in)            :: size_energy
-  double precision, intent(out)  :: energy(size_energy)
-  integer                        :: rc
-  integer*8                      :: rc8
-  character*(256)                :: msg
-
-  write(msg,*) 'put_psi ', worker_id, N_states, N_det, psi_det_size, n_det_generators, n_det_selectors
-
-  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
-  if (rc /= len(trim(msg))) then
-    print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)'
-    stop 'error'
-  endif
-
-  rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_det,N_int*2_8*N_det*bit_kind,ZMQ_SNDMORE)
-  if (rc8 /= N_int*2_8*N_det*bit_kind) then
-    print *, 'f77_zmq_send8(zmq_to_qp_run_socket,psi_det,N_int*2_8*N_det*bit_kind,ZMQ_SNDMORE)'
-    stop 'error'
-  endif
-
-  rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8_8,ZMQ_SNDMORE)
-  if (rc8 /= psi_det_size*N_states*8_8) then
-    print *, 'f77_zmq_send8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8_8,ZMQ_SNDMORE)'
-    stop 'error'
-  endif
-
-  rc = f77_zmq_send(zmq_to_qp_run_socket,energy,size_energy*8,0)
-  if (rc /= size_energy*8) then
-    print *, 'f77_zmq_send(zmq_to_qp_run_socket,energy,size_energy*8,0)'
-    stop 'error'
-  endif
-
-  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
-  if (msg(1:rc) /= 'put_psi_reply 1') then
-    print *,  rc, trim(msg)
-    print *,  'Error in put_psi_reply'
-    stop 'error'
-  endif
-
-end
-
-
-
-subroutine zmq_get_psi(zmq_to_qp_run_socket, worker_id, energy, size_energy)
-  use f77_zmq
-  implicit none
-  BEGIN_DOC
-! Get the wave function from the qp_run scheduler
-  END_DOC
-  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
-  integer, intent(in)            :: worker_id
-  integer, intent(in)            :: size_energy
-  double precision, intent(out)  :: energy(size_energy)
-  integer                        :: rc
-  integer*8                      :: rc8
-  character*(64)                 :: msg
-
-  write(msg,*) 'get_psi ', worker_id
-
-  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
-  if (rc /= len(trim(msg))) then
-    print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)'
-    stop 'error'
-  endif
-
-  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
-  if (msg(1:13) /= 'get_psi_reply') then
-    print *,  rc, trim(msg)
-    print *,  'Error in get_psi_reply'
-    stop 'error'
-  endif
-  
-  integer :: N_states_read, N_det_read, psi_det_size_read
-  integer :: N_det_selectors_read, N_det_generators_read
-  read(msg(14:rc),*) N_states_read, N_det_read, psi_det_size_read, &
-    N_det_generators_read, N_det_selectors_read
-
-  N_states = N_states_read
-  N_det    = N_det_read
-  psi_det_size = psi_det_size_read
-  TOUCH psi_det_size N_det N_states
-
-  rc8 = f77_zmq_recv8(zmq_to_qp_run_socket,psi_det,N_int*2_8*N_det*bit_kind,0)
-  if (rc8 /= N_int*2_8*N_det*bit_kind) then
-    print *, 'f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)'
-    stop 'error'
-  endif
-
-  rc8 = f77_zmq_recv8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8_8,0)
-  if (rc8 /= psi_det_size*N_states*8_8) then
-    print *, '77_zmq_recv8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8_8,ZMQ_SNDMORE)'
-    stop 'error'
-  endif
-  TOUCH psi_det psi_coef
-
-  rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,size_energy*8,0)
-  if (rc /= size_energy*8) then
-    print *, '77_zmq_recv(zmq_to_qp_run_socket,energy,size_energy*8,0)'
-    stop 'error'
-  endif
-
-  if (N_det_generators_read > 0) then
-    N_det_generators = N_det_generators_read
-    TOUCH N_det_generators
-  endif
-  if (N_det_selectors_read > 0) then
-    N_det_selectors  = N_det_selectors_read
-    TOUCH N_det_selectors
-  endif
-
-end
-
-
diff --git a/plugins/Selectors_full/NEEDED_CHILDREN_MODULES b/plugins/Selectors_full/NEEDED_CHILDREN_MODULES
index 54f54203..d2716281 100644
--- a/plugins/Selectors_full/NEEDED_CHILDREN_MODULES
+++ b/plugins/Selectors_full/NEEDED_CHILDREN_MODULES
@@ -1 +1 @@
-Determinants Hartree_Fock
+Determinants Hartree_Fock Selectors_Utils
diff --git a/plugins/Selectors_full/selectors.irp.f b/plugins/Selectors_full/selectors.irp.f
index e8e746c8..42e3c87b 100644
--- a/plugins/Selectors_full/selectors.irp.f
+++ b/plugins/Selectors_full/selectors.irp.f
@@ -1,10 +1,5 @@
 use bitmasks
 
-BEGIN_PROVIDER [ integer, psi_selectors_size ]
- implicit none
- psi_selectors_size = psi_det_size
-END_PROVIDER
-
 BEGIN_PROVIDER [ integer, N_det_selectors]
   implicit none
   BEGIN_DOC
@@ -50,30 +45,4 @@ END_PROVIDER
   enddo
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ]
-  implicit none
-  BEGIN_DOC
-  ! Transposed psi_selectors
-  END_DOC
-  integer                        :: i,k
-
-  do i=1,N_det_selectors
-    do k=1,N_states
-      psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k) 
-    enddo
-  enddo
-END_PROVIDER
-
-BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ]
-  implicit none
-  BEGIN_DOC
-  ! Diagonal elements of the H matrix for each selectors 
-  END_DOC
-  integer                        :: i
-  double precision :: diag_H_mat_elem
-  do i = 1, N_det_selectors
-   psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int)
-  enddo
-END_PROVIDER
-
 
diff --git a/plugins/Selectors_no_sorted/NEEDED_CHILDREN_MODULES b/plugins/Selectors_no_sorted/NEEDED_CHILDREN_MODULES
index aae89501..27283c7d 100644
--- a/plugins/Selectors_no_sorted/NEEDED_CHILDREN_MODULES
+++ b/plugins/Selectors_no_sorted/NEEDED_CHILDREN_MODULES
@@ -1 +1 @@
-Determinants
+Determinants Selectors_Utils
diff --git a/plugins/Selectors_no_sorted/e_corr_selectors.irp.f b/plugins/Selectors_no_sorted/e_corr_selectors.irp.f
deleted file mode 100644
index 952e1c23..00000000
--- a/plugins/Selectors_no_sorted/e_corr_selectors.irp.f
+++ /dev/null
@@ -1,79 +0,0 @@
-
-use bitmasks
- BEGIN_PROVIDER [integer, exc_degree_per_selectors, (N_det_selectors)]
-&BEGIN_PROVIDER [integer, double_index_selectors, (N_det_selectors)]
-&BEGIN_PROVIDER [integer, n_double_selectors]
- implicit none
- BEGIN_DOC
- ! degree of excitation respect to Hartree Fock for the wave function
- !
- ! for the all the selectors determinants
- !
- ! double_index_selectors = list of the index of the double excitations
- ! 
- ! n_double_selectors = number of double excitations in the selectors determinants
- END_DOC
- integer :: i,degree
- n_double_selectors = 0
- do i = 1, N_det_selectors
-  call get_excitation_degree(psi_selectors(1,1,i),ref_bitmask,degree,N_int)
-  exc_degree_per_selectors(i) = degree
-  if(degree==2)then
-   n_double_selectors += 1
-   double_index_selectors(n_double_selectors) =i
-  endif
- enddo
-END_PROVIDER
-
-  BEGIN_PROVIDER[double precision, coef_hf_selector]
- &BEGIN_PROVIDER[double precision, inv_selectors_coef_hf]
- &BEGIN_PROVIDER[double precision, inv_selectors_coef_hf_squared]
- &BEGIN_PROVIDER[double precision, E_corr_per_selectors, (N_det_selectors)]
- &BEGIN_PROVIDER[double precision, i_H_HF_per_selectors, (N_det_selectors)]
- &BEGIN_PROVIDER[double precision, Delta_E_per_selector, (N_det_selectors)]
- &BEGIN_PROVIDER[double precision, E_corr_double_only ]
- &BEGIN_PROVIDER[double precision, E_corr_second_order ]
- implicit none
- BEGIN_DOC
- ! energy of correlation per determinant respect to the Hartree Fock determinant
- !
- ! for the all the double excitations in the selectors determinants
- !
- ! E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
- ! 
- ! E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation 
- !
- ! coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
- END_DOC
- PROVIDE  ref_bitmask_energy psi_selectors ref_bitmask N_int psi_selectors
- integer :: i,degree
- double precision :: hij,diag_H_mat_elem
- E_corr_double_only = 0.d0
- E_corr_second_order = 0.d0
- do i = 1, N_det_selectors
-  if(exc_degree_per_selectors(i)==2)then
-   call i_H_j(ref_bitmask,psi_selectors(1,1,i),N_int,hij)
-   i_H_HF_per_selectors(i) = hij
-   E_corr_per_selectors(i) = psi_selectors_coef(i,1) * hij
-   E_corr_double_only += E_corr_per_selectors(i)
-   E_corr_second_order += hij * hij /(ref_bitmask_energy - diag_H_mat_elem(psi_selectors(1,1,i),N_int))
-  elseif(exc_degree_per_selectors(i) == 0)then
-   coef_hf_selector = psi_selectors_coef(i,1)
-   E_corr_per_selectors(i) = -1000.d0
-   Delta_E_per_selector(i) = 0.d0
-  else
-   E_corr_per_selectors(i) = -1000.d0
-  endif
- enddo
- if (dabs(coef_hf_selector) > 1.d-8) then
-   inv_selectors_coef_hf = 1.d0/coef_hf_selector
-   inv_selectors_coef_hf_squared = inv_selectors_coef_hf * inv_selectors_coef_hf
- else
-   inv_selectors_coef_hf = 0.d0
-   inv_selectors_coef_hf_squared = 0.d0
- endif
- do i = 1,n_double_selectors
-  E_corr_per_selectors(double_index_selectors(i)) *=inv_selectors_coef_hf
- enddo
- E_corr_double_only = E_corr_double_only * inv_selectors_coef_hf
- END_PROVIDER
diff --git a/plugins/Selectors_no_sorted/selectors.irp.f b/plugins/Selectors_no_sorted/selectors.irp.f
index 83a8d472..3ac8218d 100644
--- a/plugins/Selectors_no_sorted/selectors.irp.f
+++ b/plugins/Selectors_no_sorted/selectors.irp.f
@@ -1,12 +1,5 @@
-
 use bitmasks
 
-
-BEGIN_PROVIDER [ integer, psi_selectors_size ]
- implicit none
- psi_selectors_size = psi_det_size
-END_PROVIDER
-
 BEGIN_PROVIDER [ integer, N_det_selectors]
  implicit none
  BEGIN_DOC
@@ -45,16 +38,3 @@ END_PROVIDER
   enddo
 END_PROVIDER
 
- BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ]
-  implicit none
-  BEGIN_DOC
-  ! Diagonal elements of the H matrix for each selectors 
-  END_DOC
-  integer                        :: i
-  double precision :: diag_H_mat_elem
-  do i = 1, N_det_selectors
-   psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int)
-  enddo
- END_PROVIDER
-
-
diff --git a/plugins/read_integral/read_integrals_mo.irp.f b/plugins/read_integral/read_integrals_mo.irp.f
index 5376b2a2..06db0ddf 100644
--- a/plugins/read_integral/read_integrals_mo.irp.f
+++ b/plugins/read_integral/read_integrals_mo.irp.f
@@ -28,7 +28,7 @@ subroutine run
    
   call ezfio_get_mo_basis_mo_tot_num(mo_tot_num)
 
-  allocate (A(mo_tot_num_align,mo_tot_num))
+  allocate (A(mo_tot_num,mo_tot_num))
   A = 0.d0
   
   iunit = getunitandopen('kinetic_mo','r')
diff --git a/src/AO_Basis/ao_overlap.irp.f b/src/AO_Basis/ao_overlap.irp.f
index 83110293..72aaaca0 100644
--- a/src/AO_Basis/ao_overlap.irp.f
+++ b/src/AO_Basis/ao_overlap.irp.f
@@ -1,7 +1,7 @@
- BEGIN_PROVIDER [ double precision, ao_overlap,(ao_num_align,ao_num) ]
-&BEGIN_PROVIDER [ double precision, ao_overlap_x,(ao_num_align,ao_num) ]
-&BEGIN_PROVIDER [ double precision, ao_overlap_y,(ao_num_align,ao_num) ]
-&BEGIN_PROVIDER [ double precision, ao_overlap_z,(ao_num_align,ao_num) ]
+ BEGIN_PROVIDER [ double precision, ao_overlap,(ao_num,ao_num) ]
+&BEGIN_PROVIDER [ double precision, ao_overlap_x,(ao_num,ao_num) ]
+&BEGIN_PROVIDER [ double precision, ao_overlap_y,(ao_num,ao_num) ]
+&BEGIN_PROVIDER [ double precision, ao_overlap_z,(ao_num,ao_num) ]
   implicit none
   BEGIN_DOC  
 ! Overlap between atomic basis functions:
@@ -34,8 +34,6 @@
     power_A(1)  = ao_power( j, 1 )
     power_A(2)  = ao_power( j, 2 )
     power_A(3)  = ao_power( j, 3 )
-    !DEC$ VECTOR ALIGNED
-    !DEC$ VECTOR ALWAYS
     do i= 1,ao_num
       ao_overlap(i,j)= 0.d0
       ao_overlap_x(i,j)= 0.d0
@@ -49,7 +47,6 @@
       power_B(3)  = ao_power( i, 3 )
       do n = 1,ao_prim_num(j)
       alpha = ao_expo_ordered_transp(n,j)
-      !DEC$ VECTOR ALIGNED
       do l = 1, ao_prim_num(i)
         beta = ao_expo_ordered_transp(l,i)
         call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
@@ -72,7 +69,7 @@
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num_align,ao_num) ]
+BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num,ao_num) ]
   implicit none
   BEGIN_DOC  
 ! Overlap between absolute value of atomic basis functions:
@@ -103,8 +100,6 @@ BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num_align,ao_num) ]
    power_A(1)  = ao_power( j, 1 )
    power_A(2)  = ao_power( j, 2 )
    power_A(3)  = ao_power( j, 3 )
-   !DEC$ VECTOR ALIGNED
-   !DEC$ VECTOR ALWAYS
    do i= 1,ao_num
     ao_overlap_abs(i,j)= 0.d0
     B_center(1) = nucl_coord( ao_nucl(i), 1 )
@@ -115,7 +110,6 @@ BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num_align,ao_num) ]
     power_B(3)  = ao_power( i, 3 )
     do n = 1,ao_prim_num(j)
      alpha = ao_expo_ordered_transp(n,j)
-     !DEC$ VECTOR ALIGNED
      do l = 1, ao_prim_num(i)
       beta = ao_expo_ordered_transp(l,i)
       call overlap_x_abs(A_center(1),B_center(1),alpha,beta,power_A(1),power_B(1),overlap_x,lower_exp_val,dx,dim1)
diff --git a/src/AO_Basis/aos.irp.f b/src/AO_Basis/aos.irp.f
index 871c0ee6..5d255a00 100644
--- a/src/AO_Basis/aos.irp.f
+++ b/src/AO_Basis/aos.irp.f
@@ -1,14 +1,3 @@
-BEGIN_PROVIDER [ integer, ao_num_align ]
-   implicit none
-   
-   BEGIN_DOC
-   ! Number of atomic orbitals align
-   END_DOC
-   
-   integer                        :: align_double
-   ao_num_align = align_double(ao_num)
-END_PROVIDER 
-
 BEGIN_PROVIDER [ integer, ao_prim_num_max ]
  implicit none
  ao_prim_num_max = 0
@@ -16,7 +5,7 @@ BEGIN_PROVIDER [ integer, ao_prim_num_max ]
  call ezfio_get_ao_basis_ao_prim_num_max(ao_prim_num_max)
 END_PROVIDER
 
- BEGIN_PROVIDER [ double precision, ao_coef_normalized, (ao_num_align,ao_prim_num_max) ]
+ BEGIN_PROVIDER [ double precision, ao_coef_normalized, (ao_num,ao_prim_num_max) ]
 &BEGIN_PROVIDER [ double precision, ao_coef_normalization_factor, (ao_num) ]
   implicit none
   BEGIN_DOC
@@ -86,8 +75,8 @@ BEGIN_PROVIDER [ double precision, ao_coef_normalization_libint_factor, (ao_num)
 END_PROVIDER
 
 
- BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered, (ao_num_align,ao_prim_num_max) ]
-&BEGIN_PROVIDER [ double precision, ao_expo_ordered, (ao_num_align,ao_prim_num_max) ]
+ BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered, (ao_num,ao_prim_num_max) ]
+&BEGIN_PROVIDER [ double precision, ao_expo_ordered, (ao_num,ao_prim_num_max) ]
    implicit none
    BEGIN_DOC
    ! Sorted primitives to accelerate 4 index MO transformation
@@ -112,7 +101,7 @@ END_PROVIDER
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered_transp, (ao_prim_num_max_align,ao_num) ]
+BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered_transp, (ao_prim_num_max,ao_num) ]
   implicit none
   BEGIN_DOC
   ! Transposed ao_coef_normalized_ordered
@@ -126,7 +115,7 @@ BEGIN_PROVIDER [ double precision, ao_coef_normalized_ordered_transp, (ao_prim_n
   
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, ao_expo_ordered_transp, (ao_prim_num_max_align,ao_num) ]
+BEGIN_PROVIDER [ double precision, ao_expo_ordered_transp, (ao_prim_num_max,ao_num) ]
   implicit none
   BEGIN_DOC
   ! Transposed ao_expo_ordered
@@ -155,16 +144,6 @@ END_PROVIDER
  ao_l_max = maxval(ao_l)
 END_PROVIDER
 
-BEGIN_PROVIDER [ integer, ao_prim_num_max_align ]
- implicit none
- BEGIN_DOC
-! Number of primitives per atomic orbital aligned
- END_DOC
-
- integer :: align_double
- ao_prim_num_max_align = align_double(ao_prim_num_max)
-END_PROVIDER
-
 integer function ao_power_index(nx,ny,nz)
   implicit none
   integer, intent(in)            :: nx, ny, nz
diff --git a/src/Bitmask/mpi.irp.f b/src/Bitmask/mpi.irp.f
new file mode 100644
index 00000000..18af1ca3
--- /dev/null
+++ b/src/Bitmask/mpi.irp.f
@@ -0,0 +1,44 @@
+BEGIN_PROVIDER [ integer, mpi_bit_kind ]
+ use bitmasks
+ implicit none
+ BEGIN_DOC
+ ! MPI bit kind type
+ END_DOC
+ IRP_IF MPI
+  include 'mpif.h'
+  if (bit_kind == 4) then
+    mpi_bit_kind = MPI_INTEGER4
+  else if (bit_kind == 8) then
+    mpi_bit_kind = MPI_INTEGER8
+  else
+    stop 'Wrong bit kind in mpi_bit_kind'
+  endif
+ IRP_ELSE
+  mpi_bit_kind = -1
+ IRP_ENDIF
+END_PROVIDER
+
+subroutine broadcast_chunks_bit_kind(A, LDA)
+  use bitmasks
+  implicit none
+  integer, intent(in)             :: LDA
+  integer(bit_kind), intent(inout) :: A(LDA)
+  BEGIN_DOC
+! Broadcast with chunks of ~2GB
+  END_DOC
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: i, sze, ierr
+    do i=1,LDA,200000000/bit_kind_size
+      sze = min(LDA-i+1, 200000000/bit_kind_size)
+      call MPI_BCAST (A(i), sze, MPI_BIT_KIND, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        print *,  irp_here//': Unable to broadcast chunks bit_kind', i
+        stop -1
+      endif
+    enddo
+  IRP_ENDIF
+end
+
+
+
diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f
index 29a04596..ec2440d5 100644
--- a/src/Davidson/davidson_parallel.irp.f
+++ b/src/Davidson/davidson_parallel.irp.f
@@ -371,3 +371,69 @@ BEGIN_PROVIDER [ integer, nthreads_davidson ]
  call write_int(6,nthreads_davidson,'Number of threads for Diagonalization')
 END_PROVIDER
 
+
+subroutine zmq_put_N_states_diag(zmq_to_qp_run_socket,worker_id)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Put N_states_diag on the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer                        :: rc
+  character*(256)                :: msg
+
+  write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, 'N_states_diag'
+  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
+  if (rc /= len(trim(msg))) then
+    print *,  irp_here, ': Error sending N_states_diag'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_send(zmq_to_qp_run_socket,N_states_diag,4,0)
+  if (rc /= 4) then
+    print *,  irp_here, ': Error sending N_states_diag'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+  if (msg(1:rc) /= 'put_data_reply ok') then
+    print *,  rc, trim(msg)
+    print *,  irp_here, ': Error in put_data_reply'
+    stop 'error'
+  endif
+
+end
+
+subroutine zmq_get_N_states_diag(zmq_to_qp_run_socket, worker_id)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Get N_states_diag from the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer                        :: rc
+  character*(64)                 :: msg
+
+  write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, 'N_states_diag'
+  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
+  if (rc /= len(trim(msg))) then
+    print *,  irp_here, ': Error getting N_states_diag'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) 
+  if (msg(1:14) /= 'get_data_reply') then 
+    print *,  rc, trim(msg) 
+    print *,  irp_here, ': Error in get_data_reply' 
+    stop 'error' 
+  endif 
+ 
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,N_states_diag,4,0) 
+  if (rc /= 4) then 
+    print *, irp_here, ': Error getting N_states_diag'
+    stop 'error' 
+  endif 
+
+end
diff --git a/src/Davidson/diagonalization.irp.f b/src/Davidson/diagonalization.irp.f
index e4d51198..b95cb946 100644
--- a/src/Davidson/diagonalization.irp.f
+++ b/src/Davidson/diagonalization.irp.f
@@ -363,8 +363,6 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia
   enddo
   write(iunit,'(A)') trim(write_buffer)
 
-  integer, external :: align_double
-
   allocate(                                                          &
       kl_pairs(2,N_st_diag*(N_st_diag+1)/2),                         &
       W(sze,N_st_diag,davidson_sze_max),                           &
diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f
index 7f9cb889..2dfe468e 100644
--- a/src/Davidson/diagonalization_hs2.irp.f
+++ b/src/Davidson/diagonalization_hs2.irp.f
@@ -113,7 +113,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
     stop -1
   endif
   
-  integer, external              :: align_double
   itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
   
   PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse
diff --git a/src/Davidson/u0Hu0_old.irp.f b/src/Davidson/u0Hu0_old.irp.f
deleted file mode 100644
index 5fc68f04..00000000
--- a/src/Davidson/u0Hu0_old.irp.f
+++ /dev/null
@@ -1,518 +0,0 @@
-
-subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze)
-  use bitmasks
-  implicit none
-  BEGIN_DOC
-  ! Computes v_0 = H|u_0> 
-  !
-  ! n : number of determinants
-  !
-  ! H_jj : array of <j|H|j>
-  !
-  END_DOC
-  integer, intent(in)            :: N_st,n,Nint, sze
-  double precision, intent(out)  :: v_0(sze,N_st)
-  double precision, intent(in)   :: u_0(sze,N_st)
-  double precision, intent(in)   :: H_jj(n)
-  integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
-  double precision               :: hij,s2 
-  double precision, allocatable  :: vt(:,:), ut(:,:), st(:,:)
-  integer                        :: i,j,k,l, jj,ii
-  integer                        :: i0, j0
-  
-  integer, allocatable           :: shortcut(:,:), sort_idx(:,:)
-  integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
-  integer(bit_kind)              :: sorted_i(Nint)
-  
-  integer                        :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
-  integer                        :: N_st_8
-  
-  integer, external              :: align_double
-  
-  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
-
-  N_st_8 = align_double(N_st)
-
-  ASSERT (Nint > 0)
-  ASSERT (Nint == N_int)
-  ASSERT (n>0)
-  PROVIDE ref_bitmask_energy
-
-  allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
-  allocate( ut(N_st_8,n))
-
-  v_0 = 0.d0
-
-  call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
-  call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
-
-  !$OMP PARALLEL DEFAULT(NONE)                                       &
-      !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
-      !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) 
-  allocate(vt(N_st_8,n),st(N_st_8,n))
-  Vt = 0.d0
-  St = 0.d0
-
-  !$OMP DO
-  do i=1,n
-    do istate=1,N_st
-      ut(istate,i) = u_0(sort_idx(i,2),istate)
-    enddo
-  enddo
-  !$OMP END DO
-
-  !$OMP DO SCHEDULE(static,1)
-  do sh=1,shortcut(0,2)
-    do i=shortcut(sh,2),shortcut(sh+1,2)-1
-      org_i = sort_idx(i,2)
-      do j=shortcut(sh,2),shortcut(sh+1,2)-1
-        org_j = sort_idx(j,2)
-        ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
-        if (ext > 4) cycle
-        do ni=2,Nint
-          ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
-          if (ext > 4) exit
-        end do
-        if(ext == 4) then
-            call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-            do istate=1,n_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-              st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-            enddo
-        end if
-      end do
-    end do
-  enddo
-  !$OMP END DO
-
-  !$OMP DO
-  do i=1,n
-    do istate=1,N_st
-      ut(istate,i) = u_0(sort_idx(i,1),istate)
-    enddo
-  enddo
-  !$OMP END DO
-
-  !$OMP DO SCHEDULE(static,1)
-  do sh=1,shortcut(0,1)
-    do sh2=1,shortcut(0,1)
-      if (sh==sh2) cycle
-
-      exa = 0
-      do ni=1,Nint
-        exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
-      end do
-      if(exa > 2) then
-        cycle
-      end if
-
-      do i=shortcut(sh,1),shortcut(sh+1,1)-1
-        org_i = sort_idx(i,1)
-        do ni=1,Nint
-          sorted_i(ni) = sorted(ni,i,1)
-        enddo
-
-        do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
-          ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
-          if (ext > 4) cycle
-          do ni=2,Nint
-            ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
-            if (ext > 4) exit
-          end do
-          if(ext <= 4) then
-            org_j = sort_idx(j,1)
-            call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-            if (hij /= 0.d0) then
-              do istate=1,n_st
-                vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-              enddo
-            endif
-            if (ext /= 2) then
-              call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-              if (s2 /= 0.d0) then
-                do istate=1,n_st
-                  st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-                enddo
-              endif
-            endif
-          endif
-        enddo
-        
-      enddo
-    enddo
-
-    exa = 0
-
-    do i=shortcut(sh,1),shortcut(sh+1,1)-1
-      org_i = sort_idx(i,1)
-      do ni=1,Nint
-        sorted_i(ni) = sorted(ni,i,1)
-      enddo
-
-      do j=shortcut(sh,1),i-1
-        ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
-        if (ext > 4) cycle
-        do ni=2,Nint
-          ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
-          if (ext > 4) exit
-        end do
-        if(ext <= 4) then
-          org_j = sort_idx(j,1)
-          call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-          if (hij /= 0.d0) then
-            do istate=1,n_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-            enddo
-          endif
-          if (ext /= 2) then
-            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-            if (s2 /= 0.d0) then
-              do istate=1,n_st
-                st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-              enddo
-            endif
-          endif
-        endif
-      enddo
-      
-      do j=i+1,shortcut(sh+1,1)-1
-        if (i==j) cycle
-        ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
-        if (ext > 4) cycle
-        do ni=2,Nint
-          ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
-          if (ext > 4) exit
-        end do
-        if(ext <= 4) then
-          org_j = sort_idx(j,1)
-          call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-          if (hij /= 0.d0) then
-            do istate=1,n_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-            enddo
-          endif
-          if (ext /= 2) then
-            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-            if (s2 /= 0.d0) then
-              do istate=1,n_st
-                st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-              enddo
-            endif
-          endif
-        endif
-      enddo
-    enddo
-  enddo
-  !$OMP END DO 
-
-  do istate=1,N_st
-    do i=1,n
-      !$OMP ATOMIC
-      v_0(i,istate) = v_0(i,istate) + vt(istate,i)
-    enddo
-  enddo
-
-  deallocate(vt,st)
-  !$OMP END PARALLEL
-
-  do istate=1,N_st
-    do i=1,n
-      v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
-    enddo
-  enddo
-  deallocate (shortcut, sort_idx, sorted, version, ut)
-end
-
-
-
-
-
-subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze)
-  use bitmasks
-  implicit none
-  BEGIN_DOC
-  ! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
-  !
-  ! n : number of determinants
-  !
-  ! H_jj : array of <j|H|j>
-  !
-  ! S2_jj : array of <j|S^2|j>
-  END_DOC
-  integer, intent(in)            :: N_st,n,Nint, sze
-  double precision, intent(out)  :: v_0(sze,N_st), s_0(sze,N_st)
-  double precision, intent(in)   :: u_0(sze,N_st)
-  double precision, intent(in)   :: H_jj(n), S2_jj(n)
-  integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
-  double precision               :: hij,s2 
-  double precision, allocatable  :: vt(:,:), ut(:,:), st(:,:)
-  integer                        :: i,j,k,l, jj,ii
-  integer                        :: i0, j0
-  
-  integer, allocatable           :: shortcut(:,:), sort_idx(:,:)
-  integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
-  integer(bit_kind)              :: sorted_i(Nint)
-  
-  integer                        :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
-  integer                        :: N_st_8
-  
-  integer, external              :: align_double
-  
-  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
-
-  N_st_8 = align_double(N_st)
-
-  ASSERT (Nint > 0)
-  ASSERT (Nint == N_int)
-  ASSERT (n>0)
-  PROVIDE ref_bitmask_energy
-
-  allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
-  allocate( ut(N_st_8,n))
-
-  v_0 = 0.d0
-  s_0 = 0.d0
-
-  call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
-  call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
-
-  !$OMP PARALLEL DEFAULT(NONE)                                       &
-      !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
-      !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) 
-  allocate(vt(N_st_8,n),st(N_st_8,n))
-  Vt = 0.d0
-  St = 0.d0
-
-  !$OMP DO
-  do i=1,n
-    do istate=1,N_st
-      ut(istate,i) = u_0(sort_idx(i,2),istate)
-    enddo
-  enddo
-  !$OMP END DO
-
-  !$OMP DO SCHEDULE(static,4)
-  do sh=1,shortcut(0,2)
-    do i=shortcut(sh,2),shortcut(sh+1,2)-1
-      org_i = sort_idx(i,2)
-      do j=shortcut(sh,2),shortcut(sh+1,2)-1
-        org_j = sort_idx(j,2)
-        ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
-        if (ext > 4) cycle
-        do ni=2,Nint
-          ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
-          if (ext > 4) exit
-        end do
-        if(ext == 4) then
-            call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-            do istate=1,n_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-              st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-            enddo
-        end if
-      end do
-    end do
-  enddo
-  !$OMP END DO
-
-  !$OMP DO
-  do i=1,n
-    do istate=1,N_st
-      ut(istate,i) = u_0(sort_idx(i,1),istate)
-    enddo
-  enddo
-  !$OMP END DO
-
-  !$OMP DO SCHEDULE(static,4)
-  do sh=1,shortcut(0,1)
-    do sh2=1,shortcut(0,1)
-      if (sh==sh2) cycle
-
-      exa = 0
-      do ni=1,Nint
-        exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
-      end do
-      if(exa > 2) then
-        cycle
-      end if
-
-      do i=shortcut(sh,1),shortcut(sh+1,1)-1
-        org_i = sort_idx(i,1)
-        do ni=1,Nint
-          sorted_i(ni) = sorted(ni,i,1)
-        enddo
-
-        do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
-          ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
-          if (ext > 4) cycle
-          do ni=2,Nint
-            ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
-            if (ext > 4) exit
-          end do
-          if(ext <= 4) then
-            org_j = sort_idx(j,1)
-            call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-            if (hij /= 0.d0) then
-              do istate=1,n_st
-                vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-              enddo
-            endif
-            if (ext /= 2) then
-              call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-              if (s2 /= 0.d0) then
-                do istate=1,n_st
-                  st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-                enddo
-              endif
-            endif
-          endif
-        enddo
-        
-      enddo
-    enddo
-
-    exa = 0
-
-    do i=shortcut(sh,1),shortcut(sh+1,1)-1
-      org_i = sort_idx(i,1)
-      do ni=1,Nint
-        sorted_i(ni) = sorted(ni,i,1)
-      enddo
-
-      do j=shortcut(sh,1),i-1
-        ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
-        if (ext > 4) cycle
-        do ni=2,Nint
-          ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
-          if (ext > 4) exit
-        end do
-        if(ext <= 4) then
-          org_j = sort_idx(j,1)
-          call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-          if (hij /= 0.d0) then
-            do istate=1,n_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-            enddo
-          endif
-          if (ext /= 2) then
-            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-            if (s2 /= 0.d0) then
-              do istate=1,n_st
-                st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-              enddo
-            endif
-          endif
-        endif
-      enddo
-      
-      do j=i+1,shortcut(sh+1,1)-1
-        ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
-        if (ext > 4) cycle
-        do ni=2,Nint
-          ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
-          if (ext > 4) exit
-        end do
-        if(ext <= 4) then
-          org_j = sort_idx(j,1)
-          call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
-          if (hij /= 0.d0) then
-            do istate=1,n_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
-            enddo
-          endif
-          if (ext /= 2) then
-            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
-            if (s2 /= 0.d0) then
-              do istate=1,n_st
-                st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
-              enddo
-            endif
-          endif
-        endif
-      enddo
-    enddo
-  enddo
-  !$OMP END DO 
-
-  do istate=1,N_st
-    do i=1,n
-      !$OMP ATOMIC
-      v_0(i,istate) = v_0(i,istate) + vt(istate,i)
-      !$OMP ATOMIC
-      s_0(i,istate) = s_0(i,istate) + st(istate,i)
-    enddo
-  enddo
-
-  deallocate(vt,st)
-  !$OMP END PARALLEL
-
-  do istate=1,N_st
-    do i=1,n
-      v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
-      s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
-    enddo
-  enddo
-  deallocate (shortcut, sort_idx, sorted, version, ut)
-end
-
-subroutine H_S2_u_0_nstates_test(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze)
-  use bitmasks
-  implicit none
-  integer, intent(in)            :: N_st,n,Nint, sze
-  integer(bit_kind), intent(in)  :: keys_tmp(Nint,2,n)
-  double precision, intent(inout)   :: v_0(sze,N_st), s_0(sze,N_st)
-  double precision, intent(in)   :: u_0(sze,N_st)
-  double precision, intent(in)   :: H_jj(n), S2_jj(n)
-  
-  PROVIDE ref_bitmask_energy
-
-  double precision, allocatable :: vt(:,:)
-  integer, allocatable :: idx(:)
-  integer                        :: i,j, jj, l
-  double precision               :: hij
-
-  do i=1,n
-    v_0(i,:) = H_jj(i) * u_0(i,:)
-  enddo
-
-  allocate(idx(0:n), vt(N_st,n))
-  Vt = 0.d0
-  !$OMP PARALLEL DO DEFAULT(shared) PRIVATE(i,idx,jj,j,degree,exc,phase,hij,l) SCHEDULE(static,1)
-  do i=2,n
-    idx(0) = i
-    call filter_connected(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx)
-    do jj=1,idx(0)
-      j = idx(jj)
-      double precision :: phase
-      integer :: degree
-      integer :: exc(0:2,2,2)
-      call get_excitation(keys_tmp(1,1,j),keys_tmp(1,1,i),exc,degree,phase,Nint)
-!       if ((degree == 2).and.(exc(0,1,1)==1)) then
-!         continue
-!       else
-!         cycle
-!       endif
-!       if ((degree == 2).and.(exc(0,1,1)==1)) cycle
-!      if ((degree > 1)) cycle
-!      if ((degree == 1)) cycle
-!      if (exc(0,1,2) /= 0) cycle
-!      if (exc(0,1,1) == 2) cycle
-!      if (exc(0,1,2) == 2) cycle
-!      if ((degree==1).and.(exc(0,1,1) == 1)) cycle
-      call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij)
-      do l=1,N_st
-        !$OMP ATOMIC
-        vt (l,i) = vt (l,i) + hij*u_0(j,l)
-        !$OMP ATOMIC
-        vt (l,j) = vt (l,j) + hij*u_0(i,l)
-      enddo
-    enddo
-  enddo
-  !$OMP END PARALLEL DO
-  do i=1,n
-    v_0(i,:) = v_0(i,:) + vt(:,i)
-  enddo
-end
-
diff --git a/src/Determinants/filter_connected.irp.f b/src/Determinants/filter_connected.irp.f
index 84775770..7ac7b9ce 100644
--- a/src/Determinants/filter_connected.irp.f
+++ b/src/Determinants/filter_connected.irp.f
@@ -78,7 +78,7 @@ subroutine filter_not_connected(key1,key2,Nint,sze,idx)
     !DIR$ LOOP COUNT (1000)
     do i=1,sze
       degree_x2 = 0
-      !DEC$ LOOP COUNT MIN(4)
+      !DIR$ LOOP COUNT MIN(4)
       do j=1,Nint
         degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +&
             popcnt(xor( key1(j,2,i), key2(j,2)))
@@ -177,7 +177,7 @@ subroutine filter_connected(key1,key2,Nint,sze,idx)
     !DIR$ LOOP COUNT (1000)
     do i=1,sze
       degree_x2 = 0
-      !DEC$ LOOP COUNT MIN(4)
+      !DIR$ LOOP COUNT MIN(4)
       do j=1,Nint
         degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +&
             popcnt(xor( key1(j,2,i), key2(j,2)))
@@ -404,7 +404,7 @@ subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx)
     !DIR$ LOOP COUNT (1000)
     outer: do i=1,sze
       degree_x2 = 0
-      !DEC$ LOOP COUNT MIN(4)
+      !DIR$ LOOP COUNT MIN(4)
       do m=1,Nint
         if ( key1(m,1,i) /=  key2(m,1)) then
           degree_x2 = degree_x2+ popcnt(xor( key1(m,1,i), key2(m,1))) 
@@ -454,7 +454,7 @@ subroutine filter_connected_i_H_psi0_SC2(key1,key2,Nint,sze,idx,idx_repeat)
   integer :: degree
   degree = popcnt(xor( ref_bitmask(1,1), key2(1,1))) +                      &
       popcnt(xor( ref_bitmask(1,2), key2(1,2)))
-  !DEC$ NOUNROLL
+  !DIR$ NOUNROLL
   do m=2,Nint
     degree = degree+ popcnt(xor( ref_bitmask(m,1), key2(m,1))) +            &
         popcnt(xor( ref_bitmask(m,2), key2(m,2)))
@@ -526,7 +526,7 @@ subroutine filter_connected_i_H_psi0_SC2(key1,key2,Nint,sze,idx,idx_repeat)
      !DIR$ LOOP COUNT (1000)
      do i=1,sze
        degree_x2 = 0
-       !DEC$ LOOP COUNT MIN(4)
+       !DIR$ LOOP COUNT MIN(4)
        do m=1,Nint
          degree_x2 = degree_x2+ popcnt(xor( key1(m,1,i), key2(m,1))) +&
              popcnt(xor( key1(m,2,i), key2(m,2)))
@@ -610,7 +610,7 @@ subroutine filter_connected_i_H_psi0_SC2(key1,key2,Nint,sze,idx,idx_repeat)
      !DIR$ LOOP COUNT (1000)
      do i=1,sze
        degree_x2 = 0
-       !DEC$ LOOP COUNT MIN(4)
+       !DIR$ LOOP COUNT MIN(4)
        do m=1,Nint
          degree_x2 = degree_x2+ popcnt(xor( key1(m,1,i), key2(m,1))) +&
              popcnt(xor( key1(m,2,i), key2(m,2)))
diff --git a/src/Determinants/two_body_dm_map.irp.f b/src/Determinants/two_body_dm_map.irp.f
index 2228b1b5..f2ad69ac 100644
--- a/src/Determinants/two_body_dm_map.irp.f
+++ b/src/Determinants/two_body_dm_map.irp.f
@@ -141,8 +141,7 @@ subroutine add_values_to_two_body_dm_map(mask_ijkl)
      n_elements += 1
      contrib = psi_coef(i,1) * psi_coef(j,1) * phase
      buffer_value(n_elements) = contrib
-     !DEC$ FORCEINLINE
-!    call mo_bielec_integrals_index(h1,p1,h2,p2,buffer_i(n_elements))
+     !DIR$ FORCEINLINE
      call mo_bielec_integrals_index(h1,h2,p1,p2,buffer_i(n_elements))
 !    if (n_elements == size_buffer) then
 !      call insert_into_two_body_dm_ab_map(n_elements,buffer_i,buffer_value,&
diff --git a/src/Integrals_Bielec/ao_bi_integrals.irp.f b/src/Integrals_Bielec/ao_bi_integrals.irp.f
index 4750d5a0..2ee14962 100644
--- a/src/Integrals_Bielec/ao_bi_integrals.irp.f
+++ b/src/Integrals_Bielec/ao_bi_integrals.irp.f
@@ -483,7 +483,6 @@ double precision function general_primitive_integral(dim,            &
   
   accu = 0.d0
   iorder = iorder_p(1)+iorder_q(1)+iorder_p(1)+iorder_q(1)
-  !DIR$ VECTOR ALIGNED
   do ix=0,iorder
     Ix_pol(ix) = 0.d0
   enddo
@@ -494,9 +493,9 @@ double precision function general_primitive_integral(dim,            &
     do jx = 0, iorder_q(1)
       d = a*Q_new(jx,1)
       if (abs(d) < thresh) cycle
-      !DEC$ FORCEINLINE
+      !DIR$ FORCEINLINE
       call give_polynom_mult_center_x(P_center(1),Q_center(1),ix,jx,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dx,nx)
-      !DEC$ FORCEINLINE
+      !DIR$ FORCEINLINE
       call add_poly_multiply(dx,nx,d,Ix_pol,n_Ix)
     enddo
   enddo
@@ -504,7 +503,6 @@ double precision function general_primitive_integral(dim,            &
     return
   endif
   iorder = iorder_p(2)+iorder_q(2)+iorder_p(2)+iorder_q(2)
-  !DIR$ VECTOR ALIGNED
   do ix=0, iorder
     Iy_pol(ix) = 0.d0
   enddo
@@ -515,9 +513,9 @@ double precision function general_primitive_integral(dim,            &
       do jy = 0, iorder_q(2)
         e = b*Q_new(jy,2)
         if (abs(e) < thresh) cycle
-        !DEC$ FORCEINLINE
+        !DIR$ FORCEINLINE
         call   give_polynom_mult_center_x(P_center(2),Q_center(2),iy,jy,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dy,ny)
-        !DEC$ FORCEINLINE
+        !DIR$ FORCEINLINE
         call add_poly_multiply(dy,ny,e,Iy_pol,n_Iy)
       enddo
     endif
@@ -537,9 +535,9 @@ double precision function general_primitive_integral(dim,            &
       do jz = 0, iorder_q(3)
         f = c*Q_new(jz,3)
         if (abs(f) < thresh) cycle
-        !DEC$ FORCEINLINE
+        !DIR$ FORCEINLINE
         call   give_polynom_mult_center_x(P_center(3),Q_center(3),iz,jz,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dz,nz)
-        !DEC$ FORCEINLINE
+        !DIR$ FORCEINLINE
         call add_poly_multiply(dz,nz,f,Iz_pol,n_Iz)
       enddo
     endif
@@ -559,7 +557,7 @@ double precision function general_primitive_integral(dim,            &
     d_poly(i)=0.d0
   enddo
   
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp)
   if (n_pt_tmp == -1) then
     return
@@ -569,7 +567,7 @@ double precision function general_primitive_integral(dim,            &
     d1(i)=0.d0
   enddo
   
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out)
   double precision               :: rint_sum
   accu = accu + rint_sum(n_pt_out,const,d1)
@@ -673,7 +671,6 @@ subroutine integrale_new(I_f,a_x,b_x,c_x,d_x,a_y,b_y,c_y,d_y,a_z,b_z,c_z,d_z,p,q
   sy = iy+jy
   sz = iz+jz
 
-  !DIR$ VECTOR ALIGNED
   do i = 1,n_pt
     B10(i)  = p10_1 -  gauleg_t2(i,j)* p10_2
     B01(i)  = p01_1 -  gauleg_t2(i,j)* p01_2
@@ -682,27 +679,23 @@ subroutine integrale_new(I_f,a_x,b_x,c_x,d_x,a_y,b_y,c_y,d_y,a_z,b_z,c_z,d_z,p,q
   if (sx > 0) then
     call I_x1_new(ix,jx,B10,B01,B00,t1,n_pt)
   else
-    !DIR$ VECTOR ALIGNED
     do i = 1,n_pt
       t1(i) = 1.d0
     enddo
   endif
   if (sy > 0) then
     call I_x1_new(iy,jy,B10,B01,B00,t2,n_pt)
-    !DIR$ VECTOR ALIGNED
     do i = 1,n_pt
       t1(i) = t1(i)*t2(i)
     enddo
   endif
   if (sz > 0) then
     call I_x1_new(iz,jz,B10,B01,B00,t2,n_pt)
-    !DIR$ VECTOR ALIGNED
     do i = 1,n_pt
       t1(i) = t1(i)*t2(i)
     enddo
   endif
   I_f= 0.d0
-  !DIR$ VECTOR ALIGNED
   do i = 1,n_pt
     I_f += gauleg_w(i,j)*t1(i)
   enddo
@@ -724,7 +717,6 @@ recursive subroutine I_x1_new(a,c,B_10,B_01,B_00,res,n_pt)
   integer                        :: i
   
   if(c<0)then
-    !DIR$ VECTOR ALIGNED
     do i=1,n_pt
       res(i) = 0.d0
     enddo
@@ -732,14 +724,12 @@ recursive subroutine I_x1_new(a,c,B_10,B_01,B_00,res,n_pt)
     call I_x2_new(c,B_10,B_01,B_00,res,n_pt)
   else if (a==1) then
     call I_x2_new(c-1,B_10,B_01,B_00,res,n_pt)
-    !DIR$ VECTOR ALIGNED
     do i=1,n_pt
       res(i) = c * B_00(i) * res(i)
     enddo
   else
     call I_x1_new(a-2,c,B_10,B_01,B_00,res,n_pt)
     call I_x1_new(a-1,c-1,B_10,B_01,B_00,res2,n_pt)
-    !DIR$ VECTOR ALIGNED
     do i=1,n_pt
       res(i) = (a-1) * B_10(i) * res(i) &
                + c * B_00(i) * res2(i)
@@ -759,18 +749,15 @@ recursive subroutine I_x2_new(c,B_10,B_01,B_00,res,n_pt)
   integer                        :: i
   
   if(c==1)then
-    !DIR$ VECTOR ALIGNED
     do i=1,n_pt
       res(i) = 0.d0
     enddo
   elseif(c==0) then
-    !DIR$ VECTOR ALIGNED
     do i=1,n_pt
       res(i) = 1.d0
     enddo
   else
     call I_x1_new(0,c-2,B_10,B_01,B_00,res,n_pt)
-    !DIR$ VECTOR ALIGNED
     do i=1,n_pt
       res(i) =  (c-1) * B_01(i) * res(i)
     enddo
@@ -906,7 +893,6 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
   integer                        :: nx, ix,iy,ny
   
   ASSERT (a>2)
-  !DIR$ VECTOR ALIGNED
   !DIR$ LOOP COUNT(8)
   do ix=0,n_pt_in
     X(ix) = 0.d0
@@ -921,17 +907,15 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
     call I_x1_pol_mult_recurs(a-2,c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in)
   endif
   
-  !DIR$ VECTOR ALIGNED
   !DIR$ LOOP COUNT(8)
   do ix=0,nx
     X(ix) *= dble(a-1)
   enddo
   
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call multiply_poly(X,nx,B_10,2,d,nd)
   
   nx = nd
-  !DIR$ VECTOR ALIGNED
   !DIR$ LOOP COUNT(8)
   do ix=0,n_pt_in
     X(ix) = 0.d0
@@ -945,19 +929,17 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
       call I_x1_pol_mult_recurs(a-1,c-1,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in)
     endif
     if (c>1) then
-      !DIR$ VECTOR ALIGNED
       !DIR$ LOOP COUNT(8)
       do ix=0,nx
         X(ix) *= c
       enddo
     endif
-    !DEC$ FORCEINLINE
+    !DIR$ FORCEINLINE
     call multiply_poly(X,nx,B_00,2,d,nd)
   endif
   
   ny=0
   
-  !DIR$ VECTOR ALIGNED
   !DIR$ LOOP COUNT(8)
   do ix=0,n_pt_in
     Y(ix) = 0.d0
@@ -970,7 +952,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
     call I_x1_pol_mult_recurs(a-1,c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in)
   endif
   
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call multiply_poly(Y,ny,C_00,2,d,nd)
   
 end
@@ -997,7 +979,6 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
   endif
   
   nx = nd
-  !DIR$ VECTOR ALIGNED
   !DIR$ LOOP COUNT(8)
   do ix=0,n_pt_in
     X(ix) = 0.d0
@@ -1005,26 +986,24 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
   call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in)
   
   if (c>1) then
-    !DIR$ VECTOR ALIGNED
     !DIR$ LOOP COUNT(8)
     do ix=0,nx
       X(ix) *= dble(c)
     enddo
   endif
   
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call multiply_poly(X,nx,B_00,2,d,nd)
   
   ny=0
   
-  !DIR$ VECTOR ALIGNED
   !DIR$ LOOP COUNT(8)
   do ix=0,n_pt_in
     Y(ix) = 0.d0
   enddo
   call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in)
   
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call multiply_poly(Y,ny,C_00,2,d,nd)
   
 end
@@ -1045,7 +1024,6 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y
   integer                        :: nx, ix,iy,ny
   
-  !DIR$ VECTOR ALIGNED
   !DIR$ LOOP COUNT(8)
   do ix=0,n_pt_in
     X(ix) = 0.d0
@@ -1053,11 +1031,10 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
   nx = 0
   call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in)
   
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call multiply_poly(X,nx,B_10,2,d,nd)
   
   nx = nd
-  !DIR$ VECTOR ALIGNED
   !DIR$ LOOP COUNT(8)
   do ix=0,n_pt_in
     X(ix) = 0.d0
@@ -1067,26 +1044,24 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
   call I_x1_pol_mult_a1(c-1,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in)
   
   if (c>1) then
-    !DIR$ VECTOR ALIGNED
     !DIR$ LOOP COUNT(8)
     do ix=0,nx
       X(ix) *= dble(c)
     enddo
   endif
   
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call multiply_poly(X,nx,B_00,2,d,nd)
   
   ny=0
-  !DIR$ VECTOR ALIGNED
   !DIR$ LOOP COUNT(8)
   do ix=0,n_pt_in
     Y(ix) = 0.d0
   enddo
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in)
   
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call multiply_poly(Y,ny,C_00,2,d,nd)
   
 end
@@ -1104,7 +1079,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
   double precision, intent(in)   :: B_10(0:2),B_01(0:2),B_00(0:2),C_00(0:2),D_00(0:2)
   integer                        :: nx, ix,ny
   double precision               :: X(0:max_dim),Y(0:max_dim)
-  !DEC$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y
   integer                        :: i
   
   select case (c)
@@ -1135,13 +1110,12 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
       Y(1) = D_00(1)
       Y(2) = D_00(2)
       
-      !DEC$ FORCEINLINE
+      !DIR$ FORCEINLINE
       call multiply_poly(Y,ny,D_00,2,d,nd)
       return
       
       case default
       
-      !DIR$ VECTOR ALIGNED
       !DIR$ LOOP COUNT(6)
       do ix=0,c+c
         X(ix) = 0.d0
@@ -1149,24 +1123,22 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
       nx = 0
       call I_x2_pol_mult(c-2,B_10,B_01,B_00,C_00,D_00,X,nx,dim)
       
-      !DIR$ VECTOR ALIGNED
       !DIR$ LOOP COUNT(6)
       do ix=0,nx
         X(ix) *= dble(c-1)
       enddo
       
-      !DEC$ FORCEINLINE
+      !DIR$ FORCEINLINE
       call multiply_poly(X,nx,B_01,2,d,nd)
       
       ny = 0
-      !DIR$ VECTOR ALIGNED
       !DIR$ LOOP COUNT(6)
       do ix=0,c+c
         Y(ix) = 0.d0
       enddo
       call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim)
       
-      !DEC$ FORCEINLINE
+      !DIR$ FORCEINLINE
       call multiply_poly(Y,ny,D_00,2,d,nd)
       
   end select
diff --git a/src/Integrals_Bielec/gauss_legendre.irp.f b/src/Integrals_Bielec/gauss_legendre.irp.f
index e62febeb..e7545d58 100644
--- a/src/Integrals_Bielec/gauss_legendre.irp.f
+++ b/src/Integrals_Bielec/gauss_legendre.irp.f
@@ -1,14 +1,5 @@
-BEGIN_PROVIDER [ integer, n_pt_max_integrals_16 ]
- implicit none
- BEGIN_DOC
- ! Aligned n_pt_max_integrals
- END_DOC
- integer, external :: align_double
- n_pt_max_integrals_16 = align_double(n_pt_max_integrals)
-END_PROVIDER
-
- BEGIN_PROVIDER [ double precision, gauleg_t2, (n_pt_max_integrals_16,n_pt_max_integrals/2) ]
-&BEGIN_PROVIDER [ double precision, gauleg_w, (n_pt_max_integrals_16,n_pt_max_integrals/2) ]
+ BEGIN_PROVIDER [ double precision, gauleg_t2, (n_pt_max_integrals,n_pt_max_integrals/2) ]
+&BEGIN_PROVIDER [ double precision, gauleg_w, (n_pt_max_integrals,n_pt_max_integrals/2) ]
    implicit none
    BEGIN_DOC
    ! t_w(i,1,k) = w(i)
diff --git a/src/Integrals_Bielec/integrals_3_index.irp.f b/src/Integrals_Bielec/integrals_3_index.irp.f
index 41037b34..fd3551bb 100644
--- a/src/Integrals_Bielec/integrals_3_index.irp.f
+++ b/src/Integrals_Bielec/integrals_3_index.irp.f
@@ -1,5 +1,5 @@
- BEGIN_PROVIDER [double precision, big_array_coulomb_integrals, (mo_tot_num_align,mo_tot_num, mo_tot_num)]
-&BEGIN_PROVIDER [double precision, big_array_exchange_integrals,(mo_tot_num_align,mo_tot_num, mo_tot_num)]
+ BEGIN_PROVIDER [double precision, big_array_coulomb_integrals, (mo_tot_num,mo_tot_num, mo_tot_num)]
+&BEGIN_PROVIDER [double precision, big_array_exchange_integrals,(mo_tot_num,mo_tot_num, mo_tot_num)]
  implicit none
  integer :: i,j,k,l
  double precision :: get_mo_bielec_integral
diff --git a/src/Integrals_Bielec/mo_bi_integrals.irp.f b/src/Integrals_Bielec/mo_bi_integrals.irp.f
index 6a75e523..2375ddc4 100644
--- a/src/Integrals_Bielec/mo_bi_integrals.irp.f
+++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f
@@ -207,7 +207,7 @@ subroutine add_integrals_to_map(mask_ijkl)
   double precision, allocatable  :: bielec_tmp_1(:)
   double precision, allocatable  :: bielec_tmp_2(:,:)
   double precision, allocatable  :: bielec_tmp_3(:,:,:)
-  !DEC$ ATTRIBUTES ALIGN : 64    :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3
+  !DIR$ ATTRIBUTES ALIGN : 64    :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3
   
   integer                        :: n_integrals
   integer                        :: size_buffer
@@ -276,7 +276,7 @@ subroutine add_integrals_to_map(mask_ijkl)
   
   size_buffer = min(ao_num*ao_num*ao_num,16000000)
   print*, 'Providing the molecular integrals '
-  print*, 'Buffers : ', 8.*(mo_tot_num_align*(n_j)*(n_k+1) + mo_tot_num_align +&
+  print*, 'Buffers : ', 8.*(mo_tot_num*(n_j)*(n_k+1) + mo_tot_num+&
       ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
   
   call wall_time(wall_1)
@@ -289,18 +289,18 @@ subroutine add_integrals_to_map(mask_ijkl)
       !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
       !$OMP  wall_0,thread_num,accu_bis)                             &
       !$OMP  DEFAULT(NONE)                                           &
-      !$OMP  SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,n_l,mo_tot_num_align,&
+      !$OMP  SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,n_l,   &
       !$OMP  mo_coef_transp,                                         &
       !$OMP  mo_coef_transp_is_built, list_ijkl,                     &
       !$OMP  mo_coef_is_built, wall_1,                               &
       !$OMP  mo_coef,mo_integrals_threshold,mo_integrals_map)
   n_integrals = 0
   wall_0 = wall_1
-  allocate(bielec_tmp_3(mo_tot_num_align, n_j, n_k),                 &
-      bielec_tmp_1(mo_tot_num_align),                                &
+  allocate(bielec_tmp_3(mo_tot_num, n_j, n_k),                 &
+      bielec_tmp_1(mo_tot_num),                                &
       bielec_tmp_0(ao_num,ao_num),                                   &
       bielec_tmp_0_idx(ao_num),                                      &
-      bielec_tmp_2(mo_tot_num_align, n_j),                           &
+      bielec_tmp_2(mo_tot_num, n_j),                           &
       buffer_i(size_buffer),                                         &
       buffer_value(size_buffer) )
   
@@ -308,10 +308,8 @@ subroutine add_integrals_to_map(mask_ijkl)
   !$  thread_num = omp_get_thread_num()
   !$OMP DO SCHEDULE(guided)
   do l1 = 1,ao_num
-    !DEC$ VECTOR ALIGNED
     bielec_tmp_3 = 0.d0
     do k1 = 1,ao_num
-      !DEC$ VECTOR ALIGNED
       bielec_tmp_2 = 0.d0
       do j1 = 1,ao_num
         call get_ao_bielec_integrals(j1,k1,l1,ao_num,bielec_tmp_0(1,j1))
@@ -333,7 +331,6 @@ subroutine add_integrals_to_map(mask_ijkl)
           cycle
         endif
         
-        !DEC$ VECTOR ALIGNED
         bielec_tmp_1 = 0.d0
         ii1=1
         do ii1 = 1,kmax-4,4
@@ -443,7 +440,7 @@ subroutine add_integrals_to_map(mask_ijkl)
             endif
             n_integrals += 1
             buffer_value(n_integrals) = bielec_tmp_1(i)
-            !DEC$ FORCEINLINE
+            !DIR$ FORCEINLINE
             call mo_bielec_integrals_index(i,j,k,l,buffer_i(n_integrals))
             if (n_integrals == size_buffer) then
               call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
@@ -514,7 +511,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
   double precision, allocatable  :: bielec_tmp_1(:)
   double precision, allocatable  :: bielec_tmp_2(:,:)
   double precision, allocatable  :: bielec_tmp_3(:,:,:)
-  !DEC$ ATTRIBUTES ALIGN : 64    :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3
+  !DIR$ ATTRIBUTES ALIGN : 64    :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3
   
   integer                        :: n_integrals
   integer                        :: size_buffer
@@ -571,7 +568,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
   
   size_buffer = min(ao_num*ao_num*ao_num,16000000)
   print*, 'Providing the molecular integrals '
-  print*, 'Buffers : ', 8.*(mo_tot_num_align*(n_j)*(n_k+1) + mo_tot_num_align +&
+  print*, 'Buffers : ', 8.*(mo_tot_num*(n_j)*(n_k+1) + mo_tot_num+&
       ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
   
   call wall_time(wall_1)
@@ -583,18 +580,18 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
       !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
       !$OMP  wall_0,thread_num,accu_bis)                             &
       !$OMP  DEFAULT(NONE)                                           &
-      !$OMP  SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,mo_tot_num_align,&
+      !$OMP  SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,       &
       !$OMP  mo_coef_transp,                                         &
       !$OMP  mo_coef_transp_is_built, list_ijkl,                     &
       !$OMP  mo_coef_is_built, wall_1,                               &
       !$OMP  mo_coef,mo_integrals_threshold,mo_integrals_map)
   n_integrals = 0
   wall_0 = wall_1
-  allocate(bielec_tmp_3(mo_tot_num_align, n_j, n_k),                 &
-      bielec_tmp_1(mo_tot_num_align),                                &
+  allocate(bielec_tmp_3(mo_tot_num, n_j, n_k),                 &
+      bielec_tmp_1(mo_tot_num),                                &
       bielec_tmp_0(ao_num,ao_num),                                   &
       bielec_tmp_0_idx(ao_num),                                      &
-      bielec_tmp_2(mo_tot_num_align, n_j),                           &
+      bielec_tmp_2(mo_tot_num, n_j),                           &
       buffer_i(size_buffer),                                         &
       buffer_value(size_buffer) )
   
@@ -602,10 +599,8 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
   !$  thread_num = omp_get_thread_num()
   !$OMP DO SCHEDULE(guided)
   do l1 = 1,ao_num
-    !DEC$ VECTOR ALIGNED
     bielec_tmp_3 = 0.d0
     do k1 = 1,ao_num
-      !DEC$ VECTOR ALIGNED
       bielec_tmp_2 = 0.d0
       do j1 = 1,ao_num
         call get_ao_bielec_integrals(j1,k1,l1,ao_num,bielec_tmp_0(1,j1))
@@ -626,7 +621,6 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
           cycle
         endif
         
-        !DEC$ VECTOR ALIGNED
         bielec_tmp_1 = 0.d0
         ii1=1
         do ii1 = 1,kmax-4,4
@@ -728,7 +722,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
           if(i==k .and. j==l .and. i.ne.j)then
             buffer_value(n_integrals) = buffer_value(n_integrals) *0.5d0
           endif
-          !DEC$ FORCEINLINE
+          !DIR$ FORCEINLINE
           call mo_bielec_integrals_index(i,j,k,l,buffer_i(n_integrals))
           if (n_integrals == size_buffer) then
             call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
@@ -760,7 +754,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
         
         n_integrals += 1
         buffer_value(n_integrals) = bielec_tmp_1(i)
-        !DEC$ FORCEINLINE
+        !DIR$ FORCEINLINE
         call mo_bielec_integrals_index(i,j,k,l,buffer_i(n_integrals))
         if (n_integrals == size_buffer) then
           call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
@@ -828,7 +822,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
   double precision, allocatable  :: bielec_tmp_1(:)
   double precision, allocatable  :: bielec_tmp_2(:,:)
   double precision, allocatable  :: bielec_tmp_3(:,:,:)
-  !DEC$ ATTRIBUTES ALIGN : 64    :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3
+  !DIR$ ATTRIBUTES ALIGN : 64    :: bielec_tmp_1, bielec_tmp_2, bielec_tmp_3
   
   integer                        :: n_integrals
   integer                        :: size_buffer
@@ -853,7 +847,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
   
   size_buffer = min(ao_num*ao_num*ao_num,16000000)
   print*, 'Providing the molecular integrals '
-  print*, 'Buffers : ', 8.*(mo_tot_num_align*(n_j)*(n_k+1) + mo_tot_num_align +&
+  print*, 'Buffers : ', 8.*(mo_tot_num*(n_j)*(n_k+1) + mo_tot_num+&
       ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
   
   call wall_time(wall_1)
@@ -864,18 +858,18 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
       !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
       !$OMP  wall_0,thread_num)                                      &
       !$OMP  DEFAULT(NONE)                                           &
-      !$OMP  SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,n_l,mo_tot_num_align,&
+      !$OMP  SHARED(size_buffer,ao_num,mo_tot_num,n_i,n_j,n_k,n_l,   &
       !$OMP  mo_coef_transp,                                         &
       !$OMP  mo_coef_transp_is_built, list_ijkl,                     &
       !$OMP  mo_coef_is_built, wall_1,                               &
       !$OMP  mo_coef,mo_integrals_threshold,mo_integrals_map)
   n_integrals = 0
   wall_0 = wall_1
-  allocate(bielec_tmp_3(mo_tot_num_align, n_j, n_k),                 &
-      bielec_tmp_1(mo_tot_num_align),                                &
+  allocate(bielec_tmp_3(mo_tot_num, n_j, n_k),                 &
+      bielec_tmp_1(mo_tot_num),                                &
       bielec_tmp_0(ao_num,ao_num),                                   &
       bielec_tmp_0_idx(ao_num),                                      &
-      bielec_tmp_2(mo_tot_num_align, n_j),                           &
+      bielec_tmp_2(mo_tot_num, n_j),                           &
       buffer_i(size_buffer),                                         &
       buffer_value(size_buffer) )
   
@@ -888,10 +882,8 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
     !      cycle
     !    endif
     !IRP_ENDIF
-    !DEC$ VECTOR ALIGNED
     bielec_tmp_3 = 0.d0
     do k1 = 1,ao_num
-      !DEC$ VECTOR ALIGNED
       bielec_tmp_2 = 0.d0
       do j1 = 1,ao_num
         call get_ao_bielec_integrals(j1,k1,l1,ao_num,bielec_tmp_0(1,j1))
@@ -913,7 +905,6 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
           cycle
         endif
         
-        !DEC$ VECTOR ALIGNED
         bielec_tmp_1 = 0.d0
         ii1=1
         do ii1 = 1,kmax-4,4
@@ -1018,7 +1009,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
             endif
             n_integrals += 1
             buffer_value(n_integrals) = bielec_tmp_1(i)
-            !DEC$ FORCEINLINE
+            !DIR$ FORCEINLINE
             call mo_bielec_integrals_index(i,j,k,l,buffer_i(n_integrals))
             if (n_integrals == size_buffer) then
               call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
@@ -1071,9 +1062,9 @@ end
 
 
 
- BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_from_ao, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_exchange_from_ao, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_anti_from_ao, (mo_tot_num_align,mo_tot_num) ]
+ BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_from_ao, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_exchange_from_ao, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_anti_from_ao, (mo_tot_num,mo_tot_num) ]
   implicit none
   BEGIN_DOC
   ! mo_bielec_integral_jj_from_ao(i,j) = J_ij
@@ -1103,20 +1094,19 @@ end
   !$OMP PARALLEL DEFAULT(NONE)                                       &
       !$OMP PRIVATE (i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx,  &
       !$OMP  iqrs, iqsr,iqri,iqis)                                   &
-      !$OMP SHARED(mo_tot_num,mo_coef_transp,mo_tot_num_align,ao_num,&
+      !$OMP SHARED(mo_tot_num,mo_coef_transp,ao_num,                 &
       !$OMP  ao_integrals_threshold,do_direct_integrals)             &
       !$OMP REDUCTION(+:mo_bielec_integral_jj_from_ao,mo_bielec_integral_jj_exchange_from_ao)
   
   allocate( int_value(ao_num), int_idx(ao_num),                      &
-      iqrs(mo_tot_num_align,ao_num), iqis(mo_tot_num), iqri(mo_tot_num),&
-      iqsr(mo_tot_num_align,ao_num) )
+      iqrs(mo_tot_num,ao_num), iqis(mo_tot_num), iqri(mo_tot_num),&
+      iqsr(mo_tot_num,ao_num) )
   
   !$OMP DO SCHEDULE (guided)
   do s=1,ao_num
     do q=1,ao_num
       
       do j=1,ao_num
-        !DIR$ VECTOR ALIGNED
         do i=1,mo_tot_num
           iqrs(i,j) = 0.d0
           iqsr(i,j) = 0.d0
@@ -1130,7 +1120,6 @@ end
           do p=1,ao_num
             integral = int_value(p)
             if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
               do i=1,mo_tot_num
                 iqrs(i,r) += mo_coef_transp(i,p) * integral
               enddo
@@ -1140,7 +1129,6 @@ end
           do p=1,ao_num
             integral = int_value(p)
             if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
               do i=1,mo_tot_num
                 iqsr(i,r) += mo_coef_transp(i,p) * integral
               enddo
@@ -1156,7 +1144,6 @@ end
             p = int_idx(pp)
             integral = int_value(pp)
             if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
               do i=1,mo_tot_num
                 iqrs(i,r) += mo_coef_transp(i,p) * integral
               enddo
@@ -1167,7 +1154,6 @@ end
             p = int_idx(pp)
             integral = int_value(pp)
             if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
               do i=1,mo_tot_num
                 iqsr(i,r) += mo_coef_transp(i,p) * integral
               enddo
@@ -1178,14 +1164,12 @@ end
       iqis = 0.d0
       iqri = 0.d0
       do r=1,ao_num
-        !DIR$ VECTOR ALIGNED
         do i=1,mo_tot_num
           iqis(i) += mo_coef_transp(i,r) * iqrs(i,r)
           iqri(i) += mo_coef_transp(i,r) * iqsr(i,r)
         enddo
       enddo
       do i=1,mo_tot_num
-        !DIR$ VECTOR ALIGNED
         do j=1,mo_tot_num
           c = mo_coef_transp(j,q)*mo_coef_transp(j,s)
           mo_bielec_integral_jj_from_ao(j,i) += c * iqis(i)
@@ -1204,9 +1188,9 @@ end
   
 END_PROVIDER
 
- BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_from_ao, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_exchange_from_ao, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_anti_from_ao, (mo_tot_num_align,mo_tot_num) ]
+ BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_from_ao, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_exchange_from_ao, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bielec_integral_vv_anti_from_ao, (mo_tot_num,mo_tot_num) ]
   implicit none
   BEGIN_DOC
   ! mo_bielec_integral_vv_from_ao(i,j) = J_ij
@@ -1238,20 +1222,19 @@ END_PROVIDER
   !$OMP PARALLEL DEFAULT(NONE)                                       &
       !$OMP PRIVATE (i0,j0,i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx,&
       !$OMP  iqrs, iqsr,iqri,iqis)                                   &
-      !$OMP SHARED(n_virt_orb,mo_tot_num,list_virt,mo_coef_transp,mo_tot_num_align,ao_num,&
+      !$OMP SHARED(n_virt_orb,mo_tot_num,list_virt,mo_coef_transp,ao_num,&
       !$OMP  ao_integrals_threshold,do_direct_integrals)             &
       !$OMP REDUCTION(+:mo_bielec_integral_vv_from_ao,mo_bielec_integral_vv_exchange_from_ao)
   
   allocate( int_value(ao_num), int_idx(ao_num),                      &
-      iqrs(mo_tot_num_align,ao_num), iqis(mo_tot_num), iqri(mo_tot_num),&
-      iqsr(mo_tot_num_align,ao_num) )
+      iqrs(mo_tot_num,ao_num), iqis(mo_tot_num), iqri(mo_tot_num),&
+      iqsr(mo_tot_num,ao_num) )
   
   !$OMP DO SCHEDULE (guided)
   do s=1,ao_num
     do q=1,ao_num
       
       do j=1,ao_num
-        !DIR$ VECTOR ALIGNED
         do i0=1,n_virt_orb
           i = list_virt(i0)
           iqrs(i,j) = 0.d0
@@ -1266,7 +1249,6 @@ END_PROVIDER
           do p=1,ao_num
             integral = int_value(p)
             if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
               do i0=1,n_virt_orb
                 i = list_virt(i0)
                 iqrs(i,r) += mo_coef_transp(i,p) * integral
@@ -1277,7 +1259,6 @@ END_PROVIDER
           do p=1,ao_num
             integral = int_value(p)
             if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
               do i0=1,n_virt_orb
                 i =list_virt(i0)
                 iqsr(i,r) += mo_coef_transp(i,p) * integral
@@ -1294,7 +1275,6 @@ END_PROVIDER
             p = int_idx(pp)
             integral = int_value(pp)
             if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
               do i0=1,n_virt_orb
                 i =list_virt(i0)
                 iqrs(i,r) += mo_coef_transp(i,p) * integral
@@ -1306,7 +1286,6 @@ END_PROVIDER
             p = int_idx(pp)
             integral = int_value(pp)
             if (abs(integral) > ao_integrals_threshold) then
-              !DIR$ VECTOR ALIGNED
               do i0=1,n_virt_orb
                 i = list_virt(i0)
                 iqsr(i,r) += mo_coef_transp(i,p) * integral
@@ -1318,7 +1297,6 @@ END_PROVIDER
       iqis = 0.d0
       iqri = 0.d0
       do r=1,ao_num
-        !DIR$ VECTOR ALIGNED
         do i0=1,n_virt_orb
           i = list_virt(i0)
           iqis(i) += mo_coef_transp(i,r) * iqrs(i,r)
@@ -1327,7 +1305,6 @@ END_PROVIDER
       enddo
       do i0=1,n_virt_orb
         i= list_virt(i0)
-        !DIR$ VECTOR ALIGNED
         do j0=1,n_virt_orb
           j = list_virt(j0)
           c = mo_coef_transp(j,q)*mo_coef_transp(j,s)
@@ -1354,9 +1331,9 @@ END_PROVIDER
 END_PROVIDER
 
 
- BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_exchange, (mo_tot_num_align,mo_tot_num) ]
-&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_anti, (mo_tot_num_align,mo_tot_num) ]
+ BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_exchange, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bielec_integral_jj_anti, (mo_tot_num,mo_tot_num) ]
   implicit none
   BEGIN_DOC
   ! mo_bielec_integral_jj(i,j) = J_ij
diff --git a/src/Integrals_Monoelec/ao_mono_ints.irp.f b/src/Integrals_Monoelec/ao_mono_ints.irp.f
index 4646326e..87d03ac4 100644
--- a/src/Integrals_Monoelec/ao_mono_ints.irp.f
+++ b/src/Integrals_Monoelec/ao_mono_ints.irp.f
@@ -1,4 +1,4 @@
- BEGIN_PROVIDER [ double precision, ao_mono_elec_integral,(ao_num_align,ao_num)]
+ BEGIN_PROVIDER [ double precision, ao_mono_elec_integral,(ao_num,ao_num)]
 &BEGIN_PROVIDER [ double precision, ao_mono_elec_integral_diag,(ao_num)]
   implicit none
   integer :: i,j,n,l
@@ -7,7 +7,6 @@
  ! : sum of the kinetic and nuclear electronic potential 
   END_DOC
   do j = 1, ao_num
-   !DIR$ VECTOR ALIGNED
    do i = 1, ao_num
     ao_mono_elec_integral(i,j) = ao_nucl_elec_integral(i,j) + ao_kinetic_integral(i,j) + ao_pseudo_integral(i,j)
    enddo
diff --git a/src/Integrals_Monoelec/kin_mo_ints.irp.f b/src/Integrals_Monoelec/kin_mo_ints.irp.f
index 2301c23d..262e4805 100644
--- a/src/Integrals_Monoelec/kin_mo_ints.irp.f
+++ b/src/Integrals_Monoelec/kin_mo_ints.irp.f
@@ -1,4 +1,4 @@
-BEGIN_PROVIDER [double precision, mo_kinetic_integral, (mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [double precision, mo_kinetic_integral, (mo_tot_num,mo_tot_num)]
   implicit none
   BEGIN_DOC
   !  Kinetic energy integrals in the MO basis
diff --git a/src/Integrals_Monoelec/mo_mono_ints.irp.f b/src/Integrals_Monoelec/mo_mono_ints.irp.f
index 0d912852..891ed3d5 100644
--- a/src/Integrals_Monoelec/mo_mono_ints.irp.f
+++ b/src/Integrals_Monoelec/mo_mono_ints.irp.f
@@ -1,4 +1,4 @@
-BEGIN_PROVIDER [ double precision, mo_mono_elec_integral,(mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [ double precision, mo_mono_elec_integral,(mo_tot_num,mo_tot_num)]
   implicit none
   integer                        :: i,j,n,l
   BEGIN_DOC
diff --git a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f
index 22cceab9..2a1eaf67 100644
--- a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f
+++ b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f
@@ -1,4 +1,4 @@
-BEGIN_PROVIDER [ double precision, ao_pseudo_integral, (ao_num_align,ao_num)]
+BEGIN_PROVIDER [ double precision, ao_pseudo_integral, (ao_num,ao_num)]
   implicit none
   BEGIN_DOC
   ! Pseudo-potential integrals
@@ -29,7 +29,7 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral, (ao_num_align,ao_num)]
   
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_num)]
+BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num,ao_num)]
   implicit none
   BEGIN_DOC
   ! Local pseudo-potential
@@ -128,7 +128,7 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu
  END_PROVIDER
 
 
- BEGIN_PROVIDER [ double precision, ao_pseudo_integral_non_local, (ao_num_align,ao_num)]
+ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_non_local, (ao_num,ao_num)]
   implicit none
   BEGIN_DOC
   ! Local pseudo-potential
diff --git a/src/Integrals_Monoelec/pot_mo_ints.irp.f b/src/Integrals_Monoelec/pot_mo_ints.irp.f
index 7c7e306f..5810b4f3 100644
--- a/src/Integrals_Monoelec/pot_mo_ints.irp.f
+++ b/src/Integrals_Monoelec/pot_mo_ints.irp.f
@@ -1,4 +1,4 @@
-BEGIN_PROVIDER [double precision, mo_nucl_elec_integral, (mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [double precision, mo_nucl_elec_integral, (mo_tot_num,mo_tot_num)]
  implicit none
  BEGIN_DOC
 ! interaction nuclear electron on the MO basis
@@ -25,7 +25,7 @@ BEGIN_PROVIDER [double precision, mo_nucl_elec_integral, (mo_tot_num_align,mo_to
 END_PROVIDER
 
 
-BEGIN_PROVIDER [double precision, mo_nucl_elec_integral_per_atom, (mo_tot_num_align,mo_tot_num,nucl_num)]
+BEGIN_PROVIDER [double precision, mo_nucl_elec_integral_per_atom, (mo_tot_num,mo_tot_num,nucl_num)]
  implicit none
  BEGIN_DOC
 ! mo_nucl_elec_integral_per_atom(i,j,k) = -<MO(i)|1/|r-Rk|MO(j)> 
diff --git a/src/Integrals_Monoelec/pot_mo_pseudo_ints.irp.f b/src/Integrals_Monoelec/pot_mo_pseudo_ints.irp.f
index f2fee5f4..47e6e277 100644
--- a/src/Integrals_Monoelec/pot_mo_pseudo_ints.irp.f
+++ b/src/Integrals_Monoelec/pot_mo_pseudo_ints.irp.f
@@ -1,4 +1,4 @@
-BEGIN_PROVIDER [double precision, mo_pseudo_integral, (mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [double precision, mo_pseudo_integral, (mo_tot_num,mo_tot_num)]
   implicit none
   BEGIN_DOC
   ! interaction nuclear electron on the MO basis
diff --git a/src/Integrals_Monoelec/spread_dipole_ao.irp.f b/src/Integrals_Monoelec/spread_dipole_ao.irp.f
index 5611ec7f..2ff1494f 100644
--- a/src/Integrals_Monoelec/spread_dipole_ao.irp.f
+++ b/src/Integrals_Monoelec/spread_dipole_ao.irp.f
@@ -1,6 +1,6 @@
-  BEGIN_PROVIDER [ double precision, ao_spread_x, (ao_num_align,ao_num)]
- &BEGIN_PROVIDER [ double precision, ao_spread_y, (ao_num_align,ao_num)]
- &BEGIN_PROVIDER [ double precision, ao_spread_z, (ao_num_align,ao_num)]
+  BEGIN_PROVIDER [ double precision, ao_spread_x, (ao_num,ao_num)]
+ &BEGIN_PROVIDER [ double precision, ao_spread_y, (ao_num,ao_num)]
+ &BEGIN_PROVIDER [ double precision, ao_spread_z, (ao_num,ao_num)]
  BEGIN_DOC
  ! array of the integrals of AO_i * x^2 AO_j
  ! array of the integrals of AO_i * y^2 AO_j
@@ -35,8 +35,6 @@
    power_A(1)  = ao_power( j, 1 )
    power_A(2)  = ao_power( j, 2 )
    power_A(3)  = ao_power( j, 3 )
-   !DEC$ VECTOR ALIGNED
-   !DEC$ VECTOR ALWAYS
    do i= 1,ao_num
     B_center(1) = nucl_coord( ao_nucl(i), 1 )
     B_center(2) = nucl_coord( ao_nucl(i), 2 )
@@ -49,7 +47,6 @@
     accu_z = 0.d0
     do n = 1,ao_prim_num(j)
      alpha = ao_expo_ordered_transp(n,j)
-     !DEC$ VECTOR ALIGNED
      do l = 1, ao_prim_num(i)
       c = ao_coef_normalized_ordered_transp(n,j)*ao_coef_normalized_ordered_transp(l,i)
       beta = ao_expo_ordered_transp(l,i)
@@ -72,9 +69,9 @@
 
 
 
-  BEGIN_PROVIDER [ double precision, ao_dipole_x, (ao_num_align,ao_num)]
- &BEGIN_PROVIDER [ double precision, ao_dipole_y, (ao_num_align,ao_num)]
- &BEGIN_PROVIDER [ double precision, ao_dipole_z, (ao_num_align,ao_num)]
+  BEGIN_PROVIDER [ double precision, ao_dipole_x, (ao_num,ao_num)]
+ &BEGIN_PROVIDER [ double precision, ao_dipole_y, (ao_num,ao_num)]
+ &BEGIN_PROVIDER [ double precision, ao_dipole_z, (ao_num,ao_num)]
  BEGIN_DOC
  ! array of the integrals of AO_i * x AO_j
  ! array of the integrals of AO_i * y AO_j
@@ -109,8 +106,6 @@
    power_A(1)  = ao_power( j, 1 )
    power_A(2)  = ao_power( j, 2 )
    power_A(3)  = ao_power( j, 3 )
-   !DEC$ VECTOR ALIGNED
-   !DEC$ VECTOR ALWAYS
    do i= 1,ao_num
     B_center(1) = nucl_coord( ao_nucl(i), 1 )
     B_center(2) = nucl_coord( ao_nucl(i), 2 )
@@ -123,7 +118,6 @@
     accu_z = 0.d0
     do n = 1,ao_prim_num(j)
      alpha = ao_expo_ordered_transp(n,j)
-     !DEC$ VECTOR ALIGNED
      do l = 1, ao_prim_num(i)
       beta = ao_expo_ordered_transp(l,i)
       c = ao_coef_normalized_ordered_transp(l,i)*ao_coef_normalized_ordered_transp(n,j)
@@ -145,9 +139,9 @@
   !$OMP END PARALLEL DO
  END_PROVIDER
 
-  BEGIN_PROVIDER [ double precision, ao_deriv_1_x, (ao_num_align,ao_num)]
- &BEGIN_PROVIDER [ double precision, ao_deriv_1_y, (ao_num_align,ao_num)]
- &BEGIN_PROVIDER [ double precision, ao_deriv_1_z, (ao_num_align,ao_num)]
+  BEGIN_PROVIDER [ double precision, ao_deriv_1_x, (ao_num,ao_num)]
+ &BEGIN_PROVIDER [ double precision, ao_deriv_1_y, (ao_num,ao_num)]
+ &BEGIN_PROVIDER [ double precision, ao_deriv_1_z, (ao_num,ao_num)]
  BEGIN_DOC
  ! array of the integrals of AO_i * d/dx  AO_j
  ! array of the integrals of AO_i * d/dy  AO_j
@@ -183,8 +177,6 @@
    power_A(1)  = ao_power( j, 1 )
    power_A(2)  = ao_power( j, 2 )
    power_A(3)  = ao_power( j, 3 )
-   !DEC$ VECTOR ALIGNED
-   !DEC$ VECTOR ALWAYS
    do i= 1,ao_num
     B_center(1) = nucl_coord( ao_nucl(i), 1 )
     B_center(2) = nucl_coord( ao_nucl(i), 2 )
@@ -197,7 +189,6 @@
     accu_z = 0.d0
     do n = 1,ao_prim_num(j)
      alpha = ao_expo_ordered_transp(n,j)
-     !DEC$ VECTOR ALIGNED
      do l = 1, ao_prim_num(i)
       beta = ao_expo_ordered_transp(l,i)
       call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
diff --git a/src/Integrals_Monoelec/spread_dipole_mo.irp.f b/src/Integrals_Monoelec/spread_dipole_mo.irp.f
index aa5ef8aa..9e21ec21 100644
--- a/src/Integrals_Monoelec/spread_dipole_mo.irp.f
+++ b/src/Integrals_Monoelec/spread_dipole_mo.irp.f
@@ -1,6 +1,6 @@
- BEGIN_PROVIDER [double precision, mo_dipole_x , (mo_tot_num_align,mo_tot_num)]
-&BEGIN_PROVIDER [double precision, mo_dipole_y , (mo_tot_num_align,mo_tot_num)]
-&BEGIN_PROVIDER [double precision, mo_dipole_z , (mo_tot_num_align,mo_tot_num)]
+ BEGIN_PROVIDER [double precision, mo_dipole_x , (mo_tot_num,mo_tot_num)]
+&BEGIN_PROVIDER [double precision, mo_dipole_y , (mo_tot_num,mo_tot_num)]
+&BEGIN_PROVIDER [double precision, mo_dipole_z , (mo_tot_num,mo_tot_num)]
  BEGIN_DOC
  ! array of the integrals of MO_i * x MO_j
  ! array of the integrals of MO_i * y MO_j
@@ -29,9 +29,9 @@
 
 END_PROVIDER
 
- BEGIN_PROVIDER [double precision, mo_spread_x , (mo_tot_num_align,mo_tot_num)]
-&BEGIN_PROVIDER [double precision, mo_spread_y , (mo_tot_num_align,mo_tot_num)]
-&BEGIN_PROVIDER [double precision, mo_spread_z , (mo_tot_num_align,mo_tot_num)]
+ BEGIN_PROVIDER [double precision, mo_spread_x , (mo_tot_num,mo_tot_num)]
+&BEGIN_PROVIDER [double precision, mo_spread_y , (mo_tot_num,mo_tot_num)]
+&BEGIN_PROVIDER [double precision, mo_spread_z , (mo_tot_num,mo_tot_num)]
  BEGIN_DOC
  ! array of the integrals of MO_i * x^2 MO_j
  ! array of the integrals of MO_i * y^2 MO_j
diff --git a/src/MOGuess/mo_ortho_lowdin.irp.f b/src/MOGuess/mo_ortho_lowdin.irp.f
index 519e4f0d..ab90aa3f 100644
--- a/src/MOGuess/mo_ortho_lowdin.irp.f
+++ b/src/MOGuess/mo_ortho_lowdin.irp.f
@@ -1,4 +1,4 @@
-BEGIN_PROVIDER [double precision, ao_ortho_lowdin_coef, (ao_num_align,ao_num)]
+BEGIN_PROVIDER [double precision, ao_ortho_lowdin_coef, (ao_num,ao_num)]
   implicit none
   BEGIN_DOC
 ! matrix of the coefficients of the mos generated by the 
@@ -8,12 +8,12 @@ BEGIN_PROVIDER [double precision, ao_ortho_lowdin_coef, (ao_num_align,ao_num)]
   integer                        :: i,j,k,l
   double precision               :: accu
   double precision, allocatable  :: tmp_matrix(:,:)
-  allocate (tmp_matrix(ao_num_align,ao_num))
+  allocate (tmp_matrix(ao_num,ao_num))
   tmp_matrix(:,:) = 0.d0
   do j=1, ao_num
     tmp_matrix(j,j) = 1.d0
   enddo
-  call ortho_lowdin(ao_overlap,ao_num_align,ao_num,tmp_matrix,ao_num_align,ao_num)
+  call ortho_lowdin(ao_overlap,ao_num,ao_num,tmp_matrix,ao_num,ao_num)
   do i=1, ao_num
     do j=1, ao_num
       ao_ortho_lowdin_coef(j,i) = tmp_matrix(i,j)
@@ -22,7 +22,7 @@ BEGIN_PROVIDER [double precision, ao_ortho_lowdin_coef, (ao_num_align,ao_num)]
   deallocate(tmp_matrix)
 END_PROVIDER
 
-BEGIN_PROVIDER [double precision, ao_ortho_lowdin_overlap, (ao_num_align,ao_num)]
+BEGIN_PROVIDER [double precision, ao_ortho_lowdin_overlap, (ao_num,ao_num)]
   implicit none
   BEGIN_DOC
 ! overlap matrix of the ao_ortho_lowdin
diff --git a/src/MOGuess/pot_mo_ortho_canonical_ints.irp.f b/src/MOGuess/pot_mo_ortho_canonical_ints.irp.f
index 9c61ebcd..78fe7948 100644
--- a/src/MOGuess/pot_mo_ortho_canonical_ints.irp.f
+++ b/src/MOGuess/pot_mo_ortho_canonical_ints.irp.f
@@ -1,4 +1,4 @@
-BEGIN_PROVIDER [double precision, ao_ortho_canonical_nucl_elec_integral, (mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [double precision, ao_ortho_canonical_nucl_elec_integral, (mo_tot_num,mo_tot_num)]
  implicit none
  integer :: i1,j1,i,j
  double precision :: c_i1,c_j1
diff --git a/src/MOGuess/pot_mo_ortho_lowdin_ints.irp.f b/src/MOGuess/pot_mo_ortho_lowdin_ints.irp.f
index 3b1875f0..27ad6503 100644
--- a/src/MOGuess/pot_mo_ortho_lowdin_ints.irp.f
+++ b/src/MOGuess/pot_mo_ortho_lowdin_ints.irp.f
@@ -1,4 +1,4 @@
-BEGIN_PROVIDER [double precision, ao_ortho_lowdin_nucl_elec_integral, (mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [double precision, ao_ortho_lowdin_nucl_elec_integral, (mo_tot_num,mo_tot_num)]
  implicit none
  integer :: i1,j1,i,j
  double precision :: c_i1,c_j1
diff --git a/src/MO_Basis/ao_ortho_canonical.irp.f b/src/MO_Basis/ao_ortho_canonical.irp.f
index 2184ce4a..b0400f67 100644
--- a/src/MO_Basis/ao_ortho_canonical.irp.f
+++ b/src/MO_Basis/ao_ortho_canonical.irp.f
@@ -1,4 +1,4 @@
- BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_coef, (ao_num_align,ao_num)]
+ BEGIN_PROVIDER [ double precision, ao_cart_to_sphe_coef, (ao_num,ao_num)]
 &BEGIN_PROVIDER [ integer, ao_cart_to_sphe_num ]
   implicit none
   BEGIN_DOC
@@ -83,7 +83,7 @@ END_PROVIDER
 
 
 
-BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef_inv, (ao_num_align,ao_num)]
+BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef_inv, (ao_num,ao_num)]
  implicit none
  BEGIN_DOC
 ! ao_ortho_canonical_coef^(-1)
@@ -92,7 +92,7 @@ BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef_inv, (ao_num_align,ao
      ao_num, ao_ortho_canonical_coef_inv, size(ao_ortho_canonical_coef_inv,1))
 END_PROVIDER
 
- BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef, (ao_num_align,ao_num)]
+ BEGIN_PROVIDER [ double precision, ao_ortho_canonical_coef, (ao_num,ao_num)]
 &BEGIN_PROVIDER [ integer, ao_ortho_canonical_num ]
   implicit none
   BEGIN_DOC
diff --git a/src/MO_Basis/mo_overlap.irp.f b/src/MO_Basis/mo_overlap.irp.f
index c7e146bc..291bb38d 100644
--- a/src/MO_Basis/mo_overlap.irp.f
+++ b/src/MO_Basis/mo_overlap.irp.f
@@ -1,5 +1,5 @@
 
-BEGIN_PROVIDER [ double precision, mo_overlap,(mo_tot_num_align,mo_tot_num)]
+BEGIN_PROVIDER [ double precision, mo_overlap,(mo_tot_num,mo_tot_num)]
   implicit none
   integer :: i,j,n,l
   double precision :: f
@@ -13,7 +13,6 @@ BEGIN_PROVIDER [ double precision, mo_overlap,(mo_tot_num_align,mo_tot_num)]
    do i= 1,mo_tot_num
     mo_overlap(i,j) = 0.d0
     do n = 1, lmax,4
-     !DIR$ VECTOR ALIGNED
      do l = 1, ao_num
       mo_overlap(i,j) = mo_overlap(i,j) + mo_coef(l,i) * &
            ( mo_coef(n  ,j) * ao_overlap(l,n  )  &
@@ -23,7 +22,6 @@ BEGIN_PROVIDER [ double precision, mo_overlap,(mo_tot_num_align,mo_tot_num)]
      enddo
     enddo
     do n = lmax+1, ao_num
-     !DIR$ VECTOR ALIGNED
      do l = 1, ao_num
       mo_overlap(i,j) = mo_overlap(i,j) + mo_coef(n,j) * mo_coef(l,i) * ao_overlap(l,n)
      enddo
diff --git a/src/MO_Basis/mo_permutation.irp.f b/src/MO_Basis/mo_permutation.irp.f
index 72f132d7..72fbd07b 100644
--- a/src/MO_Basis/mo_permutation.irp.f
+++ b/src/MO_Basis/mo_permutation.irp.f
@@ -2,7 +2,7 @@ program permut_mos
  implicit none
  integer :: mo1,mo2
  integer :: i,j,k,l
- double precision :: mo_coef_tmp(ao_num_align,2)
+ double precision :: mo_coef_tmp(ao_num,2)
  print*,'Which MOs would you like to change ?'
  read(5,*)mo1,mo2
  print*,''
diff --git a/src/MO_Basis/mos.irp.f b/src/MO_Basis/mos.irp.f
index 4bcd7221..b2214b0d 100644
--- a/src/MO_Basis/mos.irp.f
+++ b/src/MO_Basis/mos.irp.f
@@ -16,17 +16,7 @@ BEGIN_PROVIDER [ integer, mo_tot_num ]
 END_PROVIDER
 
 
-BEGIN_PROVIDER [ integer, mo_tot_num_align ]
-  implicit none
-  BEGIN_DOC
-  ! Aligned variable for dimensioning of arrays
-  END_DOC
-  integer                        :: align_double
-  mo_tot_num_align = align_double(mo_tot_num)
-END_PROVIDER
-
-
- BEGIN_PROVIDER [ double precision, mo_coef, (ao_num_align,mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, mo_coef, (ao_num,mo_tot_num) ]
   implicit none
   BEGIN_DOC
   ! Molecular orbital coefficients on AO basis set
@@ -42,32 +32,18 @@ END_PROVIDER
   ! Coefs
   call ezfio_has_mo_basis_mo_coef(exists)
   if (exists) then
-    allocate(buffer(ao_num,mo_tot_num))
-    buffer = 0.d0
-    call ezfio_get_mo_basis_mo_coef(buffer)
-    do i=1,mo_tot_num
-      do j=1,ao_num
-        mo_coef(j,i) = buffer(j,i)
-      enddo
-      do j=ao_num+1,ao_num_align
-        mo_coef(j,i) = 0.d0
-      enddo
-    enddo
-    deallocate(buffer)
+    call ezfio_get_mo_basis_mo_coef(mo_coef)
   else
     ! Orthonormalized AO basis
     do i=1,mo_tot_num
       do j=1,ao_num
         mo_coef(j,i) = ao_ortho_canonical_coef(j,i)
       enddo
-      do j=ao_num+1,ao_num_align
-        mo_coef(j,i) = 0.d0
-      enddo
     enddo
   endif
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, mo_coef_in_ao_ortho_basis, (ao_num_align, mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, mo_coef_in_ao_ortho_basis, (ao_num, mo_tot_num) ]
  implicit none
  BEGIN_DOC
  ! MO coefficients in orthogonalized AO basis
@@ -99,7 +75,7 @@ BEGIN_PROVIDER [ character*(64), mo_label ]
   endif
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, mo_coef_transp, (mo_tot_num_align,ao_num) ]
+BEGIN_PROVIDER [ double precision, mo_coef_transp, (mo_tot_num,ao_num) ]
   implicit none
   BEGIN_DOC
   ! Molecular orbital coefficients on AO basis set
@@ -110,14 +86,11 @@ BEGIN_PROVIDER [ double precision, mo_coef_transp, (mo_tot_num_align,ao_num) ]
     do i=1,mo_tot_num
       mo_coef_transp(i,j) = mo_coef(j,i)
     enddo
-    do i=mo_tot_num+1,mo_tot_num_align
-      mo_coef_transp(i,j) = 0.d0
-    enddo
   enddo
   
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, S_mo_coef, (ao_num_align, mo_tot_num) ]
+BEGIN_PROVIDER [ double precision, S_mo_coef, (ao_num, mo_tot_num) ]
  implicit none
  BEGIN_DOC
  ! Product S.C where S is the overlap matrix in the AO basis and C the mo_coef matrix.
@@ -165,18 +138,18 @@ subroutine ao_to_mo(A_ao,LDA_ao,A_mo,LDA_mo)
   double precision, intent(out)  :: A_mo(LDA_mo,mo_tot_num)
   double precision, allocatable  :: T(:,:)
   
-  allocate ( T(ao_num_align,mo_tot_num) )
+  allocate ( T(ao_num,mo_tot_num) )
   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
   
   call dgemm('N','N', ao_num, mo_tot_num, ao_num,                    &
       1.d0, A_ao,LDA_ao,                                             &
       mo_coef, size(mo_coef,1),                                      &
-      0.d0, T, ao_num_align)
+      0.d0, T, size(T,1))
   
   call dgemm('T','N', mo_tot_num, mo_tot_num, ao_num,                &
       1.d0, mo_coef,size(mo_coef,1),                                 &
-      T, ao_num_align,                                               &
-      0.d0, A_mo, LDA_mo)
+      T, ao_num,                                                     &   
+      0.d0, A_mo, size(A_mo,1))
   
   deallocate(T)
 end
@@ -193,7 +166,7 @@ subroutine mo_to_ao(A_mo,LDA_mo,A_ao,LDA_ao)
   double precision, intent(out)  :: A_ao(LDA_ao,ao_num)
   double precision, allocatable  :: T(:,:)
   
-  allocate ( T(mo_tot_num_align,ao_num) )
+  allocate ( T(mo_tot_num,ao_num) )
   
   call dgemm('N','T', mo_tot_num, ao_num, mo_tot_num,                &
       1.d0, A_mo,size(A_mo,1),                                       &
@@ -219,7 +192,7 @@ subroutine mo_to_ao_no_overlap(A_mo,LDA_mo,A_ao,LDA_ao)
   double precision, intent(out)  :: A_ao(LDA_ao,ao_num)
   double precision, allocatable  :: T(:,:)
   
-  allocate ( T(mo_tot_num_align,ao_num) )
+  allocate ( T(mo_tot_num,ao_num) )
   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
   
   call dgemm('N','T', mo_tot_num, ao_num, mo_tot_num,                &
@@ -281,7 +254,7 @@ subroutine ao_ortho_cano_to_ao(A_ao,LDA_ao,A,LDA)
   double precision, intent(out)  :: A(LDA,*)
   double precision, allocatable  :: T(:,:)
   
-  allocate ( T(ao_num_align,ao_num) )
+  allocate ( T(ao_num,ao_num) )
   
   call dgemm('T','N', ao_num, ao_num, ao_num,                        &
       1.d0,                                                          &
diff --git a/src/MO_Basis/swap_mos.irp.f b/src/MO_Basis/swap_mos.irp.f
index df0dc64d..80a29965 100644
--- a/src/MO_Basis/swap_mos.irp.f
+++ b/src/MO_Basis/swap_mos.irp.f
@@ -4,7 +4,7 @@ program swap_mos
   double precision :: x
   print *,  'MOs to swap?'
   read(*,*) i1, i2
-  do i=1,ao_num_align
+  do i=1,ao_num
     x = mo_coef(i,i1)
     mo_coef(i,i1) = mo_coef(i,i2)
     mo_coef(i,i2) = x
diff --git a/src/MO_Basis/utils.irp.f b/src/MO_Basis/utils.irp.f
index 4221fce4..4806582b 100644
--- a/src/MO_Basis/utils.irp.f
+++ b/src/MO_Basis/utils.irp.f
@@ -60,7 +60,7 @@ subroutine mo_as_eigvectors_of_mo_matrix(matrix,n,m,label,sign,output)
     print *, irp_here, ': Error : m/= mo_tot_num'
     stop 1
   endif
-  allocate(A(n,m),R(n,m),mo_coef_new(ao_num_align,m),eigvalues(m))
+  allocate(A(n,m),R(n,m),mo_coef_new(ao_num,m),eigvalues(m))
   if (sign == -1) then
     do j=1,m
       do i=1,n
@@ -121,7 +121,7 @@ subroutine mo_as_svd_vectors_of_mo_matrix(matrix,lda,m,n,label)
     stop 1
   endif
 
-  allocate(A(lda,n),U(lda,n),mo_coef_new(ao_num_align,m),D(m),Vt(lda,n))
+  allocate(A(lda,n),U(lda,n),mo_coef_new(ao_num,m),D(m),Vt(lda,n))
 
   do j=1,n
     do i=1,m
@@ -167,7 +167,7 @@ subroutine mo_as_eigvectors_of_mo_matrix_sort_by_observable(matrix,observable,n,
     print *, irp_here, ': Error : m/= mo_tot_num'
     stop 1
   endif
-  allocate(R(n,m),mo_coef_new(ao_num_align,m),eigvalues(m),value(m),iorder(m))
+  allocate(R(n,m),mo_coef_new(ao_num,m),eigvalues(m),value(m),iorder(m))
   mo_coef_new = mo_coef
   
   call lapack_diag(eigvalues,R,matrix,size(matrix,1),size(matrix,2))
@@ -242,7 +242,7 @@ subroutine mo_sort_by_observable(observable,label)
   double precision, allocatable  :: mo_coef_new(:,:),value(:)
   integer,allocatable :: iorder(:)
   
-  allocate(mo_coef_new(ao_num_align,mo_tot_num),value(mo_tot_num),iorder(mo_tot_num))
+  allocate(mo_coef_new(ao_num,mo_tot_num),value(mo_tot_num),iorder(mo_tot_num))
   print*,'allocate !'
   mo_coef_new = mo_coef
   
@@ -283,7 +283,7 @@ end
 subroutine give_specific_mos_at_r(r,mos_array, mo_coef_specific)
  implicit none
  double precision, intent(in) :: r(3)
- double precision, intent(in)  :: mo_coef_specific(ao_num_align, mo_tot_num)
+ double precision, intent(in)  :: mo_coef_specific(ao_num, mo_tot_num)
  double precision, intent(out) :: mos_array(mo_tot_num)
  double precision :: aos_array(ao_num),accu
  integer :: i,j
diff --git a/plugins/MPI/.gitignore b/src/MPI/.gitignore
similarity index 100%
rename from plugins/MPI/.gitignore
rename to src/MPI/.gitignore
diff --git a/plugins/MPI/NEEDED_CHILDREN_MODULES b/src/MPI/NEEDED_CHILDREN_MODULES
similarity index 100%
rename from plugins/MPI/NEEDED_CHILDREN_MODULES
rename to src/MPI/NEEDED_CHILDREN_MODULES
diff --git a/plugins/MPI/README.rst b/src/MPI/README.rst
similarity index 100%
rename from plugins/MPI/README.rst
rename to src/MPI/README.rst
diff --git a/plugins/MPI/mpi.irp.f b/src/MPI/mpi.irp.f
similarity index 72%
rename from plugins/MPI/mpi.irp.f
rename to src/MPI/mpi.irp.f
index 0a65b8fd..dbc2caa9 100644
--- a/plugins/MPI/mpi.irp.f
+++ b/src/MPI/mpi.irp.f
@@ -1,23 +1,3 @@
-BEGIN_PROVIDER [ integer, mpi_bit_kind ]
- use bitmasks
- implicit none
- BEGIN_DOC
- ! MPI bit kind type
- END_DOC
- IRP_IF MPI
-  include 'mpif.h'
-  if (bit_kind == 4) then
-    mpi_bit_kind = MPI_INTEGER4
-  else if (bit_kind == 8) then
-    mpi_bit_kind = MPI_INTEGER8
-  else
-    stop 'Wrong bit kind in mpi_bit_kind'
-  endif
- IRP_ELSE
-  mpi_bit_kind = -1
- IRP_ENDIF
-END_PROVIDER
-
 BEGIN_PROVIDER [ logical, mpi_initialized ]
  implicit none
  BEGIN_DOC
@@ -88,7 +68,6 @@ subroutine broadcast_chunks_$double(A, LDA)
   implicit none
   integer, intent(in)             :: LDA
   $type, intent(inout) :: A(LDA)
-  use bitmasks
   BEGIN_DOC
 ! Broadcast with chunks of ~2GB
   END_DOC
@@ -99,7 +78,7 @@ subroutine broadcast_chunks_$double(A, LDA)
       sze = min(LDA-i+1, 200000000/$8)
       call MPI_BCAST (A(i), sze, MPI_$DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
       if (ierr /= MPI_SUCCESS) then
-        print *,  irp_here//': Unable to broadcast chuks $double ', i
+        print *,  irp_here//': Unable to broadcast chunks $double ', i
         stop -1
       endif
     enddo
@@ -107,10 +86,9 @@ subroutine broadcast_chunks_$double(A, LDA)
 end
 
 SUBST [ double, type, 8, DOUBLE_PRECISION ]
-double    ; double precision  ; 8             ; DOUBLE_PRECISION ;;
-bit_kind  ; integer(bit_kind) ; bit_kind_size ; BIT_KIND ;;
-integer   ; integer          ; 4             ; INTEGER4 ;;
-integer8  ; integer*8        ; 8             ; INTEGER8 ;;
+double    ; double precision  ; 8 ; DOUBLE_PRECISION ;;
+integer   ; integer           ; 4  ; INTEGER4 ;;
+integer8  ; integer*8         ; 8  ; INTEGER8 ;;
 
 END_TEMPLATE
 
diff --git a/src/Nuclei/nuclei.irp.f b/src/Nuclei/nuclei.irp.f
index 577b8b92..10fda4cd 100644
--- a/src/Nuclei/nuclei.irp.f
+++ b/src/Nuclei/nuclei.irp.f
@@ -1,15 +1,4 @@
-BEGIN_PROVIDER [ integer, nucl_num_aligned ]
-   implicit none
-   BEGIN_DOC
-   ! Number of nuclei algined
-   END_DOC
-   
-   PROVIDE ezfio_filename
-   integer                        :: align_double
-   nucl_num_aligned = align_double(nucl_num)
-END_PROVIDER
- 
-BEGIN_PROVIDER [ double precision, nucl_coord,  (nucl_num_aligned,3) ]
+BEGIN_PROVIDER [ double precision, nucl_coord,  (nucl_num,3) ]
    implicit none
    
    BEGIN_DOC
@@ -79,11 +68,11 @@ BEGIN_PROVIDER [ double precision, nucl_coord_transp, (3,nucl_num) ]
    enddo
 END_PROVIDER
  
- BEGIN_PROVIDER [ double precision, nucl_dist_2, (nucl_num_aligned,nucl_num) ]
-&BEGIN_PROVIDER [ double precision, nucl_dist_vec_x, (nucl_num_aligned,nucl_num) ]
-&BEGIN_PROVIDER [ double precision, nucl_dist_vec_y, (nucl_num_aligned,nucl_num) ]
-&BEGIN_PROVIDER [ double precision, nucl_dist_vec_z, (nucl_num_aligned,nucl_num) ]
-&BEGIN_PROVIDER [ double precision, nucl_dist, (nucl_num_aligned,nucl_num) ]
+ BEGIN_PROVIDER [ double precision, nucl_dist_2, (nucl_num,nucl_num) ]
+&BEGIN_PROVIDER [ double precision, nucl_dist_vec_x, (nucl_num,nucl_num) ]
+&BEGIN_PROVIDER [ double precision, nucl_dist_vec_y, (nucl_num,nucl_num) ]
+&BEGIN_PROVIDER [ double precision, nucl_dist_vec_z, (nucl_num,nucl_num) ]
+&BEGIN_PROVIDER [ double precision, nucl_dist, (nucl_num,nucl_num) ]
    implicit none
    BEGIN_DOC
    ! nucl_dist     : Nucleus-nucleus distances
@@ -105,16 +94,12 @@ END_PROVIDER
    endif
    
    do ie2 = 1,nucl_num
-     !DEC$ VECTOR ALWAYS
-     !DEC$ VECTOR ALIGNED
-     do ie1 = 1,nucl_num_aligned
+     do ie1 = 1,nucl_num
        nucl_dist_vec_x(ie1,ie2) = nucl_coord(ie1,1) - nucl_coord(ie2,1)
        nucl_dist_vec_y(ie1,ie2) = nucl_coord(ie1,2) - nucl_coord(ie2,2)
        nucl_dist_vec_z(ie1,ie2) = nucl_coord(ie1,3) - nucl_coord(ie2,3)
      enddo
-     !DEC$ VECTOR ALWAYS
-     !DEC$ VECTOR ALIGNED
-     do ie1 = 1,nucl_num_aligned
+     do ie1 = 1,nucl_num
        nucl_dist_2(ie1,ie2) = nucl_dist_vec_x(ie1,ie2)*nucl_dist_vec_x(ie1,ie2) +&
            nucl_dist_vec_y(ie1,ie2)*nucl_dist_vec_y(ie1,ie2) +       &
            nucl_dist_vec_z(ie1,ie2)*nucl_dist_vec_z(ie1,ie2)
diff --git a/src/Selectors_Utils/NEEDED_CHILDREN_MODULES b/src/Selectors_Utils/NEEDED_CHILDREN_MODULES
new file mode 100644
index 00000000..bff2467f
--- /dev/null
+++ b/src/Selectors_Utils/NEEDED_CHILDREN_MODULES
@@ -0,0 +1 @@
+Determinants 
diff --git a/src/Selectors_Utils/README.rst b/src/Selectors_Utils/README.rst
new file mode 100644
index 00000000..fc264fc1
--- /dev/null
+++ b/src/Selectors_Utils/README.rst
@@ -0,0 +1,190 @@
+=====================
+Selectors_full Module
+=====================
+
+Needed Modules
+==============
+
+.. Do not edit this section It was auto-generated
+.. by the `update_README.py` script.
+
+.. image:: tree_dependency.png
+
+* `Determinants <http://github.com/LCPQ/quantum_package/tree/master/src/Determinants>`_
+* `Hartree_Fock <http://github.com/LCPQ/quantum_package/tree/master/src/Hartree_Fock>`_
+
+Needed Modules
+==============
+.. Do not edit this section It was auto-generated
+.. by the `update_README.py` script.
+
+
+.. image:: tree_dependency.png
+
+* `Determinants <http://github.com/LCPQ/quantum_package/tree/master/src/Determinants>`_
+* `Hartree_Fock <http://github.com/LCPQ/quantum_package/tree/master/plugins/Hartree_Fock>`_
+
+Documentation
+=============
+.. Do not edit this section It was auto-generated
+.. by the `update_README.py` script.
+
+
+`coef_hf_selector <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L28>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`delta_e_per_selector <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L33>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`double_index_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L4>`_
+  degree of excitation respect to Hartree Fock for the wave function
+  .br
+  for the all the selectors determinants
+  .br
+  double_index_selectors = list of the index of the double excitations
+  .br
+  n_double_selectors = number of double excitations in the selectors determinants
+
+
+`e_corr_double_only <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L34>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`e_corr_per_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L31>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`e_corr_second_order <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L35>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`exc_degree_per_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L3>`_
+  degree of excitation respect to Hartree Fock for the wave function
+  .br
+  for the all the selectors determinants
+  .br
+  double_index_selectors = list of the index of the double excitations
+  .br
+  n_double_selectors = number of double excitations in the selectors determinants
+
+
+`i_h_hf_per_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L32>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`inv_selectors_coef_hf <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L29>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`inv_selectors_coef_hf_squared <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L30>`_
+  energy of correlation per determinant respect to the Hartree Fock determinant
+  .br
+  for the all the double excitations in the selectors determinants
+  .br
+  E_corr_per_selectors(i) = <D_i|H|HF> * c(D_i)/c(HF) if |D_i> is a double excitation
+  .br
+  E_corr_per_selectors(i) = -1000.d0 if it is not a double excitation
+  .br
+  coef_hf_selector = coefficient of the Hartree Fock determinant in the selectors determinants
+
+
+`n_det_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/selectors.irp.f#L8>`_
+  For Single reference wave functions, the number of selectors is 1 : the
+  Hartree-Fock determinant
+
+
+`n_double_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/e_corr_selectors.irp.f#L5>`_
+  degree of excitation respect to Hartree Fock for the wave function
+  .br
+  for the all the selectors determinants
+  .br
+  double_index_selectors = list of the index of the double excitations
+  .br
+  n_double_selectors = number of double excitations in the selectors determinants
+
+
+`psi_selectors <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/selectors.irp.f#L32>`_
+  Determinants on which we apply <i|H|psi> for perturbation.
+
+
+`psi_selectors_coef <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/selectors.irp.f#L33>`_
+  Determinants on which we apply <i|H|psi> for perturbation.
+
+
+`psi_selectors_coef_transp <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/selectors.irp.f#L53>`_
+  Transposed psi_selectors
+
+
+`psi_selectors_diag_h_mat <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/selectors.irp.f#L67>`_
+  Diagonal elements of the H matrix for each selectors
+
+
+`psi_selectors_size <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/selectors.irp.f#L3>`_
+  Undocumented
+
+
+`zmq_get_psi <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/zmq.irp.f#L51>`_
+  Get the wave function from the qp_run scheduler
+
+
+`zmq_put_psi <http://github.com/LCPQ/quantum_package/tree/master/plugins/Selectors_full/zmq.irp.f#L1>`_
+  Put the wave function on the qp_run scheduler
+
diff --git a/plugins/Selectors_full/e_corr_selectors.irp.f b/src/Selectors_Utils/e_corr_selectors.irp.f
similarity index 99%
rename from plugins/Selectors_full/e_corr_selectors.irp.f
rename to src/Selectors_Utils/e_corr_selectors.irp.f
index fec480f0..6be8ce6e 100644
--- a/plugins/Selectors_full/e_corr_selectors.irp.f
+++ b/src/Selectors_Utils/e_corr_selectors.irp.f
@@ -1,4 +1,3 @@
-
 use bitmasks
  BEGIN_PROVIDER [integer, exc_degree_per_selectors, (N_det_selectors)]
 &BEGIN_PROVIDER [integer, double_index_selectors, (N_det_selectors)]
diff --git a/src/Selectors_Utils/selectors.irp.f b/src/Selectors_Utils/selectors.irp.f
new file mode 100644
index 00000000..708b709b
--- /dev/null
+++ b/src/Selectors_Utils/selectors.irp.f
@@ -0,0 +1,34 @@
+use bitmasks
+
+BEGIN_PROVIDER [ integer, psi_selectors_size ]
+ implicit none
+ psi_selectors_size = psi_det_size
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ]
+  implicit none
+  BEGIN_DOC
+  ! Transposed psi_selectors
+  END_DOC
+  integer                        :: i,k
+
+  do i=1,N_det_selectors
+    do k=1,N_states
+      psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k) 
+    enddo
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ]
+  implicit none
+  BEGIN_DOC
+  ! Diagonal elements of the H matrix for each selectors 
+  END_DOC
+  integer                        :: i
+  double precision :: diag_H_mat_elem
+  do i = 1, N_det_selectors
+   psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int)
+  enddo
+END_PROVIDER
+
+
diff --git a/plugins/Selectors_full/zmq.irp.f b/src/Selectors_Utils/zmq.irp.f
similarity index 73%
rename from plugins/Selectors_full/zmq.irp.f
rename to src/Selectors_Utils/zmq.irp.f
index eae7e7fd..5648da67 100644
--- a/plugins/Selectors_full/zmq.irp.f
+++ b/src/Selectors_Utils/zmq.irp.f
@@ -19,43 +19,6 @@ subroutine zmq_put_psi(zmq_to_qp_run_socket,worker_id)
 end
 
 
-subroutine zmq_put_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x)
-  use f77_zmq
-  implicit none
-  BEGIN_DOC
-! Put the X vector on the qp_run scheduler
-  END_DOC
-  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
-  integer, intent(in)            :: worker_id
-  character*(*)                  :: name
-  integer, intent(in)            :: size_x
-  double precision, intent(out)  :: x(size_x)
-  integer                        :: rc
-  character*(256)                :: msg
-
-
-  write(msg,'(A,X,I,X,A)') 'put_data', worker_id, name
-  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
-  if (rc /= len(trim(msg))) then
-    print *,  irp_here, ': Error sending '//name
-    stop 'error'
-  endif
-
-  rc = f77_zmq_send(zmq_to_qp_run_socket,x,size_x*8,0)
-  if (rc /= size_x*8) then
-    print *,  irp_here, ': Error sending '//name
-    stop 'error'
-  endif
-
-  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
-  if (msg(1:rc) /= 'put_data_reply ok') then
-    print *,  rc, trim(msg)
-    print *,  irp_here, ': Error in put_data_reply'
-    stop 'error'
-  endif
-
-end
-
 
 BEGIN_TEMPLATE 
 
@@ -70,7 +33,7 @@ subroutine zmq_put_$X(zmq_to_qp_run_socket,worker_id)
   integer                        :: rc
   character*(256)                :: msg
 
-  write(msg,'(A,X,I,X,A)') 'put_data', worker_id, '$X'
+  write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, '$X'
   rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
   if (rc /= len(trim(msg))) then
     print *,  irp_here, ': Error sending $X'
@@ -103,7 +66,7 @@ subroutine zmq_get_$X(zmq_to_qp_run_socket, worker_id)
   integer                        :: rc
   character*(64)                 :: msg
 
-  write(msg,'(A,X,I,X,A)') 'get_data', worker_id, '$X'
+  write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, '$X'
   rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
   if (rc /= len(trim(msg))) then
     print *,  irp_here, ': Error getting $X'
@@ -132,7 +95,6 @@ N_det ;;
 psi_det_size ;;
 N_det_generators ;;
 N_det_selectors ;;
-N_states_diag ;;
 
 END_TEMPLATE
 
@@ -147,7 +109,7 @@ subroutine zmq_put_psi_det(zmq_to_qp_run_socket,worker_id)
   integer                        :: rc, rc8
   character*(256)                :: msg
 
-  write(msg,'(A,X,I,X,A)') 'put_data', worker_id, 'psi_det'
+  write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, 'psi_det'
   rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
   if (rc /= len(trim(msg))) then
     print *,  irp_here, ': Error sending psi_det'
@@ -179,7 +141,7 @@ subroutine zmq_put_psi_coef(zmq_to_qp_run_socket,worker_id)
   integer                        :: rc, rc8
   character*(256)                :: msg
 
-  write(msg,'(A,X,I,X,A)') 'put_data', worker_id, 'psi_coef'
+  write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, 'psi_coef'
   rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
   if (rc /= len(trim(msg))) then
     print *,  irp_here, ': Error sending psi_coef'
@@ -244,7 +206,7 @@ subroutine zmq_get_psi_det(zmq_to_qp_run_socket, worker_id)
   character*(64)                 :: msg
 
   
-  write(msg,'(A,X,I,X,A)') 'get_data', worker_id, 'psi_det' 
+  write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, 'psi_det' 
   rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
   if (rc /= len(trim(msg))) then
     print *,  irp_here, ': Error getting psi_det'
@@ -279,7 +241,7 @@ subroutine zmq_get_psi_coef(zmq_to_qp_run_socket, worker_id)
   character*(64)                 :: msg
 
 
-  write(msg,'(A,X,I,X,A)') 'get_data', worker_id, 'psi_coef'
+  write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, 'psi_coef'
   rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
   if (rc /= len(trim(msg))) then
     print *,  irp_here, ': Error getting psi_coef'
@@ -302,41 +264,3 @@ subroutine zmq_get_psi_coef(zmq_to_qp_run_socket, worker_id)
 end
 
 
-subroutine zmq_get_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x)
-  use f77_zmq
-  implicit none
-  BEGIN_DOC
-! Get psi_coef from the qp_run scheduler
-  END_DOC
-  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
-  integer, intent(in)            :: worker_id
-  integer, intent(in)            :: size_x
-  character*(*), intent(in)      :: name
-  double precision, intent(out)  :: x(size_x)
-  integer                        :: rc
-  integer*8                      :: rc8
-  character*(64)                 :: msg
-
-  write(msg,'(A,X,I,X,A)') 'get_data', worker_id, name
-  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
-  if (rc /= len(trim(msg))) then
-    print *,  irp_here, ': Error getting '//name
-    stop 'error'
-  endif
-
-  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
-  if (msg(1:14) /= 'get_data_reply') then
-    print *,  rc, trim(msg)
-    print *,  irp_here, ': Error in get_data_reply'
-    stop 'error'
-  endif
-
-  rc = f77_zmq_recv(zmq_to_qp_run_socket,x,size_x*8,0)
-  if (rc /= size_x*8) then
-    print *, irp_here, ': Error getting '//name
-    stop 'error'
-  endif
-end
-
-
-
diff --git a/src/Utils/LinearAlgebra.irp.f b/src/Utils/LinearAlgebra.irp.f
index 46859f1a..6e1b9565 100644
--- a/src/Utils/LinearAlgebra.irp.f
+++ b/src/Utils/LinearAlgebra.irp.f
@@ -70,7 +70,7 @@ subroutine ortho_canonical(overlap,LDA,N,C,LDC,m)
   double precision, allocatable  :: Vt(:,:)
   double precision, allocatable  :: D(:)
   double precision, allocatable  :: S(:,:)
-  !DEC$ ATTRIBUTES ALIGN : 64    :: U, Vt, D
+  !DIR$ ATTRIBUTES ALIGN : 64    :: U, Vt, D
   integer                        :: info, i, j
   
   if (n < 2) then
diff --git a/src/Utils/integration.irp.f b/src/Utils/integration.irp.f
index ad57c52d..cac60001 100644
--- a/src/Utils/integration.irp.f
+++ b/src/Utils/integration.irp.f
@@ -37,7 +37,7 @@ subroutine give_explicit_poly_and_gaussian_x(P_new,P_center,p,fact_k,iorder,alph
   call recentered_poly2(P_a(0),A_center,P_center,a,P_b(0),B_center,P_center,b)
   n_new = 0
 
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call multiply_poly(P_a(0),a,P_b(0),b,P_new(0),n_new)
   iorder = a + b
 end
@@ -76,44 +76,41 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,
   P_new(0,2) = 0.d0
   P_new(0,3) = 0.d0
   
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center)
   if (fact_k < thresh) then
     fact_k = 0.d0
     return
   endif
   
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call recentered_poly2(P_a(0,1),A_center(1),P_center(1),a(1),P_b(0,1),B_center(1),P_center(1),b(1))
   iorder(1) = a(1) + b(1)
-  !DIR$ VECTOR ALIGNED
   do i=0,iorder(1)
     P_new(i,1) = 0.d0
   enddo
   n_new=0
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call multiply_poly(P_a(0,1),a(1),P_b(0,1),b(1),P_new(0,1),n_new)
   
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call recentered_poly2(P_a(0,2),A_center(2),P_center(2),a(2),P_b(0,2),B_center(2),P_center(2),b(2))
   iorder(2) = a(2) + b(2)
-  !DIR$ VECTOR ALIGNED
   do i=0,iorder(2)
     P_new(i,2) = 0.d0
   enddo
   n_new=0
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call multiply_poly(P_a(0,2),a(2),P_b(0,2),b(2),P_new(0,2),n_new)
   
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call recentered_poly2(P_a(0,3),A_center(3),P_center(3),a(3),P_b(0,3),B_center(3),P_center(3),b(3))
   iorder(3) = a(3) + b(3)
-  !DIR$ VECTOR ALIGNED
   do i=0,iorder(3)
     P_new(i,3) = 0.d0
   enddo
   n_new=0
-  !DEC$ FORCEINLINE
+  !DIR$ FORCEINLINE
   call multiply_poly(P_a(0,3),a(3),P_b(0,3),b(3),P_new(0,3),n_new)
   
 end
@@ -200,7 +197,7 @@ subroutine gaussian_product(a,xa,b,xb,k,p,xp)
   ASSERT (b>0.)
   
   double precision               :: xab(3), ab
-  !DEC$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab
   
   p = a+b
   p_inv = 1.d0/(a+b)
@@ -282,7 +279,6 @@ subroutine multiply_poly(b,nb,c,nc,d,nd)
   endif
   ndtmp = nb+nc
   
-  !DIR$ VECTOR ALIGNED
   do ic = 0,nc
     d(ic) = d(ic) + c(ic) * b(0)
   enddo
diff --git a/src/Utils/transpose.irp.f b/src/Utils/transpose.irp.f
index ec33023d..ed86b10e 100644
--- a/src/Utils/transpose.irp.f
+++ b/src/Utils/transpose.irp.f
@@ -8,7 +8,7 @@ recursive subroutine transpose(A,LDA,B,LDB,d1,d2)
  real, intent(in) :: A(LDA,d2)
  real, intent(out) :: B(LDB,d1)
 
- integer :: i,j,k, mod_align
+ integer :: i,j,k
  if ( d2 < 32 ) then
    do j=1,d1
      !DIR$ LOOP COUNT (16)
@@ -55,7 +55,7 @@ recursive subroutine dtranspose(A,LDA,B,LDB,d1,d2)
 ! enddo
 ! return
 
- integer :: i,j,k, mod_align
+ integer :: i,j,k
  if ( d2 < 32 ) then
    do j=1,d1
      !DIR$ LOOP COUNT (16)
diff --git a/src/Utils/util.irp.f b/src/Utils/util.irp.f
index d3b20a10..e4b21f2e 100644
--- a/src/Utils/util.irp.f
+++ b/src/Utils/util.irp.f
@@ -10,7 +10,7 @@ double precision function binom_func(i,j)
   double precision               :: logfact
   integer, save                  :: ifirst
   double precision, save         :: memo(0:15,0:15)
-  !DEC$ ATTRIBUTES ALIGN : $IRP_ALIGN :: memo
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: memo
   integer                        :: k,l
   if (ifirst == 0) then
     ifirst = 1
@@ -45,20 +45,6 @@ end
 END_PROVIDER
 
 
-integer function align_double(n)
-  implicit none
-  BEGIN_DOC
-  ! Compute 1st dimension such that it is aligned for vectorization.
-  END_DOC
-  integer                        :: n
-  include 'constants.include.F'
-  if (mod(n,SIMD_vector/4) /= 0) then
-    align_double= n + SIMD_vector/4 - mod(n,SIMD_vector/4)
-  else
-    align_double= n
-  endif
-end
-
 
 double precision function fact(n)
   implicit none
@@ -333,7 +319,6 @@ subroutine normalize(u,sze)
   implicit none
   BEGIN_DOC
   ! Normalizes vector u
-  ! u is expected to be aligned in memory.
   END_DOC
   integer, intent(in)            :: sze
   double precision, intent(inout):: u(sze)
diff --git a/src/ZMQ/put_get.irp.f b/src/ZMQ/put_get.irp.f
new file mode 100644
index 00000000..76c3a339
--- /dev/null
+++ b/src/ZMQ/put_get.irp.f
@@ -0,0 +1,76 @@
+subroutine zmq_put_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Put the X vector on the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  character*(*)                  :: name
+  integer, intent(in)            :: size_x
+  double precision, intent(out)  :: x(size_x)
+  integer                        :: rc
+  character*(256)                :: msg
+
+
+  write(msg,'(A8,1X,I8,1X,A230)') 'put_data', worker_id, name
+  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
+  if (rc /= len(trim(msg))) then
+    print *,  irp_here, ': Error sending '//name
+    stop 'error'
+  endif
+
+  rc = f77_zmq_send(zmq_to_qp_run_socket,x,size_x*8,0)
+  if (rc /= size_x*8) then
+    print *,  irp_here, ': Error sending '//name
+    stop 'error'
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+  if (msg(1:rc) /= 'put_data_reply ok') then
+    print *,  rc, trim(msg)
+    print *,  irp_here, ': Error in put_data_reply'
+    stop 'error'
+  endif
+
+end
+
+
+subroutine zmq_get_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Get psi_coef from the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer, intent(in)            :: size_x
+  character*(*), intent(in)      :: name
+  double precision, intent(out)  :: x(size_x)
+  integer                        :: rc
+  integer*8                      :: rc8
+  character*(64)                 :: msg
+
+  write(msg,'(A8,1X,I8,1X,A230)') 'get_data', worker_id, name
+  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
+  if (rc /= len(trim(msg))) then
+    print *,  irp_here, ': Error getting '//name
+    stop 'error'
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+  if (msg(1:14) /= 'get_data_reply') then
+    print *,  rc, trim(msg)
+    print *,  irp_here, ': Error in get_data_reply'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,x,size_x*8,0)
+  if (rc /= size_x*8) then
+    print *, irp_here, ': Error getting '//name
+    stop 'error'
+  endif
+end
+
+
+