diff --git a/test/triqs/mpi/mpi_array.cpp b/test/triqs/mpi/mpi_array.cpp
index e02bf7e9..cea0b1b1 100644
--- a/test/triqs/mpi/mpi_array.cpp
+++ b/test/triqs/mpi/mpi_array.cpp
@@ -45,7 +45,10 @@ int main(int argc, char* argv[]) {
  A(i_, j_) << i_ + 10 * j_;
+ //std::cerr << "B0 "<< B <
+ w_;
+
+ auto g1 = gf{{beta, Fermion, Nfreq}, {1, 1}}; // using ARR = array;
+ g1(w_) << 1 / (w_ + 1);
+
+ out << "g1.data" << g1.data() << std::endl;
+
+ {
+  out<< "reduction "<< std::endl;
+  gf g2 = mpi::reduce(g1, world);
+  out << g2.data()<
+ g2 = mpi::allreduce(g1, world);
+  out << g2.data()<
+ g2 = mpi::scatter(g1);
+  g2(w_) << g2(w_) * (1 + world.rank());
+  g1 = mpi::allgather(g2);
+
+  out << g1.data() << std::endl;
+ }
+
+ {
+  out << "Building directly scattered, and gather" << std::endl;
+  auto m = mpi_scatter(gf_mesh{beta, Fermion, Nfreq}, world, 0);
+  auto g3 = gf{m, {1, 1}};
+  g3(w_) << 1 / (w_ + 1);
+  auto g4 = g3;
+  out<< "chunk ..."<
 #include
 #include
+#include
 #include
 #include "./tools.hpp"
 #include "./data_proxies.hpp"
@@ -424,6 +425,9 @@ namespace gfs {
     : B() {
    *this = x;
   }
+
+  // mpi lazy
+  template gf(mpi::mpi_lazy x) : gf() { operator=(x); }
 
   gf(typename B::mesh_t m, typename B::data_t dat, typename B::singularity_view_t const &si, typename B::symmetry_t const &s,
      typename B::indices_t const &ind, std::string name = "")
@@ -453,6 +457,13 @@ namespace gfs {
    return *this;
   }
 
+  friend struct mpi::mpi_impl_triqs_gfs; //allowed to modify mesh
+
+  //
+  template void operator=(mpi::mpi_lazy x) {
+   mpi::mpi_impl_triqs_gfs::complete_operation(*this, x);
+  }
+
   template void operator=(RHS &&rhs) {
    this->_mesh = rhs.mesh();
    this->_data.resize(get_gf_data_shape(rhs));
@@ -841,6 +852,17 @@ namespace gfs {
  };
 } // gfs_implementation
 }
+
+namespace mpi {
+
+ template
+ struct mpi_impl, void> : mpi_impl_triqs_gfs> {};
+
+ template
+ struct mpi_impl, void> : mpi_impl_triqs_gfs> {};
+
+}
+
 }
 
 // same as for arrays : views cannot be swapped by the std::swap. Delete it
diff --git a/triqs/gfs/local/tail.hpp b/triqs/gfs/local/tail.hpp
index 68c2fcb8..5de8e8e4 100644
--- a/triqs/gfs/local/tail.hpp
+++ b/triqs/gfs/local/tail.hpp
@@ -18,11 +18,12 @@
  * TRIQS. If not, see .
  *
  ******************************************************************************/
-#ifndef TRIQS_GF_LOCAL_TAIL_H
-#define TRIQS_GF_LOCAL_TAIL_H
+#pragma once
 #include
 #include
 #include
+#include
+#include
 
 namespace triqs { namespace gfs { namespace local {
@@ -50,6 +51,7 @@ namespace triqs { namespace gfs { namespace local {
  /// A common implementation class. Idiom: ValueView
  template class tail_impl {
   public:
+  TRIQS_MPI_IMPLEMENTED_VIA_BOOST;
 
  typedef tail_view view_type;
  typedef tail regular_type;
@@ -171,8 +173,9 @@ namespace triqs { namespace gfs { namespace local {
  }
 
  friend std::ostream & operator << (std::ostream & out, tail_impl const & x) {
+  if (x.data().is_empty()) return out << "empty tail"<
+  std::get<0>(r.m_tuple) = mpi_scatter(std::get<0>(r.m_tuple), c, root);
+  return r;
+ }
+
+ /// Opposite of scatter : rebuild the original mesh, without a window
+ friend matsubara_freq_mesh mpi_gather(matsubara_freq_mesh m, mpi::communicator c, int root) {
+  auto r = m; // same domain, but mesh with a window. Ok ?
+  std::get<0>(r.m_tuple) = mpi_gather(std::get<0>(r.m_tuple), c, root);
+  return r;
+ }
+
 /// Conversions point <-> index <-> linear_index
 typename domain_t::point_t index_to_point(index_t const &ind) const {
  domain_pt_t res;
diff --git a/triqs/mpi/arrays.hpp b/triqs/mpi/arrays.hpp
index 17b79624..4dde987b 100644
--- a/triqs/mpi/arrays.hpp
+++ b/triqs/mpi/arrays.hpp
@@ -40,8 +40,12 @@ namespace mpi {
   auto dims = ref.shape();
   long slow_size = first_dim(ref);
 
+  if (std::is_same::value) {
+   // optionally check all dims are the same ?
+  }
+
   if (std::is_same::value) {
-   dims[0] = slice_length(slow_size - 1, c, c.rank());
+   dims[0] = mpi::slice_length(slow_size - 1, c.size(), c.rank());
   }
 
   if (std::is_same::value) {
@@ -87,7 +91,7 @@ namespace mpi {
   static void allreduce_in_place(communicator c, A &a, int root) {
    check_is_contiguous(a);
    // assume arrays have the same size on all nodes...
-   MPI_Allreduce(MPI_IN_PLACE, a.data_start(), a.domain().number_of_elements(), D(), MPI_SUM, root, c.get());
+   MPI_Allreduce(MPI_IN_PLACE, a.data_start(), a.domain().number_of_elements(), D(), MPI_SUM, c.get());
   }
 
   //---------
@@ -137,6 +141,18 @@ namespace arrays {
   private:
   static MPI_Datatype D() { return mpi::mpi_datatype::invoke(); }
 
+  //---------------------------------
+  void _invoke(triqs::mpi::tag::reduce) {
+   lhs.resize(laz.domain());
+   MPI_Reduce((void *)laz.ref.data_start(), (void *)lhs.data_start(), laz.ref.domain().number_of_elements(), D(), MPI_SUM, laz.root, laz.c.get());
+  }
+
+  //---------------------------------
+  void _invoke(triqs::mpi::tag::allreduce) {
+   lhs.resize(laz.domain());
+   MPI_Allreduce((void *)laz.ref.data_start(), (void *)lhs.data_start(), laz.ref.domain().number_of_elements(), D(), MPI_SUM, laz.c.get());
+  }
+
   //---------------------------------
   void _invoke(triqs::mpi::tag::scatter) {
    lhs.resize(laz.domain());
@@ -146,10 +162,10 @@
   auto slow_stride = laz.ref.indexmap().strides()[0];
   auto sendcounts = std::vector(c.size());
   auto displs = std::vector(c.size() + 1, 0);
-  int recvcount = slice_length(slow_size - 1, c, c.rank()) * slow_stride;
+  int recvcount = mpi::slice_length(slow_size - 1, c.size(), c.rank()) * slow_stride;
 
   for (int r = 0; r < c.size(); ++r) {
-   sendcounts[r] = slice_length(slow_size - 1, c, r) * slow_stride;
+   sendcounts[r] = mpi::slice_length(slow_size - 1, c.size(), r) * slow_stride;
    displs[r + 1] = sendcounts[r] + displs[r];
   }
diff --git a/triqs/mpi/base.hpp b/triqs/mpi/base.hpp
index 862f9537..40e81d4f 100644
--- a/triqs/mpi/base.hpp
+++ b/triqs/mpi/base.hpp
@@ -23,6 +23,12 @@
 //#include
 #include
 
+namespace boost { // forward declare in case we do not include boost.
+namespace mpi {
+ class communicator;
+}
+}
+
 namespace triqs {
 namespace mpi {
@@ -41,6 +47,11 @@ namespace mpi {
   MPI_Comm get() const { return _com; }
 
+  inline communicator(boost::mpi::communicator);
+
+  /// Cast to the boost mpi communicator
+  inline operator boost::mpi::communicator () const;
+
   int rank() const {
    int num;
    MPI_Comm_rank(_com, &num);
@@ -67,6 +78,13 @@ namespace mpi {
  /// The implementation of mpi ops for each type
  template struct mpi_impl;
+
+ /// A small lazy tagged class
+ template struct mpi_lazy {
+  T const &ref;
+  int root;
+  communicator c;
+ };
 
 // ----------------------------------------
 // ------- top level functions -------
@@ -136,6 +154,26 @@ namespace mpi {
  struct mpi_impl::value || triqs::is_complex::value>> : mpi_impl_basic {};
 
 //------------ Some helper function
+
+ // Given a range [first, last], slice it regularly for a node of rank 'rank' among n_nodes.
+ // If the range is not dividable in n_nodes equal parts,
+ // the first nodes have one more elements than the last ones.
+ inline std::pair slice_range(long first, long last, int n_nodes, int rank) {
+  long chunk = (last - first + 1) / n_nodes;
+  long n_large_nodes = (last - first + 1) - n_nodes * chunk;
+  if (rank <= n_large_nodes - 1) // first, larger nodes, use chunk + 1
+   return {first + rank * (chunk + 1), first + (rank + 1) * (chunk + 1) - 1};
+  else // others nodes : shift the first by 1*n_large_nodes, used chunk
+   return {first + n_large_nodes + rank * chunk, first + n_large_nodes + (rank + 1) * chunk - 1};
+ }
+
+ // TODO RECHECK TEST
+ inline long slice_length(long imax, int n_nodes, int rank) {
+  auto r = slice_range(0, imax, n_nodes, rank);
+  return r.second - r.first + 1;
+ }
+
+ /*
 inline long slice_length(size_t imax, communicator c, int r) {
   auto imin = 0;
   long j = (imax - imin + 1) / c.size();
@@ -143,6 +181,7 @@ namespace mpi {
   auto r_min = (r <= i - 1 ? imin + r * (j + 1) : imin + r * j + i);
   auto r_max = (r <= i - 1 ? imin + (r + 1) * (j + 1) - 1 : imin + (r + 1) * j + i - 1);
   return r_max - r_min + 1;
-  };
+  }
+ */
 }
 }
diff --git a/triqs/mpi/boost.hpp b/triqs/mpi/boost.hpp
index d8a83676..74a76e6a 100644
--- a/triqs/mpi/boost.hpp
+++ b/triqs/mpi/boost.hpp
@@ -22,9 +22,20 @@
 #include "./base.hpp"
 #include
 
+#define TRIQS_MPI_IMPLEMENTED_VIA_BOOST using triqs_mpi_via_boost = void;
+
 namespace triqs {
 namespace mpi {
 
+ // implement the communicator cast
+ inline communicator::operator boost::mpi::communicator() const {
+  return boost::mpi::communicator(_com, boost::mpi::comm_duplicate);
+  // duplicate policy : cf http://www.boost.org/doc/libs/1_56_0/doc/html/boost/mpi/comm_create_kind.html
+ }
+
+ // reverse : construct (implicit) the communicator from the boost one.
+ inline communicator::communicator(boost::mpi::communicator c) :_com(c) {}
+
 /** ------------------------------------------------------------
   * Type which we use boost::mpi
   * ---------------------------------------------------------- **/
@@ -39,7 +50,7 @@ namespace mpi {
   static T invoke(tag::allreduce, communicator c, T const &a, int root) {
    T b;
-   boost::mpi::all_reduce(c, a, b, std::c14::plus<>(), root);
+   boost::mpi::all_reduce(c, a, b, std::c14::plus<>());
    return b;
   }
 
@@ -51,8 +62,8 @@ namespace mpi {
   static void allgather(communicator c, T const &, int root) = delete;
  };
 
- // default
- //template struct mpi_impl : mpi_impl_boost_mpi {};
+ // If type T has a mpi_implementation nested struct, then it is mpi_impl.
+ template struct mpi_impl : mpi_impl_boost_mpi {};
 
 }}//namespace
diff --git a/triqs/mpi/generic.hpp b/triqs/mpi/generic.hpp
index fb3523f8..c64764a6 100644
--- a/triqs/mpi/generic.hpp
+++ b/triqs/mpi/generic.hpp
@@ -23,25 +23,30 @@
 #include
 
 #define TRIQS_MPI_IMPLEMENTED_AS_TUPLEVIEW using triqs_mpi_as_tuple = void;
+#define TRIQS_MPI_IMPLEMENTED_AS_TUPLEVIEW_NO_LAZY using triqs_mpi_as_tuple_no_lazy = void;
 
 namespace triqs {
 namespace mpi {
 
- template struct mpi_lazy {
-  T const &ref;
-  int root;
-  communicator c;
- };
-
 /** ------------------------------------------------------------
   * Type which are recursively treated by reducing them to a tuple
   * of smaller objects.
   * ---------------------------------------------------------- **/
- template struct mpi_impl_tuple {
+ template struct mpi_impl_tuple {
 
  mpi_impl_tuple() = default;
- template static mpi_lazy invoke(Tag, communicator c, T const &a, int root) { return {a, root, c}; }
+
+ /// invoke
+ template static mpi_lazy invoke_impl(std::true_type, Tag, communicator c, T const &a, int root) { return {a, root, c}; }
+
+ template static T &invoke_impl(std::false_type, Tag, communicator c, T const &a, int root) {
+  return complete_operation(a, {a, root, c});
+ }
+
+ template static mpi_lazy invoke(Tag, communicator c, T const &a, int root) {
+  return invoke_impl(std::integral_constant(), Tag(), c, a, root);
+ }
 
 #ifdef __cpp_generic_lambdas
 
  static void reduce_in_place(communicator c, T &a, int root) {
@@ -57,6 +62,7 @@ namespace mpi {
   triqs::tuple::for_each_zip(l, view_as_tuple(target), view_as_tuple(laz.ref));
   return target;
  }
+
 #else
 
  struct aux1 {
@@ -89,15 +95,17 @@ namespace mpi {
   }
  };
 
- template static void complete_operation(T &target, mpi_lazy laz) {
+ template static T& complete_operation(T &target, mpi_lazy laz) {
   auto l = aux3{laz};
   triqs::tuple::for_each_zip(l, view_as_tuple(target), view_as_tuple(laz.ref));
+  return target;
  }
 
 #endif
 };
 
 // If type T has a mpi_implementation nested struct, then it is mpi_impl.
- template struct mpi_impl : mpi_impl_tuple {};
+ template struct mpi_impl : mpi_impl_tuple {};
+ template struct mpi_impl : mpi_impl_tuple {};
 }
 } // namespace
diff --git a/triqs/mpi/gf.hpp b/triqs/mpi/gf.hpp
new file mode 100644
index 00000000..f11a8c1e
--- /dev/null
+++ b/triqs/mpi/gf.hpp
@@ -0,0 +1,98 @@
+/*******************************************************************************
+ *
+ * TRIQS: a Toolbox for Research in Interacting Quantum Systems
+ *
+ * Copyright (C) 2014 by O. Parcollet
+ *
+ * TRIQS is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 3 of the License, or (at your option) any later
+ * version.
+ *
+ * TRIQS is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * TRIQS. If not, see .
+ *
+ ******************************************************************************/
+#pragma once
+#include "./base.hpp"
+#include
+
+namespace triqs {
+namespace mpi {
+
+ //--------------------------------------------------------------------------------------------------------
+
+ // When value_type is a basic type, we can directly call the C API
+ template struct mpi_impl_triqs_gfs {
+
+  //---------
+  static void reduce_in_place(communicator c, G &g, int root) {
+   triqs::mpi::reduce_in_place(c, g.data(), root);
+   triqs::mpi::reduce_in_place(c, g.singularity(), root);
+  }
+
+  //---------
+  /*static void allreduce_in_place(communicator c, G &g, int root) {
+   triqs::mpi::allreduce_in_place(c, g.data(), root);
+   triqs::mpi::allreduce_in_place(c, g.singularity(), root);
+  }
+*/
+
+  //---------
+  static void broadcast(communicator c, G &g, int root) {
+   triqs::mpi::broadcast(c, g.data(), root);
+   triqs::mpi::broadcast(c, g.singularity(), root);
+  }
+
+  //---------
+  template static mpi_lazy invoke(Tag, communicator c, G const &g, int root) {
+   return {g, root, c};
+  }
+
+  //---- reduce ----
+  static G &complete_operation(G &target, mpi_lazy laz) {
+   target._data = mpi::reduce(laz.ref.data(), laz.c, laz.root);
+   target._singularity = mpi::reduce(laz.ref.singularity(), laz.c, laz.root);
+   return target;
+  }
+
+  //---- allreduce ----
+  static G &complete_operation(G &target, mpi_lazy laz) {
+   target._data = mpi::allreduce(laz.ref.data(), laz.c, laz.root);
+   target._singularity = mpi::allreduce(laz.ref.singularity(), laz.c, laz.root);
+   return target;
+  }
+
+  //---- scatter ----
+  static G &complete_operation(G &target, mpi_lazy laz) {
+   target._mesh = mpi_scatter(laz.ref.mesh(), laz.c, laz.root);
+   target._data = mpi::scatter(laz.ref.data(), laz.c, laz.root); // HERE ADD OPTION FOR CHUNCK
+   target._singularity = laz.ref.singularity();
+   //mpi::broadcast(target._singularity, laz.c, laz.root);
+   return target;
+  }
+
+  //---- gather ----
+  static G &complete_operation(G &target, mpi_lazy laz) {
+   target._mesh = mpi_gather(laz.ref.mesh(), laz.c, laz.root);
+   target._data = mpi::gather(laz.ref.data(), laz.c, laz.root); // HERE ADD OPTION FOR CHUNCK
+   // do nothing for singularity
+   return target;
+  }
+
+  //---- allgather ----
+  static G &complete_operation(G &target, mpi_lazy laz) {
+   target._data = mpi::allgather(laz.ref.data(), laz.c, laz.root); // HERE ADD OPTION FOR CHUNCK
+   // do nothing for singularity
+   return target;
+  }
+
+ };
+
+} // mpi namespace
+} // namespace triqs
diff --git a/triqs/mpi/vector.hpp b/triqs/mpi/vector.hpp
index 3f1b1b9c..e1ae5007 100644
--- a/triqs/mpi/vector.hpp
+++ b/triqs/mpi/vector.hpp
@@ -64,11 +64,11 @@ namespace mpi {
   auto slow_size = a.size();
   auto sendcounts = std::vector(c.size());
   auto displs = std::vector(c.size() + 1, 0);
-  int recvcount = slice_length(slow_size - 1, c, c.rank());
+  int recvcount = slice_length(slow_size - 1, c.size(), c.rank());
 
   std::vector b(recvcount);
 
   for (int r = 0; r < c.size(); ++r) {
-   sendcounts[r] = slice_length(slow_size - 1, c, r);
+   sendcounts[r] = slice_length(slow_size - 1, c.size(), r);
    displs[r + 1] = sendcounts[r] + displs[r];
   }
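Note, not part of the patch: the new slice_range/slice_length helpers in triqs/mpi/base.hpp carry a "TODO RECHECK TEST" comment, and their comment states that when the range is not divisible into n_nodes equal parts the first nodes receive one extra element. The standalone program below copies that logic verbatim and prints the slices for 10 elements over 3 nodes (expected lengths 4, 3, 3); the main() driver and the printed labels are added here for illustration only.

// sketch: check the partition rule of slice_range / slice_length from the patch
#include <iostream>
#include <utility>

std::pair<long, long> slice_range(long first, long last, int n_nodes, int rank) {
  long chunk = (last - first + 1) / n_nodes;                  // size of the small chunks
  long n_large_nodes = (last - first + 1) - n_nodes * chunk;  // how many nodes get chunk + 1
  if (rank <= n_large_nodes - 1)                              // first, larger nodes
    return {first + rank * (chunk + 1), first + (rank + 1) * (chunk + 1) - 1};
  else                                                        // remaining nodes, shifted by n_large_nodes
    return {first + n_large_nodes + rank * chunk, first + n_large_nodes + (rank + 1) * chunk - 1};
}

long slice_length(long imax, int n_nodes, int rank) {
  auto r = slice_range(0, imax, n_nodes, rank);
  return r.second - r.first + 1;
}

int main() {
  // 10 elements (indices 0..9) split over 3 nodes: expect [0,3], [4,6], [7,9]
  for (int rank = 0; rank < 3; ++rank) {
    auto r = slice_range(0, 9, 3, rank);
    std::cout << "rank " << rank << ": [" << r.first << ", " << r.second
              << "], length " << slice_length(9, 3, rank) << "\n";
  }
}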
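A second note, also not part of the patch: the core idiom the patch extends is that mpi::reduce, mpi::scatter, etc. return a small mpi_lazy object holding {ref, root, communicator}, and the actual communication only happens when that object is assigned into a target (the new gf constructor and operator= taking mpi::mpi_lazy). The minimal sketch below illustrates that flow with local stand-in types; none of these names are the TRIQS declarations, and the "operation" is just a copy standing in for the MPI call.

// sketch of the lazy-proxy-assignment idiom used by the patch
#include <iostream>

struct communicator {}; // stand-in for triqs::mpi::communicator

template <typename Tag, typename T> struct mpi_lazy {
  T const &ref;
  int root;
  communicator c;
};

namespace tag { struct reduce {}; }

struct my_type {
  int value = 0;

  // assigning a lazy proxy performs the deferred operation
  template <typename Tag> my_type &operator=(mpi_lazy<Tag, my_type> laz) {
    value = laz.ref.value; // a real implementation would call MPI here
    std::cout << "complete_operation, root = " << laz.root << "\n";
    return *this;
  }
};

mpi_lazy<tag::reduce, my_type> reduce(my_type const &x, communicator c, int root = 0) {
  return {x, root, c}; // no communication yet: just capture the arguments
}

int main() {
  communicator world;
  my_type a; a.value = 42;
  my_type b;
  b = reduce(a, world); // the operation runs here, inside the assignment
  std::cout << b.value << "\n";
}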