diff --git a/doc/reference/c++/mpi/CMakeLists.txt b/doc/reference/c++/mpi/CMakeLists.txt new file mode 100644 index 00000000..0876e42b --- /dev/null +++ b/doc/reference/c++/mpi/CMakeLists.txt @@ -0,0 +1,5 @@ +# Doxygen sources +#set_property(GLOBAL APPEND PROPERTY DOXYGEN_SOURCES ${TRIQS_SOURCE_DIR}/triqs/mpi/mpi.hpp) + +all_tests() + diff --git a/doc/reference/c++/mpi/mpi.rst b/doc/reference/c++/mpi/mpi.rst new file mode 100644 index 00000000..1c46dde2 --- /dev/null +++ b/doc/reference/c++/mpi/mpi.rst @@ -0,0 +1,152 @@ +MPI +=============== + +.. warning:: + + Library of beta quality. + + More functionality may be added in the future. + +Introduction +-------------- + +The purpose of the MPI library is to provide a simplified, C++-style API to the MPI routines for standard types +(those for which an MPI type exists) and for composite higher-level objects, in particular the TRIQS arrays and Green's functions. + +The communication routines in the C API of the MPI library require several parameters, such as the ``reduce`` operation: + +.. code-block:: c + + int MPI_Reduce(void *sendbuf, void *recvbuf, int count, + MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) + +In principle, all parameters except for the communicator and id of the root process can be determined from the variable or object to be transmitted. +In most cases, we use ``MPI_COMM_WORLD`` as the communicator, take the id 0 for the root process and use ``MPI_SUM`` as the operation. + +This allows us to write + +.. code-block:: c + + int a = 5; + triqs::mpi::reduce(a); + +Such an interface is simpler to use and much less error prone. For higher-level objects, such as vectors or higher-dimensional arrays, the simplification is even more significant. Take the scatter and gather operations as examples: + +.. code-block:: c + + int MPI_Scatter(void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, + MPI_Comm comm) + +.. 
code-block:: c + + int MPI_Gather(void *sendbuf, int sendcount, MPI_Datatype sendtype, + void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, + MPI_Comm comm) + +In order to scatter a (contiguous) multidimensional array across all nodes, apply some operations to it and gather it back on the master, one requires several lines of relatively complex code. +The leading dimension of the array needs to be sliced, slice length and address of the first element of each slice have to be computed and finally the MPI C API function has to be called. +This can be packaged in the library once and for all. + +Using the library, these operations look as follows: + +.. code-block:: c + + triqs::arrays::array<double, 3> A(8, 8, 8); // a three-dimensional array + triqs::mpi::scatter(A); + //do something with the corresponding part of A on each node + triqs::mpi::gather(A); + +All index computations are encapsulated in the triqs::mpi library calls. + + +In principle, the Boost.MPI library provides a similar interface for basic types and standard library containers. Transmission of the data however requires serialization and doubles the required memory. This poses a severe limitation when large amounts of data are to be transmitted. + +In this library, we employ metaprogramming techniques for type deduction as well as a lazy mechanism to avoid unnecessary copies of data. + +MPI reference +---------------- + +In this document, we describe the use of the TRIQS MPI library. For more information on MPI, see, e.g., the `open MPI web pages <https://www.open-mpi.org/>`_ or consult the MPI reference manual. For more information on Boost.MPI, refer to the `Boost library documentation <https://www.boost.org/doc/libs/release/libs/mpi/>`_. + + +Supported functions and types +------------------------------ + +Currently, the TRIQS MPI library supports the following operations:: + + reduce + allreduce + broadcast + scatter + gather + allgather + +These routines have the same meaning as their corresponding MPI analogues. +They work for all 'basic' types, i.e. 
types for which a native MPI-type exists. These are:: + + int + long + unsigned long + double + float + std::complex<double> + +We also support ``std::vector<T>`` for ``T`` being a basic type, as well as the types provided by the TRIQS ``array`` and TRIQS ``gf`` libraries. +In addition, the library provides a mechanism to enable MPI support for custom containers based on the array or gf libraries. + + + +Basic usage +------------- + +The syntax is inspired by Boost.MPI. In order to create an MPI environment, set up the communicator and broadcast a variable, use the following code block: + +.. code-block:: c + + int main(int argc, char* argv[]) { + + mpi::environment env(argc, argv); + mpi::communicator world; + + int a = 5; + broadcast(a, world); + + } + +The declaration of the communicator is optional. If no communicator is passed to the routine, ``MPI_COMM_WORLD`` is used by default. + +All collective operations have the same signature. They take up to three arguments: + +.. code-block:: c + + reduce(T const &x, communicator = {}, int root = 0) + +Here T can be any supported type. The communicator is optional. By default, the data will be collected on (or transmitted from) the process with id 0. + +Headers +-------------- + +Support for basic types is provided by the header ``triqs/mpi/base.hpp`` and for vectors and arrays by ``triqs/mpi/vector.hpp`` and ``triqs/mpi/array.hpp``. For custom container types, the header ``triqs/mpi/generic.hpp`` is required. Support for Boost.MPI is provided by the ``triqs/mpi/boost.hpp`` header file. + +For convenience, we provide the header:: + + triqs/mpi.hpp + +which includes the headers for basic, vector, array and generic type support. + + +Doxygen documentation +------------------------- + +The :doxy:`full C++ parameter documentation` and +the :doxy:`parameter_defaults documentation` are available here. + +MPI example +------------- + +.. triqs_example:: ./mpi_0.cpp +Simple MPI example. 
+ + + diff --git a/doc/reference/c++/mpi/mpi_0.cpp b/doc/reference/c++/mpi/mpi_0.cpp new file mode 100644 index 00000000..c60f1ff8 --- /dev/null +++ b/doc/reference/c++/mpi/mpi_0.cpp @@ -0,0 +1,30 @@ +#include +#include +#include + +using namespace triqs; +using namespace triqs::arrays; +using namespace triqs::mpi; + +int main(int argc, char *argv[]) { + + mpi::environment env(argc, argv); + mpi::communicator world; + + int a = 5; + broadcast(a); + reduce_in_place(a); + + array A(2,10); A()=1; + + std::cout<. + * + ******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include + +using namespace triqs; +using namespace triqs::arrays; +using namespace triqs::mpi; + +template +std::ostream & operator << (std::ostream & out, std::vector const & v) { +for (size_t i =0; i; + using VEC = std::vector>; + + VEC A(7), B(7), AA(7); + + clef::placeholder<0> i_; + clef::placeholder<1> j_; + + triqs::clef::make_expr(A)[i_] << i_+1; + + B = mpi::scatter(A, world); + + VEC C = mpi::scatter(A, world); + + std::ofstream out("node" + std::to_string(world.rank())); + out << " A = " << A << std::endl; + out << " B = " << B << std::endl; + out << " C = " << C << std::endl; + + for(auto &x: B) x *= -1; + for(auto &x: AA) x = 0; + + AA = mpi::gather(B, world); + out << " AA = " << AA << std::endl; + + mpi::broadcast(AA, world); + out << " cast AA = " << AA << std::endl; + + for (auto &x : AA) x = 0; + + AA = mpi::allgather(B, world); + out << " AA = " << AA << std::endl; + +} + diff --git a/triqs/mpi/base.hpp b/triqs/mpi/base.hpp index 7a5c8237..862f9537 100644 --- a/triqs/mpi/base.hpp +++ b/triqs/mpi/base.hpp @@ -116,7 +116,7 @@ namespace mpi { static T invoke(tag::allreduce, communicator c, T a, int root) { T b; - MPI_Allreduce(&a, &b, 1, D(), MPI_SUM, root, c.get()); + MPI_Allreduce(&a, &b, 1, D(), MPI_SUM, c.get()); return b; } diff --git a/triqs/mpi/vector.hpp b/triqs/mpi/vector.hpp index 
b406e99b..3f1b1b9c 100644 --- a/triqs/mpi/vector.hpp +++ b/triqs/mpi/vector.hpp @@ -38,7 +38,11 @@ namespace mpi { } // ----------- - static void broadcast(communicator c, std::vector &a, int root) { MPI_Bcast(a.data(), a.size(), D(), root, c.get()); } + static void broadcast(communicator c, std::vector &a, int root) { + size_t s=a.size(); + MPI_Bcast(&s, 1, mpi_datatype::invoke(), root, c.get()); + if(c.rank() != root) a.resize(s); + MPI_Bcast(a.data(), a.size(), D(), root, c.get()); } // ----------- static std::vector invoke(tag::reduce, communicator c, T const &a, int root) { @@ -91,7 +95,7 @@ namespace mpi { // ----------- static std::vector invoke(tag::allgather, communicator c, std::vector const &a, int root) { - long size = reduce(a.size(), c, root); + long size = allreduce(a.size(), c, root); std::vector b(size); auto recvcounts = std::vector(c.size());