diff --git a/deadlock.png b/deadlock.png
new file mode 100644
index 0000000..ae75653
Binary files /dev/null and b/deadlock.png differ
diff --git a/parallelism_scemama.org b/parallelism_scemama.org
index a559ed2..d02c16f 100644
--- a/parallelism_scemama.org
+++ b/parallelism_scemama.org
@@ -1212,6 +1212,25 @@ $ python pi_server_python.py &
 
   - Receive: ~MPI_RECV(buffer, count, datatype, source, tag, communicator, status, ierror)~
+
+** Synchronous or asynchronous?
+
+   - ~MPI_Ssend~ :: Blocking synchronous send. Returns upon notification
+     that the message has been received (safe).
+   - ~MPI_Isend~ :: Non-blocking send. Returns immediately (dangerous).
+   - ~MPI_Send~ :: Returns when the send buffer can be reused. Whether it
+     waits for the receiver is non-deterministic: it depends on the MPI
+     implementation, the size of the data, the number of ranks, ... (dangerous)
+
+   #+LATEX: \begin{alertblock}{Important}
+   - While writing and debugging the program:
+     - /Always/ use ~MPI_Ssend~
+     - Use ~MPI_Isend~ only where ~MPI_Ssend~ cannot be used
+   - Once the code is well tested:
+     - You can replace ~MPI_Ssend~ by ~MPI_Send~ as an optimization.
+     - If ~MPI_Send~ is still too slow, consider rewriting the communication with ~MPI_Isend~
+   #+LATEX: \end{alertblock}
+
 ** Point-to-point communication (Python)
 
    #+begin_src python :tangle Codes/mpi_rank.py
@@ -1225,7 +1244,7 @@ def main():
     if rank == 0:
         data = 42
         print("Before: Rank: %d Size: %d Data: %d"%(rank, size, data))
-        comm.send(data, dest=1, tag=11)
+        comm.ssend(data, dest=1, tag=11)
         print("After : Rank: %d Size: %d Data: %d"%(rank, size, data))
     elif rank == 1:
         data = 0
@@ -1278,7 +1297,7 @@ program test_rank
    if (rank == 0) then
       data = 42
       print *, "Before: Rank:", rank, "Size:", size, "Data: ", data
-      call MPI_SEND(data, 1, MPI_INTEGER, 1, 11, MPI_COMM_WORLD, ierr)
+      call MPI_SSEND(data, 1, MPI_INTEGER, 1, 11, MPI_COMM_WORLD, ierr)
       print *, "After : Rank:", rank, "Size:", size, "Data: ", data
 
    else if (rank == 1) then
@@ -1293,18 +1312,223 @@ program test_rank
 end program
    #+end_src
 
+** Deadlocks
+
+   #+LATEX: \begin{columns}
+   #+LATEX: \begin{column}{0.3\textwidth}
+   #+ATTR_LATEX: :height 0.8\textheight
+   [[./deadlock.png]]
+   #+LATEX: \end{column}
+   #+LATEX: \begin{column}{0.7\textwidth}
+
+   #+LATEX: \begin{exampleblock}{Deadlock}
+   Each process waits for a message from another process, so none of
+   them can make progress
+   #+LATEX: \end{exampleblock}
+
+   Example: a round-robin (ring) exchange
+   #+LATEX: \end{column}
+   #+LATEX: \end{columns}
+
+** Deadlock example
+
+   #+begin_src fortran :tangle Codes/mpi_deadlock.f90
+program deadlock
+  use mpi
+  implicit none
+  integer :: rank, size, value, source, destination, ierr
+  integer, parameter :: tag=100
+
+  call MPI_INIT(ierr)
+  call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
+  call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
+
+  source      = mod(size+rank-1, size)
+  destination = mod(rank+1     , size)
+
+  call MPI_SSEND(rank+10, 1, MPI_INTEGER, destination, tag, MPI_COMM_WORLD, ierr)
+  call MPI_RECV(value, 1, MPI_INTEGER, source, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
+
+  print *, rank, 'received', value, 'from', source
+  call MPI_FINALIZE(ierr)
+end program
+   #+end_src
+
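+** Avoiding the deadlock (sketch)
+
+   One standard way to break the ring deadlock, sketched here with
+   mpi4py (an editorial sketch, not part of the original deck): even
+   ranks send first, odd ranks receive first, so a matching receive is
+   always posted for every synchronous send. It assumes at least two
+   ranks; the tag value 100 is arbitrary.
+
+   #+begin_src python
+#!/usr/bin/env python
+from mpi4py import MPI
+
+def main():
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    size = comm.Get_size()
+
+    source      = (size + rank - 1) % size
+    destination = (rank + 1) % size
+
+    if rank % 2 == 0:
+        # Even ranks: send first, then receive
+        comm.ssend(rank + 10, dest=destination, tag=100)
+        value = comm.recv(source=source, tag=100)
+    else:
+        # Odd ranks: receive first, then send
+        value = comm.recv(source=source, tag=100)
+        comm.ssend(rank + 10, dest=destination, tag=100)
+
+    print(rank, 'received', value, 'from', source)
+
+if __name__ == "__main__": main()
+   #+end_src
+
+   MPI also provides a single call that combines the send and the
+   receive for exactly this pattern, shown on the next slide.
+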
+** Send-receive
+
+   The ~MPI_Sendrecv~ function sends and receives a message /simultaneously/.
+   It can avoid deadlocks.
+
+   #+begin_example
+MPI_SENDRECV(SENDBUF, SENDCOUNT, SENDTYPE, DEST, SENDTAG,
+             RECVBUF, RECVCOUNT, RECVTYPE, SOURCE, RECVTAG,
+             COMM, STATUS, IERROR)
+
+<type> SENDBUF(*), RECVBUF(*)
+INTEGER :: SENDCOUNT, SENDTYPE, DEST, SENDTAG
+INTEGER :: RECVCOUNT, RECVTYPE, SOURCE, RECVTAG, COMM
+INTEGER :: STATUS(MPI_STATUS_SIZE), IERROR
+   #+end_example
+
+** Send-receive
+
+   #+begin_src fortran :tangle Codes/mpi_sendrecv.f90
+program sendrecv
+  use mpi
+  implicit none
+  integer :: rank, size, value, source, destination, ierr
+  integer, parameter :: tag=100
+
+  call MPI_INIT(ierr)
+  call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
+  call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
+
+  source      = mod(size+rank-1, size)
+  destination = mod(rank+1     , size)
+
+  call MPI_SENDRECV(rank+10, 1, MPI_INTEGER, destination, tag, value, &
+                    1, MPI_INTEGER, source, tag, MPI_COMM_WORLD,      &
+                    MPI_STATUS_IGNORE, ierr)
+
+  print *, rank, 'received', value, 'from', source
+
+  call MPI_FINALIZE(ierr)
+end program
+   #+end_src
+
 ** Collective communications
 
 *** One-to-all
-    - Broadcast: send same data to all
-    - Scatter: distribute an array
+    - ~MPI_Bcast~ :: Broadcast the same data to all ranks
+    - ~MPI_Scatter~ :: Distribute an array over the ranks
 
 *** All-to-one
-    - Reduction: Sum/product/... of data coming from all ranks
-    - Gather: collect a distributed array
+    - ~MPI_Reduce~ :: Sum/product/... of data coming from all ranks
+    - ~MPI_Gather~ :: Collect a distributed array
 
 *** All-to-all
-    - Reduction and broadcast
+    - ~MPI_Barrier~ :: Global synchronization
+    - ~MPI_Allreduce~ :: Reduce and broadcast the result
+    - ~MPI_Allgather~ :: Gather and broadcast the result
+    - ~MPI_Alltoall~ :: Each rank exchanges distinct data with every other rank
+
+    A minimal mpi4py sketch of these collectives is shown on the next slide.
+
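+** Collective communications (mpi4py sketch)
+
+   A minimal illustration of two collectives with mpi4py (an editorial
+   sketch, not part of the original deck): ~bcast~ sends the same
+   object from rank 0 to every rank, and ~reduce~ sums the per-rank
+   contributions on rank 0. The value 1000 is an arbitrary example.
+
+   #+begin_src python
+#!/usr/bin/env python
+from mpi4py import MPI
+
+def main():
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    size = comm.Get_size()
+
+    # Rank 0 chooses n; bcast gives every rank the same value
+    n = comm.bcast(1000 if rank == 0 else None, root=0)
+
+    # Each rank sums a round-robin slice of 0..n-1
+    partial = sum(range(rank, n, size))
+
+    # reduce combines the partial sums on rank 0
+    total = comm.reduce(partial, op=MPI.SUM, root=0)
+
+    if rank == 0:
+        print("Expected:", n*(n-1)//2, " Reduced:", total)
+
+if __name__ == "__main__": main()
+   #+end_src
+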
+** Computation of \pi with MPI
+
+   \[
+   \pi = 4 \int_{0}^{1} \sqrt{1-x^2}\, dx
+   \sim 4 \sum_{i=1}^M \sqrt{1-x_i^2}\, \Delta x
+   \]
+   \[
+   \; \text{with} \;
+   0 \le x_i \le 1 \;\text{and}\; \Delta x = x_{i+1} - x_i
+   \]
+
+   - Define a grid of $M$ points
+   - Split the grid over the $N$ processes
+   - Each process computes part of the sum
+   - The partial sums are reduced on the master process
+
+** Computation of \pi with MPI (Python)
+
+   #+begin_src python :tangle Codes/mpi_pi.py
+#!/usr/bin/env python
+from mpi4py import MPI
+import sys
+from math import sqrt
+
+def main():
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    size = comm.Get_size()
+
+    M = int(sys.argv[1])              # Total number of grid points
+    M_local = (M-1) // size + 1       # Number of grid points computed locally
+    istart = rank * M_local           # Beginning of the local interval
+    iend = min(istart + M_local, M)   # End of the local interval
+    dx = 1./float(M)                  # Delta x
+
+    sum = 0.
+    for i in range(istart, iend):
+        x = (i+0.5)*dx
+        sum += sqrt(1.-x*x)
+   #+end_src
+
+** Computation of \pi with MPI (Python)
+
+   #+begin_src python :tangle Codes/mpi_pi.py
+    result = 4. * dx * sum
+    pi = comm.reduce(result, MPI.SUM)
+    print("%10f -> %10f : %10f"%(istart*dx, iend*dx, result))
+    if rank == 0:
+        print("Result = ", pi)
+
+if __name__ == "__main__": main()
+   #+end_src
+
+   #+LATEX: \begin{columns}
+   #+LATEX: \begin{column}{0.6\textwidth}
+   #+begin_src text
+$ mpiexec -n 4 python mpi_pi.py 100000000
+  0.000000 ->   0.250000 :   0.989483
+Result =  3.1415926535902408
+  0.250000 ->   0.500000 :   0.923740
+  0.500000 ->   0.750000 :   0.775058
+  0.750000 ->   1.000000 :   0.453312
+   #+end_src
+   #+LATEX: \end{column}
+   #+LATEX: \begin{column}{0.3\textwidth}
+   #+ATTR_LATEX: :width \textwidth
+   [[./pi_v1.png]]
+   #+LATEX: \end{column}
+   #+LATEX: \end{columns}
+
+** Alternate Computation of \pi with MPI (Python)
+
+   #+begin_src python :tangle Codes/mpi_pi_v2.py
+#!/usr/bin/env python
+from mpi4py import MPI
+import sys
+from math import sqrt
+
+def main():
+    comm = MPI.COMM_WORLD
+    rank = comm.Get_rank()
+    size = comm.Get_size()
+
+    M = int(sys.argv[1])   # Total number of grid points
+    dx = 1./float(M)       # Delta x
+
+    sum = 0.
+    for i in range(rank, M, size):
+        x = (i+0.5)*dx
+        sum += sqrt(1.-x*x)
+   #+end_src
+
+** Alternate Computation of \pi with MPI (Python)
+
+   #+begin_src python :tangle Codes/mpi_pi_v2.py
+    result = 4. * dx * sum
+    pi = comm.reduce(result, MPI.SUM)
+    print(rank, result)
+    if rank == 0:
+        print("Result = ", pi)
+
+if __name__ == "__main__": main()
+   #+end_src
+
+   #+LATEX: \begin{columns}
+   #+LATEX: \begin{column}{0.6\textwidth}
+   #+begin_src text
+$ mpiexec -n 4 python mpi_pi_v2.py 100000000
+0 0.7853981783959749
+Result =  3.1415926535901777
+2 0.7853981583983196
+3 0.7853981483981102
+1 0.7853981683977732
+   #+end_src
+   #+LATEX: \end{column}
+   #+LATEX: \begin{column}{0.3\textwidth}
+   #+ATTR_LATEX: :width \textwidth
+   [[./pi_v2.png]]
+   #+LATEX: \end{column}
+   #+LATEX: \end{columns}
 
-** Deadlocks
 
 * OpenMP
 
 * Exercises
@@ -1527,6 +1751,19 @@ digraph G {
 
    #+RESULTS:
 
+   #+BEGIN_SRC dot :output file :file deadlock.png
+digraph G {
+  graph [layout=dot]
+  A -> B ;
+  B -> C ;
+  C -> D ;
+  D -> A ;
+}
+   #+END_SRC
+
+   #+RESULTS:
+   [[file:deadlock.png]]
+
 
 * Exam questions                                                   :noexport:
 
@@ -1600,7 +1837,7 @@
 (setq org-latex-listings 'minted)
 (setq org-latex-custom-lang-environments nil)
 (setq org-latex-minted-options
-      '(("frame" "lines")
+      '(
        ("fontsize" "\\scriptsize")
        ("linenos" "")))
 (setq org-latex-to-pdf-process
diff --git a/pi_v1.png b/pi_v1.png
new file mode 100644
index 0000000..e73b544
Binary files /dev/null and b/pi_v1.png differ
diff --git a/pi_v2.png b/pi_v2.png
new file mode 100644
index 0000000..932f56a
Binary files /dev/null and b/pi_v2.png differ