Anthony Scemama 2021-11-21 16:12:12 +01:00
parent 7037335bff
commit 65b8e4294d
4 changed files with 246 additions and 9 deletions

BIN  deadlock.png  (new binary file, 13 KiB; not shown)

@@ -1212,6 +1212,25 @@ $ python pi_server_python.py &
- Receive:
~MPI_RECV(buffer, count, datatype, source, tag, communicator,
status, ierror)~
** Synchronous or asynchronous?
- ~MPI_Ssend~ :: Blocking synchronous send. Returns upon notification
that the message has been received (safe).
- ~MPI_Isend~ :: Non-blocking send. Returns immediately (dangerous).
- ~MPI_Send~ :: Returns when the send buffer is ready for
  reuse. Non-deterministic: the behavior depends on the MPI implementation,
  the size of the data, the number of ranks, ... (dangerous)
#+LATEX: \begin{alertblock}{Important}
- While writing and debugging the program:
  - /Always/ use ~MPI_Ssend~
  - Use ~MPI_Isend~ only where ~MPI_Ssend~ is not applicable
- Once the code is well tested:
  - You can replace ~MPI_Ssend~ with ~MPI_Send~ as an optimization.
  - If ~MPI_Send~ is still too slow, consider rewriting the code with ~MPI_Isend~.
#+LATEX: \end{alertblock}
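** Non-blocking send (Python)
A minimal mpi4py sketch of a non-blocking send (values and tags are
only illustrative): the request returned by ~isend~ must be completed
with ~wait()~ before the send can be considered done.
#+begin_src python
#!/usr/bin/env python
# Run with at least 2 MPI processes
from mpi4py import MPI

def main():
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    if rank == 0:
        req = comm.isend(42, dest=1, tag=11)   # returns immediately
        # ... computation can overlap with the communication here ...
        req.wait()                             # complete the send
    elif rank == 1:
        data = comm.recv(source=0, tag=11)
        print("Rank 1 received", data)

if __name__ == "__main__": main()
#+end_src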
** Point-to-point communication (Python)
#+begin_src python :tangle Codes/mpi_rank.py
@@ -1225,7 +1244,7 @@ def main():
if rank == 0:
data = 42
print("Before: Rank: %d Size: %d Data: %d"%(rank, size, data))
comm.send(data, dest=1, tag=11)
comm.ssend(data, dest=1, tag=11)
print("After : Rank: %d Size: %d Data: %d"%(rank, size, data))
elif rank == 1:
data = 0
@@ -1278,7 +1297,7 @@ program test_rank
if (rank == 0) then
data = 42
print *, "Before: Rank:", rank, "Size:", size, "Data: ", data
call MPI_SEND(data, 1, MPI_INTEGER, 1, 11, MPI_COMM_WORLD, ierr)
call MPI_SSEND(data, 1, MPI_INTEGER, 1, 11, MPI_COMM_WORLD, ierr)
print *, "After : Rank:", rank, "Size:", size, "Data: ", data
else if (rank == 1) then
@@ -1293,18 +1312,223 @@ program test_rank
end program
#+end_src
** Deadlocks
#+LATEX: \begin{columns}
#+LATEX: \begin{column}{0.3\textwidth}
#+ATTR_LATEX: :height 0.8 \textheight
[[./deadlock.png]]
#+LATEX: \end{column}
#+LATEX: \begin{column}{0.7\textwidth}
#+LATEX: \begin{exampleblock}{Deadlock}
Each process waits for a message coming from another process
#+LATEX: \end{exampleblock}
Example: a round-robin ring, where each rank sends a message to the next rank
#+LATEX: \end{column}
#+LATEX: \end{columns}
** Deadlock example
#+begin_src fortran :tangle Codes/mpi_deadlock.f90
program deadlock
use mpi
implicit none
integer :: rank, size, value, source, destination, ierr
integer, parameter :: tag=100
call MPI_INIT(ierr)
call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
source = mod(size+rank-1, size)
destination = mod(rank+1 , size)
call MPI_SSEND(rank+10, 1, MPI_INTEGER, destination, tag, MPI_COMM_WORLD, ierr)
call MPI_RECV(value, 1, MPI_INTEGER, source, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
print *, rank, 'received', value, 'from', source
call MPI_FINALIZE(ierr)
end program
#+end_src
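** Deadlock example (Python)
The same ring written with mpi4py, as a sketch: every rank blocks in
the synchronous send because no receive has been posted yet.
#+begin_src python
#!/usr/bin/env python
from mpi4py import MPI

def main():
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    source      = (rank - 1) % size   # left neighbour in the ring
    destination = (rank + 1) % size   # right neighbour in the ring

    # Deadlock: every rank waits here for a receive that is never posted
    comm.ssend(rank + 10, dest=destination, tag=100)
    value = comm.recv(source=source, tag=100)
    print(rank, "received", value, "from", source)

if __name__ == "__main__": main()
#+end_src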
** Send-receive
The ~MPI_Sendrecv~ function sends and receives a message /simultaneously/.
It can avoid deadlocks.
#+begin_example
MPI_SENDRECV(SENDBUF, SENDCOUNT, SENDTYPE, DEST, SENDTAG,
RECVBUF, RECVCOUNT, RECVTYPE, SOURCE, RECVTAG,
COMM, STATUS, IERROR)
<type> SENDBUF(*), RECVBUF(*)
INTEGER :: SENDCOUNT, SENDTYPE, DEST, SENDTAG
INTEGER :: RECVCOUNT, RECVTYPE, SOURCE, RECVTAG, COMM
INTEGER :: STATUS(MPI_STATUS_SIZE), IERROR
#+end_example
** Send-receive
#+begin_src fortran :tangle Codes/mpi_sendrecv.f90
program sendrecv
use mpi
implicit none
integer :: rank, size, value, source, destination, ierr
integer, parameter :: tag=100
call MPI_INIT(ierr)
call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr)
source = mod(size+rank-1, size)
destination = mod(rank+1 , size)
call MPI_SENDRECV(rank+10, 1, MPI_INTEGER, destination, tag, value, &
1, MPI_INTEGER, source, tag, MPI_COMM_WORLD, &
MPI_STATUS_IGNORE, ierr)
print *, rank, 'received', value, 'from', source
call MPI_FINALIZE(ierr)
end program
#+end_src
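** Send-receive (Python)
An mpi4py sketch of the same ring with ~sendrecv~: the send and the
receive are posted together, so the ring cannot deadlock.
#+begin_src python
#!/usr/bin/env python
from mpi4py import MPI

def main():
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    source      = (rank - 1) % size   # left neighbour in the ring
    destination = (rank + 1) % size   # right neighbour in the ring

    # Send to the right neighbour and receive from the left one, in one call
    value = comm.sendrecv(rank + 10, dest=destination, sendtag=100,
                          source=source, recvtag=100)
    print(rank, "received", value, "from", source)

if __name__ == "__main__": main()
#+end_src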
** Collective communications
*** One-to-all
- ~MPI_Bcast~ :: Broadcast the same data to all ranks
- ~MPI_Scatter~ :: Distribute an array over the ranks
*** All-to-one
- ~MPI_Reduce~ :: Sum/product/... of data coming from all ranks
- ~MPI_Gather~ :: Collect a distributed array
*** All-to-all
- ~MPI_Barrier~ :: Global synchronization
- ~MPI_Allreduce~ :: Reduce and broadcast the result
- ~MPI_Allgather~ :: Gather and broadcast the result
- ~MPI_Alltoall~ :: Every rank scatters its data to, and gathers data from, all other ranks
A short mpi4py sketch of some of these calls is given on the next slide.
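** Collective communications (Python)
An mpi4py sketch of some of the collectives listed on the previous
slide, using the pickle-based lowercase API (the values are only
illustrative).
#+begin_src python
#!/usr/bin/env python
from mpi4py import MPI

def main():
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # One-to-all
    data  = comm.bcast(42 if rank == 0 else None, root=0)
    chunk = comm.scatter(list(range(size)) if rank == 0 else None, root=0)

    # All-to-one: only rank 0 receives the sum
    total = comm.reduce(rank, op=MPI.SUM, root=0)

    # All-to-all: every rank receives the full list of ranks
    ranks = comm.allgather(rank)

    print(rank, data, chunk, total, ranks)

if __name__ == "__main__": main()
#+end_src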
** Computation of \pi with MPI
\[
\pi = 4 \int_{0}^{1} \sqrt{1-x^2} dx
\sim 4 \sum_{i=1}^M \sqrt{1-x_i^2}\, \Delta x
\]
\[
\; \text{with} \;
0 \le x_i \le 1 \;\text{and}\; \Delta x = x_{i+1} - x_i
\]
- Define a grid of $M$ points
- Split the grid on $N$ processes
- Each process computes part of the sum
- The partial sums are reduced on the master process
** Computation of \pi with MPI (Python)
#+begin_src python :tangle Codes/mpi_pi.py
#!/usr/bin/env python
from mpi4py import MPI
import sys
from math import sqrt
def main():
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
M = int(sys.argv[1]) # Total Number of grid points
M_local = (M-1) // size + 1 # Number of grid points to compute locally
istart = rank * M_local # Beginning of interval
iend = min(istart + M_local, M) # End of interval
dx = 1./float(M) # Delta x
sum = 0.
for i in range(istart, iend):
x = (i+0.5)*dx
sum += sqrt(1.-x*x)
#+end_src
** Computation of \pi with MPI (Python)
#+begin_src python :tangle Codes/mpi_pi.py
result = 4. * dx * sum
pi = comm.reduce(result, MPI.SUM)
print("%10f -> %10f : %10f"%(istart*dx, iend*dx, result))
if rank == 0:
print("Result = ", pi)
if __name__ == "__main__": main()
#+end_src
#+LATEX: \begin{columns}
#+LATEX: \begin{column}{0.6\textwidth}
#+begin_src text
$ mpiexec -n 4 python mpi_pi.py 100000000
0.000000 -> 0.250000 : 0.989483
Result = 3.1415926535902408
0.250000 -> 0.500000 : 0.923740
0.500000 -> 0.750000 : 0.775058
0.750000 -> 1.000000 : 0.453312
#+end_src
#+LATEX: \end{column}
#+LATEX: \begin{column}{0.3\textwidth}
#+ATTR_LATEX: :width \textwidth
[[./pi_v1.png]]
#+LATEX: \end{column}
#+LATEX: \end{columns}
** Alternate Computation of \pi with MPI (Python)
#+begin_src python :tangle Codes/mpi_pi_v2.py
#!/usr/bin/env python
from mpi4py import MPI
import sys
from math import sqrt
def main():
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
M = int(sys.argv[1]) # Total Number of grid points
dx = 1./float(M) # Delta x
sum = 0.
for i in range(rank, M, size):
x = (i+0.5)*dx
sum += sqrt(1.-x*x)
#+end_src
** Alternate Computation of \pi with MPI (Python)
#+begin_src python :tangle Codes/mpi_pi_v2.py
result = 4. * dx * sum
pi = comm.reduce(result, MPI.SUM)
print(rank, result)
if rank == 0:
print("Result = ", pi)
if __name__ == "__main__": main()
#+end_src
#+LATEX: \begin{columns}
#+LATEX: \begin{column}{0.6\textwidth}
#+begin_src text
$ mpiexec -n 8 python mpi_pi_v2.py 100000000
0 0.7853981783959749
Result = 3.1415926535901777
2 0.7853981583983196
3 0.7853981483981102
1 0.7853981683977732
#+end_src
#+LATEX: \end{column}
#+LATEX: \begin{column}{0.3\textwidth}
#+ATTR_LATEX: :width \textwidth
[[./pi_v2.png]]
#+LATEX: \end{column}
#+LATEX: \end{columns}
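** Computation of \pi with Allreduce (Python)
If every rank needs the final value of \pi (for instance to continue a
computation), the reduction can be replaced by an all-reduce. A sketch
of the previous program using ~allreduce~ (variable names are only
illustrative):
#+begin_src python
#!/usr/bin/env python
from mpi4py import MPI
import sys
from math import sqrt

def main():
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    M  = int(sys.argv[1])           # Total number of grid points
    dx = 1./float(M)                # Delta x
    s  = 0.
    for i in range(rank, M, size):  # Same strided loop as mpi_pi_v2.py
        x = (i+0.5)*dx
        s += sqrt(1.-x*x)

    # Reduce and broadcast: every rank obtains pi
    pi = comm.allreduce(4. * dx * s, op=MPI.SUM)
    print(rank, pi)

if __name__ == "__main__": main()
#+end_src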
* OpenMP
* Exercises
@@ -1527,6 +1751,19 @@ digraph G {
#+RESULTS:
#+BEGIN_SRC dot :output file :file deadlock.png
digraph G {
graph [layout=dot]
A -> B ;
B -> C ;
C -> D ;
D -> A ;
}
#+END_SRC
#+RESULTS:
[[file:deadlock.png]]
* Exam questions :noexport:
@@ -1600,7 +1837,7 @@ digraph G {
(setq org-latex-listings 'minted)
(setq org-latex-custom-lang-environments nil)
(setq org-latex-minted-options
'(("frame" "lines")
'(
("fontsize" "\\scriptsize")
("linenos" "")))
(setq org-latex-to-pdf-process

BIN  pi_v1.png  (new binary file, 21 KiB; not shown)

BIN  pi_v2.png  (new binary file, 27 KiB; not shown)