diff --git a/parallelism_scemama.org b/parallelism_scemama.org index d02c16f..895666c 100644 --- a/parallelism_scemama.org +++ b/parallelism_scemama.org @@ -25,10 +25,9 @@ * Program NOEXPORT :noexport: - 14:00-14:30 : Supercomputers 14:30-15:30 : Parallelism -15:45-17:30 : OpenMP/MPI +15:45-17:30 : MPI / OpenMP 09:00-10:30 : Presentation IRP + QP Pour IRPF90, je peux faire une presentation assez generale. @@ -656,7 +655,7 @@ sys 0m3.172s - Processes interact only through system-provided communication mechanisms - Fork: creates a copy of the current process - Exec: switches to running another binary executable - - Spawn: =Fork=, then =exec= the child + - Spawn: =Fork=, =exec= the child and =wait= for its termination *** Thread - Exist as subsets of a process - Context switching between threads is fast @@ -783,9 +782,8 @@ def main(): # Read data from the child print("Reading from the child") s = r.read() - r.close() print("Read '%s' from the child"%(s)) - + r.close() ; os.wait() #+end_src #+LATEX: \end{column} #+LATEX: \begin{column}{0.4\textwidth} @@ -813,7 +811,6 @@ def main(): if __name__ == "__main__": main() - #+end_src #+LATEX: \end{column} #+LATEX: \begin{column}{0.4\textwidth} @@ -1172,8 +1169,7 @@ $ python pi_server_python.py & #+end_src - -* Message Passing Interface (MPI) +* Message Passing Interface (MPI) :noexport: ** Message Passing Interface @@ -1515,7 +1511,7 @@ if __name__ == "__main__": main() #+LATEX: \begin{columns} #+LATEX: \begin{column}{0.6\textwidth} #+begin_src text -$ mpiexec -n 8 python mpi_pi_v2.py 100000000 +$ mpiexec -n 4 python mpi_pi_v2.py 100000000 0 0.7853981783959749 Result = 3.1415926535901777 2 0.7853981583983196 @@ -1529,6 +1525,155 @@ Result = 3.1415926535901777 #+LATEX: \end{column} #+LATEX: \end{columns} +* Multi-threading +** Processes /vs/ threads + +*** Process + - Has its own memory address space + - Context switching between processes is slow + - Processes interact only through system-provided communication mechanisms + - 
Fork: creates a copy of the current process + - Exec: switches to running another binary executable + - Spawn: =Fork=, then =exec= the child +*** Thread + - Exist as subsets of a process + - Context switching between threads is fast + - Share the same memory address space : interact via shared memory + +** Threads + #+LATEX: \begin{columns} + #+LATEX: \begin{column}{0.5\textwidth} + #+ATTR_LATEX: :height 0.5\textheight + [[./smp.png]] + #+LATEX: \end{column} + #+LATEX: \begin{column}{0.5\textwidth} + - Concurrent programming + - Graphical user interfaces (progress bars, ...) + - Asynchronous I/O + - Standard library: POSIX threads (pthreads) + #+LATEX: \end{column} + #+LATEX: \end{columns} + +*** Communication time + - Low-latency network : \sim 1.2 microsecond + - Random memory access : \sim 0.1 microsecond + +** Threads example (Python) + + #+begin_src python :tangle Codes/thread_python.py +#!/usr/bin/env python +import threading +import time + +class test: + def __init__(self, Nthreads): + self.Nthreads = Nthreads + self.data = [ i for i in range(Nthreads) ] + + def run_thread(self, j): + self.data[j] = 0 + time.sleep(j) + self.data[j] = j + #+end_src + +** Threads example (Python) + + #+begin_src python :tangle Codes/thread_python.py + def run(self): + thread = [ None ] * self.Nthreads + t0 = time.time() + print(self.data) + for i in range(self.Nthreads): + thread[i] = threading.Thread( target=self.run_thread, args=(i,) ) + thread[i].start() + for i in range(self.Nthreads): + thread[i].join() + print(time.time()-t0, "seconds. ", self.data) + +if __name__ == '__main__': + t = test(4) + t.run() + + #+end_src +------------------- + #+begin_src text +$ python thread_python.py +[0, 1, 2, 3] +0.0009775161743164062 seconds. [0, 0, 0, 0] +1.0018701553344727 seconds. [0, 1, 0, 0] +2.003377676010132 seconds. [0, 1, 2, 0] +3.004056930541992 seconds. 
[0, 1, 2, 3] + #+end_src + +** Computation of \pi with threads in Python + + #+begin_src python :tangle Codes/pi_thread_python.py +#!/usr/bin/env python +import os, sys, threading +from random import random, seed +from math import sqrt + +NMAX = 10000000 # Nb of MC steps/process +error_threshold = 1.0e-4 # Stopping criterion + +class pi_calculator: + def __init__(self, Nthreads): + self.Nthreads= Nthreads + self.results = [] + self.lock = threading.Lock() + + def compute_pi(self): + result = 0. + for i in range(NMAX): # Loop NMAX times + x,y = random(), random() # Draw 2 random numbers x and y + if x*x + y*y <= 1.: # Check if (x,y) is in the circle + result += 1 + with self.lock: + self.results.append(4.* float(result)/float(NMAX)) + #+end_src + +** Computation of \pi with threads in Python + + #+begin_src python :tangle Codes/pi_thread_python.py + def run(self): + thread = [None] * self.Nthreads + for i in range(self.Nthreads): + thread[i] = threading.Thread( target=self.compute_pi, args=() ) + thread[i].start() + print("All threads started") + + while True: + for i in range(self.Nthreads): + thread[i].join() + N = len(self.results) + average = sum(self.results)/N # Compute average + if N > 2: # Compute variance + l = [ (x-average)*(x-average) for x in self.results ] + variance = sum(l)/(N-1.) + else: + variance = 0. + error = sqrt(variance)/sqrt(N) # Compute error + print("%f +/- %f %d"%(average, error, N)) + #+end_src + +** Computation of \pi with threads in Python + + #+begin_src python :tangle Codes/pi_thread_python.py + if N > 2 and error < error_threshold: # Stopping condition + return + + for i in range(self.Nthreads): + thread[i] = threading.Thread( target=self.compute_pi, args=() ) + thread[i].start() + +if __name__ == '__main__': + calc = pi_calculator(4) + calc.run() + #+end_src + + Note: Inefficient in Python because of the Global Interpreter Lock + (GIL), but you get the idea. 
+ * OpenMP * Exercises diff --git a/smp.png b/smp.png new file mode 100644 index 0000000..a88f096 Binary files /dev/null and b/smp.png differ