From 0edf87790f7169dd097438fda1354be0f20b29ac Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 21 Nov 2021 23:49:55 +0100 Subject: [PATCH] Exercise 1 --- parallelism_scemama.org | 300 ++++++++++++++++++++++++++++++++++------ 1 file changed, 254 insertions(+), 46 deletions(-) diff --git a/parallelism_scemama.org b/parallelism_scemama.org index c523ad5..0c1f053 100644 --- a/parallelism_scemama.org +++ b/parallelism_scemama.org @@ -1052,18 +1052,18 @@ $ python Codes/socket_client.py lpqdh82 11279 ** Server for \pi with sockets in Python + #+NAME:py_server #+begin_src python :tangle Codes/pi_server_python.py #!/usr/bin/env python import sys, os, socket from math import sqrt -HOSTNAME = "localhost" PORT = 1666 error_threshold = 1.e-4 # Stopping criterion def main(): data = [] s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # Create an INET socket - s.bind( (HOSTNAME, PORT) ) # Bind the socket to the address and port + s.bind( (socket.gethostname(), PORT) ) # Bind the socket to the address and port while True: s.listen(5) # Wait for incoming connections conn, addr = s.accept() # Accept connection @@ -1077,7 +1077,8 @@ def main(): #+end_src ** Server for \pi with sockets in Python - + + #+NAME:py_server2 #+begin_src python :tangle Codes/pi_server_python.py average = sum(data)/N # Compute average if N > 2: # Compute variance @@ -1135,7 +1136,7 @@ def main(): X = compute_pi() s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # Create an INET socket try: # Connect the socket to the address and port of the server - s.connect( (HOSTNAME, PORT) ) + s.connect( (HOSTNAME, PORT) ) except socket.error: break message = str(X) @@ -1680,7 +1681,7 @@ if __name__ == '__main__': - OpenMP is an extension of programming languages that enable the use of multi-threading to parallelize the code using directives. 
- The OpenMP library may be implemented using =pthreads= - - Extensions in OpenMP5 to offload code execution to GPUs + - Extensions in OpenMP 5.0 to offload code execution to GPUs - The same source code can be executed with/without OpenMP #+begin_src fortran @@ -1693,64 +1694,164 @@ if __name__ == '__main__': !$OMP END PARALLEL #+end_src +** OpenMP directives (Fortran) + +- ~!$OMP PARALLEL~ starts a new multi-threaded section. + Everything inside this block is executed by /all/ the threads +- ~!$OMP DO~ tells the compiler to split the loop among the different + threads (by changing the loop boundaries for instance) +- ~!$OMP END DO~ marks the end of the parallel loop. + It contains an implicit synchronization. + After this line, all the threads have finished executing the loop. +- ~!$OMP END PARALLEL~ marks the end of the parallel section. Contains also an implicit barrier. +- ~DEFAULT(SHARED)~ : all the variables (A,B,C) are in shared memory by default +- ~PRIVATE(i)~ : the variable i is private to every thread + +** OpenMP directives (Fortran) + +- ~!$OMP CRITICAL ... !$OMP END CRITICAL~ : all the statements in this block are protected by a lock +- ~!$OMP TASK ... !$OMP END TASK~ : define a new task to execute +- ~!$OMP BARRIER~ : synchronization barrier +- ~!$OMP SINGLE ... !$OMP END SINGLE~ : all the statements in this block are executed by a single thread +- ~!$OMP MASTER ... 
!$OMP END MASTER~ : all the statements in this block are executed by the master thread + +** OpenMP + +*** Functions + + - ~omp_get_thread_num()~ : returns the ID of the current running + thread (like ~MPI_Rank~) + - ~omp_get_num_threads()~ : returns the total number of running + threads (like ~MPI_Size~) + - ~OMP_NUM_THREADS~ : Environment variable (shell) that fixes the + number of threads to run + +*** Important + + - Multiple threads *can read* at the same memory address + - Multiple threads **must not write** at the same address + +** Matrix multiplication + + \[ C_{ij} = \sum_{k=1}^N A_{ik} B_{kj} \] + + #+begin_src fortran + do j=1,N + do i=1,N + C(i,j) = 0.d0 + do k=1,N + C(i,j) = C(i,j) + A(i,k) * B(k,j) + end do + end do + end do + #+end_src + +** Matrix multiplication with OpenMP + + \[ C_{ij} = \sum_{k=1}^N A_{ik} B_{kj} \] + + #+LATEX: \begin{columns} + #+LATEX: \begin{column}{0.5\textwidth} + #+begin_src fortran + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE (i,j,k) + do j=1,N + do i=1,N + C(i,j) = 0.d0 + do k=1,N + C(i,j) = C(i,j) + A(i,k) * B(k,j) + end do + end do + end do + !$OMP END PARALLEL DO + #+end_src + #+LATEX: \end{column} + #+LATEX: \begin{column}{0.4\textwidth} + - Loop is parallelized over ~j~ + - Writing in ~C(i,j)~ is OK + #+LATEX: \end{column} + #+LATEX: \end{columns} + +** Matrix multiplication with OpenMP + + \[ C_{ij} = \sum_{k=1}^N A_{ik} B_{kj} \] + + #+LATEX: \begin{columns} + #+LATEX: \begin{column}{0.5\textwidth} + #+begin_src fortran + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE (i,j,k) + !$OMP DO COLLAPSE(2) + do j=1,N + do i=1,N + C(i,j) = 0.d0 + do k=1,N + C(i,j) = C(i,j) + A(i,k) * B(k,j) + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + #+end_src + #+LATEX: \end{column} + #+LATEX: \begin{column}{0.4\textwidth} + - Loop is parallelized over pairs ~(i,j)~ + - Writing in ~C(i,j)~ is OK + #+LATEX: \end{column} + #+LATEX: \end{columns} + + * Exercises - -** Monte Carlo +** Exercise 1: Monte Carlo 1. 
Write a Fortran src_fortran{double precision function compute_pi(M)} that computes $\pi$ with the Monte Carlo algorithm using $M$ samples - 2. Call it like this: - #+begin_src fortran + 2. Call it using this main program: + #+begin_src fortran :tangle Exercises/Ex1/main.f90 program pi_mc implicit none - integer :: M - logical :: iterate - double precision :: sample + integer :: M + logical :: iterate + double precision :: sample double precision, external :: compute_pi - call random_seed() ! Initialize random number generator - read (*,*) M ! Read number of samples in compute_pi - + open(unit=11, file='fortran_out.fifo', status='old', action='write', & + access='stream',form='formatted') iterate = .True. do while (iterate) ! Compute pi over N samples until 'iterate=.False.' - sample = compute_pi(M) - write(*,*) sample - read (*,*) iterate + sample = compute_pi() + write(11,*) sample end do end program pi_mc #+end_src -** Monte Carlo +** Exercise 1: Monte Carlo - 3. Write a Fortran src_fortran{double precision function compute_pi(M)} that computes - $\pi$ with the Monte Carlo algorithm using $M$ samples - #+begin_src fortran -program pi_mc - implicit none - integer :: M - logical :: iterate - double precision :: sample - double precision, external :: compute_pi - - call random_seed() ! Initialize random number generator - read (*,*) M ! Read number of samples in compute_pi - - iterate = .True. - do while (iterate) ! Compute pi over N samples until 'iterate=.False.' - sample = compute_pi(M) - write(*,*) sample - read (*,*) iterate - end do -end program pi_mc - #+end_src + 3. Write a Python server =pi_server.py= that receives samples of \pi in a socket + and computes the running average of \pi. Its address and port + number are written in a file =server.cfg=. + 4. Write a Python script =pi_bridge.py= that reads samples of \pi in a named pipe + =fortran_out.fifo= and sends the samples to the server. 
+ This script can read the address and port number of the + server from the file =server.cfg=. + 5. When the convergence criterion is reached, the server informs + the bridges that they can stop. -** Monte Carlo (solution) +*** Running a simulation on multiple nodes + - Run a single server + - Run one bridge per compute node using the =mpiexec= command + - Run one Fortran process per core using the =mpiexec= command - #+begin_src fortran -double precision function compute_pi(M) +** Exercise 2: Divide and conquer for matrix multiplication + +* Solutions :noexport: + +** Exercise 1: Monte Carlo + +*** Compute_pi.f90 + #+begin_src fortran :tangle Exercises/Ex1/compute_pi.f90 +double precision function compute_pi() implicit none - integer, intent(in) :: M + integer, parameter :: M=100000000 double precision :: x, y, n_in integer :: i @@ -1762,11 +1863,118 @@ double precision function compute_pi(M) n_in = n_in+1.d0 end if end do - compute_pi = 4.d0*n_in/dble(nmax) + compute_pi = 4.d0*n_in/dble(M) end function compute_pi + #+end_src +*** Compilation + #+begin_src bash +gfortran compute_pi.f90 main.f90 -o pi.x + #+end_src +*** Python bridge + + #+begin_src python :tangle Exercises/Ex1/pi_bridge.py +#!/usr/bin/env python +import os, sys, socket + +server_config = "server.cfg" # Configuration file +pipe_in = "fortran_out.fifo" # Named pipe for input + +def main(): + + # Read hostname and port number from config file + with open(server_config, 'r') as f: + HOSTNAME = f.readline().strip() + PORT = int( f.readline() ) + + # Open named pipe for reading the output of the Fortran executables + p_in = open(pipe_in, 'r') + + print("Bridge connects to", HOSTNAME, PORT) + while True: + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # Create an INET socket + try: # Connect the socket to the address and port of the server + s.connect( (HOSTNAME, PORT) ) + except socket.error: + print("Server inactive") + break + X = p_in.readline() + message = str(X) + 
s.send(message.encode()) # Send the data + reply = s.recv(128).decode('utf-8') # Read the reply of the server + + if reply == "STOP": break + +if __name__ == '__main__': + main() + + #+end_src + +*** Python server + + #+begin_src python :tangle Exercises/Ex1/pi_server.py +#!/usr/bin/env python +import sys, os, socket +from math import sqrt + +error_threshold = 1.e-5 # Stopping criterion +PORT = 1666 # Port number +server_config = "server.cfg" # Config file for the server + +def main(): + HOSTNAME = "localhost" #socket.gethostname() + with open(server_config, 'w') as f: # Write hostname to config file + f.write(HOSTNAME+"\n") + f.write(str(PORT)+"\n") + print("Server is", HOSTNAME, PORT) + + data = [] + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # Create an INET socket + s.bind( (HOSTNAME, PORT) ) # Bind the socket to the address and port + + while True: + s.listen(5) # Wait for incoming connections + conn, addr = s.accept() # Accept connection + X = "" + while True: # Buffered read of the socket + message = conn.recv(128) + X += message.decode('utf-8') + if len(message) < 128: break + data.append( float(X) ) + N = len(data) + average = sum(data)/N # Compute average + if N > 2: # Compute variance + l = [ (x-average)*(x-average) for x in data ] + variance = sum(l)/(N-1.) + else: + variance = 0. + error = sqrt(variance)/sqrt(N) # Compute error + + print('%f +/- %f'%(average,error)) + + # Stopping condition + if N > 2 and error < error_threshold: + conn.send("STOP".encode()) + break + else: + conn.send("OK".encode()) + conn.close() + +if __name__ == "__main__": + main() + #+end_src +*** Script + #+begin_src bash :tangle Exercises/Ex1/run.sh +#!/bin/bash + +mkfifo fortran_out.fifo +python pi_server.py & +mpiexec --pernode python pi_bridge.py & +mpiexec pi.x +rm fortran_out.fifo + #+end_src +** Divide and conquer for matrix multiplication - #+end_src * Figures :noexport: #+BEGIN_SRC dot :output file :file merge.png