** Server for \pi with sockets in Python
#+begin_src python :tangle Codes/pi_server_python.py
#!/usr/bin/env python
import sys, os, socket
from math import sqrt
HOSTNAME = "localhost"
PORT = 1666
error_threshold = 1.e-4 # Stopping criterion
def main():
data = []
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # Create an INET socket
s.bind( (socket.gethostname(), PORT) ) # Bind the socket to the address and port
while True:
s.listen(5) # Wait for incoming connections
conn, addr = s.accept() # Accept connection
** Server for \pi with sockets in Python
#+begin_src python :tangle Codes/pi_server_python.py
average = sum(data)/N # Compute average
if N > 2: # Compute variance
X = compute_pi()
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # Create an INET socket
try: # Connect the socket to the address and port of the server
s.connect( (HOSTAME, PORT) )
except socket.error:
break
message = str(X)
- OpenMP is an extension of programming languages that enables the use of
multi-threading to parallelize the code using directives.
- The OpenMP library may be implemented using =pthreads=
- Extensions in OpenMP 5.0 to offload code execution to GPUs
- The same source code can be executed with/without OpenMP
#+begin_src fortran
#+end_src
** OpenMP directives (Fortran)
- ~!$OMP PARALLEL~ starts a new multi-threaded section.
Everything inside this block is executed by /all/ the threads
- ~!$OMP DO~ tells the compiler to split the loop among the different
threads (by changing the loop boundaries for instance)
- ~!$OMP END DO~ marks the end of the parallel loop.
It contains an implicit synchronization.
After this line, all the threads have finished executing the loop.
- ~!$OMP END PARALLEL~ marks the end of the parallel section. It also contains an implicit barrier.
- ~DEFAULT(SHARED)~ : all the variables (A,B,C) are in shared memory by default
- ~PRIVATE(i)~ : the variable i is private to every thread
** OpenMP directives (Fortran)
- ~!$OMP CRITICAL ... !$OMP END CRITICAL~ : all the statements in this block are protected by a lock
- ~!$OMP TASK ... !$OMP END TASK~ : define a new task to execute
- ~!$OMP BARRIER~ : synchronization barrier
- ~!$OMP SINGLE ... !$OMP END SINGLE~ : all the statements in this block are executed by a single thread
- ~!$OMP MASTER ... !$OMP END MASTER~ : all the statements in this block are executed by the master thread
** OpenMP
*** Functions
- ~omp_get_thread_num()~ : returns the ID of the current running
thread (like ~MPI_Rank~)
- ~omp_get_num_threads()~ : returns the total number of running
threads (like ~MPI_Size~)
- ~OMP_NUM_THREADS~ : Environment variable (shell) that fixes the
number of threads to run
*** Important
- Multiple threads *can read* from the same memory address
- Multiple threads *must not write* to the same address
** Matrix multiplication
\[ C_{ij} = \sum_{k=1}^N A_{ik} B_{kj} \]
#+begin_src fortran
! Serial reference version: C = A x B, all indices running from 1 to N.
! j is the outermost loop, so C is filled one column j at a time.
do j=1,N
do i=1,N
C(i,j) = 0.d0 ! Reset the element before accumulating into it
do k=1,N
C(i,j) = C(i,j) + A(i,k) * B(k,j) ! Add the k-th term of the sum
end do
end do
end do
#+end_src
** Matrix multiplication with OpenMP
\[ C_{ij} = \sum_{k=1}^N A_{ik} B_{kj} \]
#+begin_src fortran
! C = A x B with the outermost loop (over j) shared among the threads.
! A, B, C and N are shared; the loop indices are private to each thread.
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k)
!$OMP DO
do j=1,N
  do i=1,N
    ! Each thread owns whole columns j, so no two threads write the same C(i,j)
    C(i,j) = 0.d0
    do k=1,N
      C(i,j) = C(i,j) + A(i,k) * B(k,j)
    end do
  end do
end do
!$OMP END DO
!$OMP END PARALLEL
#+end_src
- Loop is parallelized over ~j~
- Writing in ~C(i,j)~ is OK
** Matrix multiplication with OpenMP
\[ C_{ij} = \sum_{k=1}^N A_{ik} B_{kj} \]
#+begin_src fortran
! C = A x B with the two outer loops fused into a single iteration space:
! the N*N pairs (i,j) are shared among the threads.
! A, B, C and N are shared; the loop indices are private to each thread.
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k)
!$OMP DO COLLAPSE(2)
do j=1,N
  do i=1,N
    ! Each pair (i,j) is handled by exactly one thread, so writes never collide
    C(i,j) = 0.d0
    do k=1,N
      C(i,j) = C(i,j) + A(i,k) * B(k,j)
    end do
  end do
end do
!$OMP END DO
!$OMP END PARALLEL
#+end_src
- Loop is parallelized over pairs ~(i,j)~
- Writing in ~C(i,j)~ is OK
* Exercises
** Exercise 1: Monte Carlo
** Monte Carlo
1. Write a Fortran src_fortran{double precision function compute_pi(M)} that computes
$\pi$ with the Monte Carlo algorithm using $M$ samples
2. Call it using this main program:
#+begin_src fortran :tangle Exercises/Ex1/main.f90
program pi_mc
  ! Driver for the Monte Carlo estimate of pi.
  ! Each pass of the loop asks compute_pi for one estimate, writes it to the
  ! named pipe fortran_out.fifo, and then reads from standard input whether
  ! another estimate is wanted.
  implicit none
  logical                    :: iterate
  double precision           :: sample
  double precision, external :: compute_pi

  call random_seed()  ! Initialize random number generator

  ! status='old': the pipe has to exist before the program starts
  open(unit=11, file='fortran_out.fifo', status='old', action='write', &
       form='formatted')

  iterate = .True.
  do while (iterate)  ! Compute pi over N samples until 'iterate=.False.'
    sample = compute_pi()
    write(11,*) sample
    flush(11)         ! Hand the sample to the reader now, not when the buffer fills
    read (*,*) iterate
  end do

  close(11)
end program pi_mc
#+end_src
** Exercise 1: Monte Carlo
3. Write a Python server =pi_server.py= that receives samples of \pi through a socket
and computes the running average of \pi. Its address and port
number are written in a file =server.cfg=.
4. Write a Python script =pi_bridge.py= that reads samples of \pi from a named pipe
=fortran_out.fifo= and sends the samples to the server.
This script can read the address and port number of the
server from the file =server.cfg=.
5. When the convergence criterion is reached, the server informs
the bridges that they can stop.
*** Running a simulation on multiple nodes
- Run a single server
- Run one bridge per compute node using the =mpiexec= command
- Run one Fortran process per core using the =mpiexec= command
** Exercise 2: Divide and conquer for matrix multiplication
* Solutions :noexport:
** Exercise 1: Monte Carlo
*** Compute_pi.f90
#+begin_src fortran :tangle Exercises/Ex1/compute_pi.f90
double precision function compute_pi()
implicit none implicit none
integer :: M integer, parameter :: M=100000000
logical :: iterate
double precision :: sample
double precision, external :: compute_pi
call random_seed() ! Initialize random number generator
read (*,*) M ! Read number of samples in compute_pi
iterate = .True.
do while (iterate) ! Compute pi over N samples until 'iterate=.False.'
sample = compute_pi(M)
write(*,*) sample
read (*,*) iterate
end do
end program pi_mc
** Monte Carlo (solution)
#+begin_src fortran
double precision function compute_pi(M)
  ! Monte Carlo estimate of pi.
  ! Draws M points uniformly in the unit square and counts those that fall
  ! inside the quarter disc of radius 1; that fraction tends to pi/4.
  !
  ! M : number of random points to draw (expected to be > 0)
  implicit none
  integer, intent(in) :: M
  double precision    :: x, y, n_in
  integer             :: i

  n_in = 0.d0         ! Number of points inside the quarter disc
  do i=1, M
    call random_number(x)
    call random_number(y)
    if (x*x + y*y <= 1.d0) then
      n_in = n_in+1.d0
    end if
  end do
  compute_pi = 4.d0*n_in/dble(M)
end function compute_pi
*** Compilation
#+begin_src bash
# Build the function and the main program into the single executable pi.x
gfortran compute_pi.f90 main.f90 -o pi.x
*** Python bridge
#+begin_src python :tangle Exercises/Ex1/pi_bridge.py
#!/usr/bin/env python
import os, sys, socket
server_config = "server.cfg" # Configuration file
pipe_in = "fortran_out.fifo" # Named pipe for input
def main():
    """Forward samples of pi from the named pipe to the server.

    The server address is read from ``server_config`` (host name on the
    first line, port number on the second).  Each line read from
    ``pipe_in`` is sent to the server over its own short-lived connection.
    The loop ends when the pipe reaches end-of-file, when the server can
    no longer be reached, or when the server replies ``STOP``.
    """
    # Read hostname and port number from config file
    with open(server_config, 'r') as f:
        HOSTNAME = f.readline().strip()
        PORT = int( f.readline() )

    # Open named pipe for reading the output of the Fortran executables
    with open(pipe_in, 'r') as p_in:
        print("Bridge connects to", HOSTNAME, PORT)
        while True:
            X = p_in.readline()
            if not X:  # End-of-file on the pipe: nothing left to forward
                break

            # One connection per sample; the socket is closed on leaving the block
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                try:  # Connect the socket to the address and port of the server
                    s.connect( (HOSTNAME, PORT) )
                except socket.error:
                    print("Server inactive")
                    break
                s.sendall(X.encode())                # Send the data
                reply = s.recv(128).decode('utf-8')  # Read the reply of the server

            if reply == "STOP":
                break


if __name__ == '__main__':
    main()
#+end_src
*** Python server
#+begin_src python :tangle Exercises/Ex1/pi_server.py
#!/usr/bin/env python
import sys, os, socket
from math import sqrt
error_threshold = 1.e-5 # Stopping criterion
PORT = 1666 # Port number
server_config = "server.cfg" # Config file for the server
def main():
    """Collect samples of pi over a socket until the error is small enough.

    The host name and port are written to ``server_config`` (one per line)
    so that the bridges can find the server.  Every incoming connection
    carries one sample as text.  After each sample the running average and
    its statistical error are printed.  The server answers ``STOP`` and
    exits once more than two samples are available and the error is below
    ``error_threshold``; otherwise it answers ``OK``.
    """
    HOSTNAME = "localhost" #socket.gethostname()
    with open(server_config, 'w') as f:  # Write hostname and port to config file
        f.write("%s\n%d\n" % (HOSTNAME, PORT))
    print("Server is", HOSTNAME, PORT)

    data = []
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:  # Create an INET socket
        s.bind( (HOSTNAME, PORT) )  # Bind the socket to the address and port
        s.listen(5)                 # Allow incoming connections to queue up
        while True:
            conn, addr = s.accept()  # Accept connection
            with conn:               # The connection is closed on leaving the block
                chunks = []
                while True:          # Buffered read of the socket
                    message = conn.recv(128)
                    chunks.append(message)
                    if len(message) < 128: break
                X = b"".join(chunks).decode('utf-8')

                try:
                    data.append( float(X) )
                except ValueError:
                    # Empty or malformed message: drop it.  Closing the
                    # connection gives the sender an empty reply.
                    continue

                N = len(data)
                average = sum(data)/N  # Compute average
                if N > 2:              # Compute variance
                    variance = sum( (x-average)*(x-average) for x in data )/(N-1.)
                else:
                    variance = 0.
                error = sqrt(variance)/sqrt(N)  # Compute error
                print('%f +/- %f'%(average,error))

                # Stopping condition
                if N > 2 and error < error_threshold:
                    conn.sendall("STOP".encode())
                    break
                conn.sendall("OK".encode())


if __name__ == "__main__":
    main()
*** Script
#+begin_src bash :tangle Exercises/Ex1/run.sh
# Create the named pipe that pi.x writes to and pi_bridge.py reads from
mkfifo fortran_out.fifo
# Start the server in the background
python pi_server.py &
# Start the bridges in the background (--pernode: presumably one per compute node)
mpiexec --pernode python pi_bridge.py &
# Run the Fortran processes in the foreground
mpiexec pi.x
# Remove the named pipe once the Fortran processes have returned
rm fortran_out.fifo
** Divide and conquer for matrix multiplication
* Figures :noexport:
#+BEGIN_SRC dot :output file :file merge.png