final version

This commit is contained in:
Anthony Scemama 2021-07-01 16:04:42 +02:00
parent 22a66f5d6b
commit c57746aabf
8 changed files with 551 additions and 88 deletions

BIN
TREX2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 562 KiB

BIN
interfaces.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

BIN
interfaces2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

BIN
maqao1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 322 KiB

BIN
maqao2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 370 KiB

View File

@ -1,8 +1,9 @@
#+TITLE: Library development within TREX
#+DATE: 12/03/2021
#+AUTHOR: Anthony Scemama
#+TITLE: TREX : an innovative view of HPC usage applied to Quantum Monte Carlo simulations
#+DATE: 02/07/2021
#+AUTHOR: Anthony Scemama$^1$, Pablo de Oliveira Castro$^2$, Cedric Valensi$^2$, William Jalby$^2$
#+LaTeX_HEADER: \institute{Lab. Chimie et Physique Quantiques, IRSAMC, UPS/CNRS, Toulouse (France)}
#+LaTeX_HEADER: \institute{$^1$University of Toulouse/CNRS, LCPQ (France) \\
#+LaTeX_HEADER: $^2$University of Versailles, Li-PaRAD (France)}
#+LATEX_CLASS: beamer
#+LaTeX_CLASS_OPTIONS:[aspectratio=169]
#+BEAMER_THEME: trex
@ -20,12 +21,21 @@
#+LaTeX_HEADER: \definecolor{darkpink}{rgb}{0.7,0.0,0.7}
#+LaTeX_HEADER: \newcommand{\coord }{{\bf r}_1, \dots, {\bf r}_N }
#+LaTeX_HEADER: \newcommand{\dcoord }{\dd {\bf r}_1 \dots \dd{\bf r}_N }
#+LaTeX_HEADER: \usepackage[backend=biber,style=alphabetic,autocite=plain,sorting=none]{biblatex}
#+LaTeX_HEADER: \addbibresource{verificarlo.bib}
#+LaTeX_HEADER: \usepackage{graphicx}
#+LaTeX_HEADER: \usepackage[many]{tcolorbox}
#+LaTeX_HEADER: \usepackage{tikz}
#+LaTeX_HEADER: \usetikzlibrary{tikzmark,positioning}
#+LaTeX_HEADER: \definecolor{grey}{RGB}{170,170,170}
#+EXPORT_EXCLUDE_TAGS: noexport
#+startup: beamer
#+options: H:1 toc:nil
* Quantum chemistry
* Quantum chemistry :noexport:
#+LATEX: \begin{columns}
#+LATEX: \begin{column}{0.25\textwidth}
@ -68,6 +78,24 @@
#+LATEX: \end{column}
#+LATEX: \end{columns}
* The TREX CoE
#+LATEX: \begin{columns}
#+LATEX: \begin{column}{0.75\textwidth}
#+ATTR_LATEX: :width \textwidth
[[./TREX2.png]]
#+LATEX: \end{column}
#+LATEX: \begin{column}{0.25\textwidth}
#+LATEX: \begin{exampleblock}{Codes}
- CHAMP
- QMC=Chem
- TurboRVB
- NECI
- Quantum Package
- GammCor
#+LATEX: \end{exampleblock}
#+LATEX: \end{column}
#+LATEX: \end{columns}
* TREX: Targeting REal chemical accuracy at the EXascale
#+LATEX: \begin{columns}
@ -79,8 +107,10 @@
#+LATEX: \begin{column}{0.6\textwidth}
#+LATEX: \begin{exampleblock}{Objective: Make codes ready for exascale}
How: Instead of re-writing codes, provide libraries
- One library for exchanging information between codes (*TREXIO*)
- One library for high-performance (*QMCkl*)
- A library for exchanging information between codes (*TREXIO*)
$\Longrightarrow$ Enables HTC
- A library for high-performance (*QMCkl*)
$\Longrightarrow$ Enables HPC
#+LATEX: \end{exampleblock}
#+LATEX: \begin{exampleblock}{QMC: Quantum Monte Carlo methods}
- Highly accurate
@ -90,6 +120,97 @@
#+LATEX: \end{column}
#+LATEX: \end{columns}
* I/O library (TREXIO)
#+LATEX: \begin{columns}
#+LATEX: \begin{column}{0.4\textwidth}
#+LATEX: \begin{exampleblock}{Before}
#+BEGIN_SRC dot :output file :file interfaces.png
digraph G {
QP [label="Quantum Package"];
QMCCHEM [label="QMC=Chem"];
Turbo [label="TurboRVB"];
QP -> NECI;
NECI -> GammCor [style="dotted"];
NECI -> QMCCHEM [style="dotted"] ;
QP -> QMCCHEM;
QP -> CHAMP;
QP -> GammCor [style="dotted"];
QP -> Turbo [style="dotted"];
NECI -> Turbo [style="dotted"];
NECI -> CHAMP [style="dotted"];
QMCCHEM -> GammCor [style="dotted"];
CHAMP -> GammCor [style="dotted"];
Turbo -> GammCor [style="dotted"];
}
#+END_SRC
#+RESULTS:
[[file:interfaces.png]]
#+LATEX: \end{exampleblock}
#+LATEX: \end{column}
#+LATEX: \begin{column}{0.6\textwidth}
#+LATEX: \begin{exampleblock}{After}
#+BEGIN_SRC dot :output file :file interfaces2.png
digraph G {
layout=circo;
External [label="External codes"];
QP [label="Quantum Package"];
QMCCHEM [label="QMC=Chem"];
Turbo [label="TurboRVB"];
TREX [label="TREXIO File", shape="box"];
CHAMP -> TREX;
GammCor -> TREX;
NECI -> TREX;
QMCCHEM -> TREX;
QP -> TREX;
Turbo -> TREX;
External -> TREX;
TREX -> CHAMP;
TREX -> GammCor;
TREX -> NECI;
TREX -> QMCCHEM;
TREX -> QP;
TREX -> Turbo;
TREX -> External;
}
#+END_SRC
#+RESULTS:
[[file:interfaces2.png]]
#+LATEX: \end{exampleblock}
#+LATEX: \end{column}
#+LATEX: \end{columns}
(BSD license) \\
https://github.com/trex-coe/trexio
* I/O library (TREXIO)
#+LATEX: \begin{columns}
#+LATEX: \begin{column}{0.50\textwidth}
#+LATEX: \begin{exampleblock}{Front end}
- Definition of an API to read/write wave functions
- C-compatible API: Easy bindings in other languages
#+LATEX: \end{exampleblock}
#+LATEX: \begin{exampleblock}{Content of the files}
- File is self-contained: no external knowledge needed to compute
$\Psi(r_1,\dots,r_n)$ (normalization factors, basis set
parameters, /etc/)
- Strong conventions (atomic units, ordering of cartesian orbitals, /etc/)
#+LATEX: \end{exampleblock}
#+LATEX: \end{column}
#+LATEX: \begin{column}{0.5\textwidth}
#+ATTR_LATEX: :width 0.7\textwidth
[[./api.png]]
#+LATEX: \begin{exampleblock}{Back end}
- HDF5: Efficient I/O
- Text: debugging, fallback when HDF5 can't be installed
#+LATEX: \end{exampleblock}
Source code generated from a config file.
#+LATEX: \end{column}
#+LATEX: \end{columns}
* Quantum Monte Carlo (QMC)
#+BEGIN_SRC latex
@ -132,9 +253,6 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
[[./Qmc.png]]
#+LATEX: \end{column}
#+LATEX: \end{columns}
* QMC kernel library (QMCkl)
@ -146,9 +264,7 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
- architectures
- problem sizes
- requested accuracy (reduced precision)
- The sequence of kernels will be scheduled with the StarPU runtime
* QMC kernel library (QMCkl)
** Two implementations
@ -159,24 +275,15 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
** Advantages
- The code can stay easy to understand by the physicists/chemists
Performance-related aspects are delegated to the library
- Changing architecture requires only linking with another
version of the library
- Scientists can use their preferred language
- Scientists don't lose control over their codes
- Codes don't die when the architecture changes
- Scientific code development does not break the performance
- Better re-use of the optimization effort among the community
* Literate programming :noexport:
#+BEGIN_quote
Literate programming is a programming paradigm introduced by Donald
Knuth in which a computer program is given an explanation of its
logic in a natural language, such as English, interspersed with
snippets of macros and traditional source code, from which
compilable source code can be generated. (Wikipedia)
#+END_quote
* Documentation library :noexport:
Literate programming with org-mode:
- Here, comments are more important than code
- Comments are more important than code
- Can add graphics, \LaTeX formulas, tables, etc
- Documentation always synchronized with the code
- Some routines can be generated by embedded scripts
@ -185,7 +292,24 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
- The API is C-compatible: QMCkl appears like a C library
$\Longrightarrow$ can be used in all other languages
* Design strategy
* HPC library
- Same API as the documentation library
- Optimization is guided by analysis with *MAQAO*\footnote{https://maqao.org}.
- Propose performance-critical choices in the API design (data
structures, memory management, /etc/)
- Both CPU and GPU versions of the kernels
- Task parallelism with StarPU\footnote{C. Augonnet et al, doi:10.1002/cpe.1631} to schedule kernels on CPU and GPU and
handle asynchronous CPU-GPU transfers
* Efficiently guiding the developer
#+ATTR_LATEX: :width \textwidth
[[./maqao1.png]]
* Extensive/automatic testing of different configurations
#+ATTR_LATEX: :width \textwidth
[[./maqao2.png]]
* Design strategy :noexport:
1. Kernel extraction: QMC specialists agree on the
mathematical expression of the problem
@ -195,8 +319,8 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
4. The documentation library is linked in a QMC code to check correctness
5. HPC experts provide an HPC version of the kernel
6. The HPC library is linked in the QMC codes of the CoE
* Our first application : 3-body Jastrow factor
* First application : 3-body Jastrow factor
#+LATEX: \newcommand{\Jeen}{J_{\text{een}}}
#+LATEX: \newcommand{\Nel}{N_{\text{elec}}}
@ -231,12 +355,39 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
#+LATEX: \end{column}
#+LATEX: \end{columns}
#+INCLUDE: "verificarlo.tex" export latex
* Verificarlo CI
* Links
#+LATEX: \begin{columns}
#+LATEX: \begin{column}{0.5\textwidth}
#+LATEX: \begin{exampleblock}{Compare runs}
#+ATTR_LATEX: :width 0.85\textwidth
[[./img/cmp-runs.png]]
- Track precision of kernels over commits
- Shows significant digits $s$, standard deviation $\sigma$,
variable distribution
#+LATEX: \end{exampleblock}
#+LATEX: \end{column}
#+LATEX: \begin{column}{0.5\textwidth}
#+LATEX: \begin{exampleblock}{Inspect runs}
#+ATTR_LATEX: :width 0.85\textwidth
[[./img/inspect-runs.png]]
- Focus in depth on one particular run
- Compare multiple implementations of the same kernel
#+LATEX: \end{exampleblock}
#+LATEX: \end{column}
#+LATEX: \end{columns}
- TREX web site : https://trex-coe.eu
- QMCkl documentation : https://trex-coe.github.io/qmckl
- QMCkl repository : https://github.com/trex-coe/qmckl
* Useful links
| TREX web site | https://trex-coe.eu |
| TREXIO | https://github.com/trex-coe/trexio |
| QMCkl | https://github.com/trex-coe/qmckl |
| QMCkl documentation | https://trex-coe.github.io/qmckl |
| MAQAO | http://www.maqao.org |
| Verificarlo | https://github.com/verificarlo/verificarlo |
* Export :noexport:
#+BEGIN_SRC elisp :output none

View File

@ -1,4 +1,4 @@
% Created 2021-06-30 Wed 12:12
% Created 2021-07-01 Thu 15:48
% Intended LaTeX compiler: pdflatex
\documentclass[aspectratio=169]{beamer}
\usepackage[utf8]{inputenc}
@ -14,7 +14,8 @@
\usepackage{amssymb}
\usepackage{capt-of}
\usepackage{hyperref}
\institute{Lab. Chimie et Physique Quantiques, IRSAMC, UPS/CNRS, Toulouse (France)}
\institute{$^1$University of Toulouse/CNRS, LCPQ (France) \\
$^2$University of Versailles, Li-PaRAD (France)}
\usepackage{minted}
\usemintedstyle{emacs}
\newminted{f90}{fontsize=\footnotesize}
@ -29,13 +30,20 @@
\definecolor{darkpink}{rgb}{0.7,0.0,0.7}
\newcommand{\coord }{{\bf r}_1, \dots, {\bf r}_N }
\newcommand{\dcoord }{\dd {\bf r}_1 \dots \dd{\bf r}_N }
\usepackage[backend=biber,style=alphabetic,autocite=plain,sorting=none]{biblatex}
\addbibresource{verificarlo.bib}
\usepackage{graphicx}
\usepackage[many]{tcolorbox}
\usepackage{tikz}
\usetikzlibrary{tikzmark,positioning}
\definecolor{grey}{RGB}{170,170,170}
\usetheme{trex}
\author{Anthony Scemama}
\date{12/03/2021}
\title{Library development within TREX}
\author{Anthony Scemama\(^1\), Pablo de Oliveira Castro\(^2\), Cedric Valensi\(^2\), William Jalby\(^2\)}
\date{02/07/2021}
\title{TREX : an innovative view of HPC usage applied to Quantum Monte Carlo simulations}
\hypersetup{
pdfauthor={Anthony Scemama},
pdftitle={Library development within TREX},
pdfauthor={Anthony Scemama\(^1\), Pablo de Oliveira Castro\(^2\), Cedric Valensi\(^2\), William Jalby\(^2\)},
pdftitle={TREX : an innovative view of HPC usage applied to Quantum Monte Carlo simulations},
pdfkeywords={},
pdfsubject={},
pdfcreator={Emacs 26.3 (Org mode 9.4)},
@ -44,22 +52,7 @@
\maketitle
\begin{frame}[label={sec:org52bec56}]{Quantum chemistry}
\begin{columns}
\begin{column}{0.25\textwidth}
\begin{center}
\includegraphics[width=\textwidth]{./dirac_4.jpg}
\end{center}
\end{column}
\begin{column}{0.75\textwidth}
\begin{center}
\includegraphics[width=\textwidth]{./dirac2.png}
\end{center}
\end{column}
\end{columns}
\end{frame}
\begin{frame}[label={sec:org2a0da55}]{Quantum chemistry}
\begin{frame}[label={sec:org0538bed}]{Quantum chemistry}
\begin{columns}
\begin{column}{0.6\textwidth}
\begin{exampleblock}{}
@ -97,7 +90,29 @@
\end{columns}
\end{frame}
\begin{frame}[label={sec:org7ad98d0}]{TREX: Targeting REal chemical accuracy at the EXascale}
\begin{frame}[label={sec:orgfcf3270}]{The TREX CoE}
\begin{columns}
\begin{column}{0.75\textwidth}
\begin{center}
\includegraphics[width=\textwidth]{./TREX2.png}
\end{center}
\end{column}
\begin{column}{0.25\textwidth}
\begin{exampleblock}{Codes}
\begin{itemize}
\item CHAMP
\item QMC=Chem
\item TurboRVB
\item NECI
\item Quantum Package
\item GammCor
\end{itemize}
\end{exampleblock}
\end{column}
\end{columns}
\end{frame}
\begin{frame}[label={sec:org42c62d5}]{TREX: Targeting REal chemical accuracy at the EXascale}
\begin{columns}
\begin{column}{0.4\textwidth}
\begin{center}
@ -109,8 +124,10 @@
\begin{exampleblock}{Objective: Make codes ready for exascale}
How: Instead of re-writing codes, provide libraries
\begin{itemize}
\item One library for exchanging information between codes (\alert{TREXIO})
\item One library for high-performance (\alert{QMCkl})
\item A library for exchanging information between codes (\alert{TREXIO})
\(\Longrightarrow\) Enables HTC
\item A library for high-performance (\alert{QMCkl})
\(\Longrightarrow\) Enables HPC
\end{itemize}
\end{exampleblock}
\begin{exampleblock}{QMC: Quantum Monte Carlo methods}
@ -124,7 +141,63 @@ How: Instead of re-writing codes, provide libraries
\end{columns}
\end{frame}
\begin{frame}[label={sec:orgd075e20}]{Quantum Monte Carlo (QMC)}
\begin{frame}[label={sec:orgc86670a}]{I/O library (TREXIO)}
\begin{columns}
\begin{column}{0.4\textwidth}
\begin{exampleblock}{Before}
\begin{center}
\includegraphics[width=.9\linewidth]{interfaces.png}
\end{center}
\end{exampleblock}
\end{column}
\begin{column}{0.6\textwidth}
\begin{exampleblock}{After}
\begin{center}
\includegraphics[width=.9\linewidth]{interfaces2.png}
\end{center}
\end{exampleblock}
\end{column}
\end{columns}
(BSD license) \\
\url{https://github.com/trex-coe/trexio}
\end{frame}
\begin{frame}[label={sec:orgc4c0bc0}]{I/O library (TREXIO)}
\begin{columns}
\begin{column}{0.50\textwidth}
\begin{exampleblock}{Front end}
\begin{itemize}
\item Definition of an API to read/write wave functions
\item C-compatible API: Easy bindings in other languages
\end{itemize}
\end{exampleblock}
\begin{exampleblock}{Content of the files}
\begin{itemize}
\item File is self-contained: no external knowledge needed to compute
\(\Psi(r_1,\dots,r_n)\) (normalization factors, basis set
parameters, \emph{etc})
\item Strong conventions (atomic units, ordering of cartesian orbitals, \emph{etc})
\end{itemize}
\end{exampleblock}
\end{column}
\begin{column}{0.5\textwidth}
\begin{center}
\includegraphics[width=0.7\textwidth]{./api.png}
\end{center}
\begin{exampleblock}{Back end}
\begin{itemize}
\item HDF5: Efficient I/O
\item Text: debugging, fallback when HDF5 can't be installed
\end{itemize}
\end{exampleblock}
Source code generated from a config file.
\end{column}
\end{columns}
\end{frame}
\begin{frame}[label={sec:org52e95d7}]{Quantum Monte Carlo (QMC)}
\alert{Problem}: Stochastic resolution of the Schr\"odinger equation for $N$ electrons
\begin{eqnarray}
E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
@ -150,7 +223,7 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
\end{columns}
\end{frame}
\begin{frame}[label={sec:org933f7ec}]{Quantum Monte Carlo (QMC)}
\begin{frame}[label={sec:org4cf1738}]{Quantum Monte Carlo (QMC)}
\begin{columns}
\begin{column}{0.4\textwidth}
\begin{itemize}
@ -168,10 +241,7 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
\end{columns}
\end{frame}
\begin{frame}[label={sec:org8b6768c}]{QMC kernel library (QMCkl)}
\begin{frame}[label={sec:org2aa57d2}]{QMC kernel library (QMCkl)}
\begin{block}{Computational kernels}
\begin{itemize}
\item QMCkl will contain the main kernels of QMC methods
@ -183,13 +253,11 @@ for different
\item problem sizes
\item requested accuracy (reduced precision)
\end{itemize}
\item Kernels will be scheduled with the StarPU runtime
\end{itemize}
\end{block}
\end{frame}
\begin{frame}[label={sec:org24ef7da}]{QMC kernel library (QMCkl)}
\begin{frame}[label={sec:org91f9a85}]{QMC kernel library (QMCkl)}
\begin{block}{Two implementations}
\begin{itemize}
\item \emph{Documentation} : easy to read and understand, not necessarily efficient
@ -202,28 +270,38 @@ for different
\begin{itemize}
\item The code can stay easy to understand by the physicists/chemists
Performance-related aspects are delegated to the library
\item Changing architecture requires only linking with another
version of the library
\item Scientists can use their preferred language
\item Scientists don't lose control over their codes
\item Codes don't die when the architecture changes
\item Scientific code development does not break the performance
\item Better re-use of the optimization effort among the community
\end{itemize}
\end{block}
\end{frame}
\begin{frame}[label={sec:org8e1f375}]{Design strategy}
\begin{enumerate}
\item Kernel extraction: QMC specialists agree on the
mathematical expression of the problem
\item A mini-application is written to find the optimal data layout
with HPC experts from real-size examples
\item The kernel is written in the documentation library
\item The documentation library is linked in a QMC code to check correctness
\item HPC experts provide an HPC version of the kernel
\item The HPC library is linked in the QMC codes of the CoE
\end{enumerate}
\begin{frame}[label={sec:org9898ced}]{HPC library}
\begin{itemize}
\item Same API as the documentation library
\item Optimization is guided by analysis with \alert{MAQAO}\footnote{https://maqao.org}.
\item Propose performance-critical choices in the API design (data
structures, memory management, \emph{etc})
\item Both CPU and GPU versions of the kernels
\item Task parallelism with StarPU\footnote{C. Augonnet et al, doi:10.1002/cpe.1631} to schedule kernels on CPU and GPU and
handle asynchronous CPU-GPU transfers
\end{itemize}
\end{frame}
\begin{frame}[label={sec:orge2ab500}]{Our first application : 3-body Jastrow factor}
\begin{frame}[label={sec:orgd465d61}]{Efficiently guiding the developer}
\begin{center}
\includegraphics[width=\textwidth]{./maqao1.png}
\end{center}
\end{frame}
\begin{frame}[label={sec:org2cc8e19}]{Extensive/automatic testing of different configurations}
\begin{center}
\includegraphics[width=\textwidth]{./maqao2.png}
\end{center}
\end{frame}
\begin{frame}[label={sec:org6328ebd}]{First application : 3-body Jastrow factor}
\newcommand{\Jeen}{J_{\text{een}}}
\newcommand{\Nel}{N_{\text{elec}}}
\newcommand{\Nat}{N_{\text{nucl}}}
@ -255,18 +333,151 @@ with HPC experts from real-size examples
\item Gradient and Laplacian are also required
\item Up to \(20\times\) faster than in the original code
\item \(\sim 80\%\) of the AVX-512 peak is reached
\item Using a DGEMM kernel \(\Longrightarrow\) also efficient on GPU
\item Expressed with a DGEMM kernel \(\Longrightarrow\) also efficient on GPU
\end{itemize}
\end{column}
\end{columns}
\end{frame}
\begin{frame}[fragile]{Numerical analysis with Verificarlo}
\textbf{Verificarlo} is a tool for assessing the precision of floating point operations.
It can be used to :
\begin{columns}
\column{0.3\textwidth}
{\centering
\includegraphics[width=80px, keepaspectratio]{img/verificarlo.png}
}\\%
{\footnotesize
\url{https://github.com/verificarlo/verificarlo} GPL v3 \\
}
\column{0.7\textwidth}
\begin{itemize}
\item \textbf{Find numerical bugs} in codes \footnotemark[1]
\begin{itemize}
\item Stochastic arithmetic to simulate round-off and cancellations
\item Localization techniques to pinpoint source of errors
\end{itemize}
\item \textbf{Optimize precision} \footnotemark[2]
\begin{itemize}
\item Simulate custom formats for mixed precision \\(float, bf16)
\item Tune precision in math library calls
\end{itemize}
\end{itemize}
\end{columns}
\footnotetext[1]{
C. Denis \textit{et al.} \href{https://dx.doi.org/10.1109/ARITH.2016.31}{doi:10.1109/ARITH.2016.31}
}
\footnotetext[2]{
Y. Chatelain \textit{et al.} \href{https://dx.doi.org/10.1007/978-3-030-29400-7\_34}{doi:10.1007/978-3-030-29400-7\_34}
}
\end{frame}
\begin{frame}[fragile]{The Verificarlo pipeline}
\begin{itemize}
\item Each Floating-Point (FP) operation may introduce a $\delta$ error
$$ z = fl[x+y] = (x+y)(1+\delta) $$
\item When chaining multiple operations, errors can accumulate and snowball
\item \structure{Monte Carlo Arithmetic key principle}
\begin{itemize}
\item Make $\delta$ a \structure{random variable}
\item Use a Monte Carlo simulation to empirically estimate the FP error distribution
\end{itemize}
\end{itemize}
\begin{center}
\includegraphics[width=.8\textwidth]{img/verificarlo_pipeline.png}
\end{center}
\end{frame}
\begin{frame}{Continuous-Integration precision tracking}
\begin{itemize}
\item Each push to \structure{QMCkl} triggers a Verificarlo analysis.
\item QMCkl kernels unit tests are augmented with probes:
\begin{itemize}
\item track a scalar value precision
\item ensure that a target precision is reached
\end{itemize}
\end{itemize}
\vspace{2cm}\vfill
vfc\_probe(\tikzmark{kernel}"Sherman-Morrison", \tikzmark{var}"residual", res) \\
vfc\_probe\_assert("Sherman-Morrison", "res", res, \tikzmark{target}1e-7)
\begin{tikzpicture}[
remember picture,
overlay,
expl/.style={draw=orange,fill=orange!30,rounded corners,text width=3cm},
arrow/.style={red!80!black,ultra thick,->,>=latex}
]
\node[expl]
(kernelex)
at (2,3cm)
{Kernel name};
\node[expl]
(varex)
at (7,3cm)
{Variable name};
\node[expl]
(targetex)
at (12,3cm)
{Target precision};
\draw[arrow]
(kernelex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=1.7cm]{pic cs:kernel});
\draw[arrow]
(varex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=1cm]{pic cs:var});
\draw[arrow]
(targetex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=.5cm]{pic cs:target});
\end{tikzpicture}
\end{frame}
\begin{frame}[label={sec:orga0c3982}]{Verificarlo CI}
\begin{columns}
\begin{column}{0.5\textwidth}
\begin{exampleblock}{Compare runs}
\begin{center}
\includegraphics[width=0.85\textwidth]{./img/cmp-runs.png}
\end{center}
\begin{itemize}
\item Track precision of kernels over commits
\item Shows significant digits \(s\), standard deviation \(\sigma\),
variable distribution
\end{itemize}
\end{exampleblock}
\end{column}
\begin{column}{0.5\textwidth}
\begin{exampleblock}{Inspect runs}
\begin{center}
\includegraphics[width=0.85\textwidth]{./img/inspect-runs.png}
\end{center}
\begin{itemize}
\item Focus in depth on one particular run
\item Compare multiple implementations of the same kernel
\end{itemize}
\end{exampleblock}
\end{column}
\end{columns}
\end{frame}
\begin{frame}[label={sec:org5b9dcc8}]{Links}
\begin{itemize}
\item TREX web site : \url{https://trex-coe.eu}
\item QMCkl documentation : \url{https://trex-coe.github.io/qmckl}
\item QMCkl repository : \url{https://github.com/trex-coe/qmckl}
\end{itemize}
\begin{frame}[label={sec:org6ac4d38}]{Useful links}
\begin{center}
\begin{tabular}{ll}
TREX web site & \url{https://trex-coe.eu}\\
TREXIO & \url{https://github.com/trex-coe/trexio}\\
QMCkl & \url{https://github.com/trex-coe/qmckl}\\
QMCkl documentation & \url{https://trex-coe.github.io/qmckl}\\
MAQAO & \url{http://www.maqao.org}\\
Verificarlo & \url{https://github.com/verificarlo/verificarlo}\\
\end{tabular}
\end{center}
\end{frame}
\end{document}

101
verificarlo.tex Normal file
View File

@ -0,0 +1,101 @@
\end{frame}
\begin{frame}[fragile]{Numerical analysis with Verificarlo}
\textbf{Verificarlo} is a tool for assessing the precision of floating point operations.
It can be used to :
\begin{columns}
\column{0.3\textwidth}
{\centering
\includegraphics[width=80px, keepaspectratio]{img/verificarlo.png}
}\\%
{\footnotesize
\url{https://github.com/verificarlo/verificarlo} GPL v3 \\
}
\column{0.7\textwidth}
\begin{itemize}
\item \textbf{Find numerical bugs} in codes \footnotemark[1]
\begin{itemize}
\item Stochastic arithmetic to simulate round-off and cancellations
\item Localization techniques to pinpoint source of errors
\end{itemize}
\item \textbf{Optimize precision} \footnotemark[2]
\begin{itemize}
\item Simulate custom formats for mixed precision \\(float, bf16)
\item Tune precision in math library calls
\end{itemize}
\end{itemize}
\end{columns}
\footnotetext[1]{
C. Denis \textit{et al.} \href{https://dx.doi.org/10.1109/ARITH.2016.31}{doi:10.1109/ARITH.2016.31}
}
\footnotetext[2]{
Y. Chatelain \textit{et al.} \href{https://dx.doi.org/10.1007/978-3-030-29400-7\_34}{doi:10.1007/978-3-030-29400-7\_34}
}
\end{frame}
\begin{frame}[fragile]{The Verificarlo pipeline}
\begin{itemize}
\item Each Floating-Point (FP) operation may introduce a $\delta$ error
$$ z = fl[x+y] = (x+y)(1+\delta) $$
\item When chaining multiple operations, errors can accumulate and snowball
\item \structure{Monte Carlo Arithmetic key principle}
\begin{itemize}
\item Make $\delta$ a \structure{random variable}
\item Use a Monte Carlo simulation to empirically estimate the FP error distribution
\end{itemize}
\end{itemize}
\begin{center}
\includegraphics[width=.8\textwidth]{img/verificarlo_pipeline.png}
\end{center}
\end{frame}
\begin{frame}{Continuous-Integration precision tracking}
\begin{itemize}
\item Each push to \structure{QMCkl} triggers a Verificarlo analysis.
\item QMCkl kernels unit tests are augmented with probes:
\begin{itemize}
\item track a scalar value precision
\item ensure that a target precision is reached
\end{itemize}
\end{itemize}
\vspace{2cm}\vfill
vfc\_probe(\tikzmark{kernel}"Sherman-Morrison", \tikzmark{var}"residual", res) \\
vfc\_probe\_assert("Sherman-Morrison", "res", res, \tikzmark{target}1e-7)
\begin{tikzpicture}[
remember picture,
overlay,
expl/.style={draw=orange,fill=orange!30,rounded corners,text width=3cm},
arrow/.style={red!80!black,ultra thick,->,>=latex}
]
\node[expl]
(kernelex)
at (2,3cm)
{Kernel name};
\node[expl]
(varex)
at (7,3cm)
{Variable name};
\node[expl]
(targetex)
at (12,3cm)
{Target precision};
\draw[arrow]
(kernelex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=1.7cm]{pic cs:kernel});
\draw[arrow]
(varex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=1cm]{pic cs:var});
\draw[arrow]
(targetex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=.5cm]{pic cs:target});
\end{tikzpicture}