diff --git a/TREX2.png b/TREX2.png new file mode 100644 index 0000000..1a96b93 Binary files /dev/null and b/TREX2.png differ diff --git a/interfaces.png b/interfaces.png new file mode 100644 index 0000000..34f46a6 Binary files /dev/null and b/interfaces.png differ diff --git a/interfaces2.png b/interfaces2.png new file mode 100644 index 0000000..d30c031 Binary files /dev/null and b/interfaces2.png differ diff --git a/maqao1.png b/maqao1.png new file mode 100644 index 0000000..bafa105 Binary files /dev/null and b/maqao1.png differ diff --git a/maqao2.png b/maqao2.png new file mode 100644 index 0000000..dd047a1 Binary files /dev/null and b/maqao2.png differ diff --git a/scemama.org b/scemama.org index db042c5..c1c9e3f 100644 --- a/scemama.org +++ b/scemama.org @@ -1,8 +1,9 @@ -#+TITLE: Library development within TREX -#+DATE: 12/03/2021 -#+AUTHOR: Anthony Scemama +#+TITLE: TREX : an innovative view of HPC usage applied to Quantum Monte Carlo simulations +#+DATE: 02/07/2021 +#+AUTHOR: Anthony Scemama$^1$, Pablo de Oliveira Castro$^2$, Cedric Valensi$^2$, William Jalby$^2$ -#+LaTeX_HEADER: \institute{Lab. 
Chimie et Physique Quantiques, IRSAMC, UPS/CNRS, Toulouse (France)} +#+LaTeX_HEADER: \institute{$^1$University of Toulouse/CNRS, LCPQ (France) \\ +#+LaTeX_HEADER: $^2$University of Versailles, Li-PaRAD (France)} #+LATEX_CLASS: beamer #+LaTeX_CLASS_OPTIONS:[aspectratio=169] #+BEAMER_THEME: trex @@ -20,12 +21,21 @@ #+LaTeX_HEADER: \definecolor{darkpink}{rgb}{0.7,0.0,0.7} #+LaTeX_HEADER: \newcommand{\coord }{{\bf r}_1, \dots, {\bf r}_N } #+LaTeX_HEADER: \newcommand{\dcoord }{\dd {\bf r}_1 \dots \dd{\bf r}_N } + +#+LaTeX_HEADER: \usepackage[backend=biber,style=alphabetic,autocite=plain,sorting=none]{biblatex} +#+LaTeX_HEADER: \addbibresource{verificarlo.bib} +#+LaTeX_HEADER: \usepackage{graphicx} +#+LaTeX_HEADER: \usepackage[many]{tcolorbox} +#+LaTeX_HEADER: \usepackage{tikz} +#+LaTeX_HEADER: \usetikzlibrary{tikzmark,positioning} +#+LaTeX_HEADER: \definecolor{grey}{RGB}{170,170,170} + #+EXPORT_EXCLUDE_TAGS: noexport #+startup: beamer #+options: H:1 toc:nil -* Quantum chemistry +* Quantum chemistry :noexport: #+LATEX: \begin{columns} #+LATEX: \begin{column}{0.25\textwidth} @@ -68,6 +78,24 @@ #+LATEX: \end{column} #+LATEX: \end{columns} +* The TREX CoE + #+LATEX: \begin{columns} + #+LATEX: \begin{column}{0.75\textwidth} + #+ATTR_LATEX: :width \textwidth + [[./TREX2.png]] + #+LATEX: \end{column} + #+LATEX: \begin{column}{0.25\textwidth} + #+LATEX: \begin{exampleblock}{Codes} + - CHAMP + - QMC=Chem + - TurboRVB + - NECI + - Quantum Package + - GammCor + #+LATEX: \end{exampleblock} + #+LATEX: \end{column} + #+LATEX: \end{columns} + * TREX: Targeting REal chemical accuracy at the EXascale #+LATEX: \begin{columns} @@ -79,8 +107,10 @@ #+LATEX: \begin{column}{0.6\textwidth} #+LATEX: \begin{exampleblock}{Objective: Make codes ready for exascale} How: Instead of re-writing codes, provide libraries - - One library for exchanging information between codes (*TREXIO*) - - One library for high-performance (*QMCkl*) + - A library for exchanging information between codes (*TREXIO*) + 
$\Longrightarrow$ Enables HTC + - A library for high-performance (*QMCkl*) + $\Longrightarrow$ Enables HPC #+LATEX: \end{exampleblock} #+LATEX: \begin{exampleblock}{QMC: Quantum Monte Carlo methods} - Highly accurate @@ -90,6 +120,97 @@ #+LATEX: \end{column} #+LATEX: \end{columns} +* I/O library (TREXIO) + + #+LATEX: \begin{columns} + #+LATEX: \begin{column}{0.4\textwidth} + #+LATEX: \begin{exampleblock}{Before} + #+BEGIN_SRC dot :output file :file interfaces.png +digraph G { + QP [label="Quantum Package"]; + QMCCHEM [label="QMC=Chem"]; + Turbo [label="TurboRVB"]; + QP -> NECI; + NECI -> GammCor [style="dotted"]; + NECI -> QMCCHEM [style="dotted"] ; + QP -> QMCCHEM; + QP -> CHAMP; + QP -> GammCor [style="dotted"]; + QP -> Turbo [style="dotted"]; + NECI -> Turbo [style="dotted"]; + NECI -> CHAMP [style="dotted"]; + QMCCHEM -> GammCor [style="dotted"]; + CHAMP -> GammCor [style="dotted"]; + Turbo -> GammCor [style="dotted"]; + } + #+END_SRC + #+RESULTS: + [[file:interfaces.png]] + #+LATEX: \end{exampleblock} + #+LATEX: \end{column} + #+LATEX: \begin{column}{0.6\textwidth} + #+LATEX: \begin{exampleblock}{After} + #+BEGIN_SRC dot :output file :file interfaces2.png +digraph G { + layout=circo; + External [label="External codes"]; + QP [label="Quantum Package"]; + QMCCHEM [label="QMC=Chem"]; + Turbo [label="TurboRVB"]; + TREX [label="TREXIO File", shape="box"]; + CHAMP -> TREX; + GammCor -> TREX; + NECI -> TREX; + QMCCHEM -> TREX; + QP -> TREX; + Turbo -> TREX; + External -> TREX; + + TREX -> CHAMP; + TREX -> GammCor; + TREX -> NECI; + TREX -> QMCCHEM; + TREX -> QP; + TREX -> Turbo; + TREX -> External; + } + #+END_SRC + #+RESULTS: + [[file:interfaces2.png]] + #+LATEX: \end{exampleblock} + #+LATEX: \end{column} + #+LATEX: \end{columns} + + (BSD license) \\ + https://github.com/trex-coe/trexio + +* I/O library (TREXIO) + + #+LATEX: \begin{columns} + #+LATEX: \begin{column}{0.50\textwidth} + #+LATEX: \begin{exampleblock}{Front end} + - Definition of an API to read/write 
wave functions + - C-compatible API: Easy bindings in other languages + #+LATEX: \end{exampleblock} + #+LATEX: \begin{exampleblock}{Content of the files} + - File is self-contained: no external knowledge needed to compute + $\Psi(r_1,\dots,r_n)$ (normalization factors, basis set + parameters, /etc/) + - Strong conventions (atomic units, ordering of cartesian orbitals, /etc/) + #+LATEX: \end{exampleblock} + #+LATEX: \end{column} + #+LATEX: \begin{column}{0.5\textwidth} + #+ATTR_LATEX: :width 0.7\textwidth + [[./api.png]] + #+LATEX: \begin{exampleblock}{Back end} + - HDF5: Efficient I/O + - Text: debugging, fallback when HDF5 can't be installed + #+LATEX: \end{exampleblock} + Source code generated from a config file. + #+LATEX: \end{column} + #+LATEX: \end{columns} + + * Quantum Monte Carlo (QMC) #+BEGIN_SRC latex @@ -132,9 +253,6 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)} [[./Qmc.png]] #+LATEX: \end{column} #+LATEX: \end{columns} - - - * QMC kernel library (QMCkl) @@ -146,9 +264,7 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)} - architectures - problem sizes - requested accuracy (reduced precision) - - The sequence of kernels will be scheduled with the StarPU runtime - * QMC kernel library (QMCkl) ** Two implementations @@ -159,24 +275,15 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)} ** Advantages - The code can stay easy to understand by the physicists/chemists Performance-related aspects are delegated to the library - - Changing architecture requires only linking with another - version of the library + - Scientists can use their preferred language + - Scientists don't lose control on their codes + - Codes don't die when the architecture changes - Scientific code development does not break the performance - Better re-use of the optimization effort among the community -* Literate programming :noexport: - - #+BEGIN_quote - Literate programming is a programming paradigm introduced by Donald - Knuth in which a 
computer program is given an explanation of its - logic in a natural language, such as English, interspersed with - snippets of macros and traditional source code, from which - compilable source code can be generated. (Wikipedia) - #+END_quote - * Documentation library :noexport: Literate programming with org-mode: - - Here, comments are more important than code + - Comments are more important than code - Can add graphics, \LaTeX formulas, tables, etc - Documentation always synchronized with the code - Some routines can be generated by embedded scripts @@ -185,7 +292,24 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)} - The API is C-compatible: QMCkl appears like a C library $\Longrightarrow$ can be used in all other languages -* Design strategy +* HPC library + - Same API as the documentation library + - Optimization is guided by analysis with *MAQAO*\footnote{https://maqao.org}. + - Propose performance-critical choices in the API design (data + structures, memory management, /etc/) + - Both CPU and GPU versions of the kernels + - Task parallelism with StarPU\footnote{C. Augonnet et al, doi:10.1002/cpe.1631} to schedule kernels on CPU and GPU and + handle asynchronous CPU-GPU transfers + +* Efficiently guiding the developer + + #+ATTR_LATEX: :width \textwidth + [[./maqao1.png]] +* Extensive/automatic testing of different configurations + + #+ATTR_LATEX: :width \textwidth + [[./maqao2.png]] +* Design strategy :noexport: 1. Kernel extraction: QMC specialists agree on the mathematical expression of the problem @@ -195,8 +319,8 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)} 4. The documentation library is linked in a QMC code to check correctness 5. HPC experts provide an HPC version of the kernel 6. 
The HPC library is linked in the QMC codes of the CoE - -* Our first application : 3-body Jastrow factor + +* First application : 3-body Jastrow factor #+LATEX: \newcommand{\Jeen}{J_{\text{een}}} #+LATEX: \newcommand{\Nel}{N_{\text{elec}}} @@ -231,12 +355,39 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)} #+LATEX: \end{column} #+LATEX: \end{columns} + +#+INCLUDE: "verificarlo.tex" export latex +* Verificarlo CI -* Links + #+LATEX: \begin{columns} + #+LATEX: \begin{column}{0.5\textwidth} + #+LATEX: \begin{exampleblock}{Compare runs} + #+ATTR_LATEX: :width 0.85\textwidth + [[./img/cmp-runs.png]] + - Track precision of kernels over commits + - Shows significant digits $s$, standard deviation $\sigma$, + variable distribution + #+LATEX: \end{exampleblock} + #+LATEX: \end{column} + #+LATEX: \begin{column}{0.5\textwidth} + #+LATEX: \begin{exampleblock}{Inspect runs} + #+ATTR_LATEX: :width 0.85\textwidth + [[./img/inspect-runs.png]] + - Focus in depth on one particular run + - Compare multiple implementations of the same kernel + #+LATEX: \end{exampleblock} + #+LATEX: \end{column} + #+LATEX: \end{columns} - - TREX web site : https://trex-coe.eu - - QMCkl documentation : https://trex-coe.github.io/qmckl - - QMCkl repository : https://github.com/trex-coe/qmckl + +* Useful links + + | TREX web site | https://trex-coe.eu | + | TREXIO | https://github.com/trex-coe/trexio | + | QMCkl | https://github.com/trex-coe/qmckl | + | QMCkl documentation | https://trex-coe.github.io/qmckl | + | MAQAO | http://www.maqao.org | + | Verificarlo | https://github.com/verificarlo/verificarlo | * Export :noexport: #+BEGIN_SRC elisp :output none diff --git a/scemama.tex b/scemama.tex index 6366e7b..c19a6ea 100644 --- a/scemama.tex +++ b/scemama.tex @@ -1,4 +1,4 @@ -% Created 2021-06-30 Wed 12:12 +% Created 2021-07-01 Thu 15:48 % Intended LaTeX compiler: pdflatex \documentclass[aspectratio=169]{beamer} \usepackage[utf8]{inputenc} @@ -14,7 +14,8 @@ \usepackage{amssymb} 
\usepackage{capt-of} \usepackage{hyperref} -\institute{Lab. Chimie et Physique Quantiques, IRSAMC, UPS/CNRS, Toulouse (France)} +\institute{$^1$University of Toulouse/CNRS, LCPQ (France) \\ +$^2$University of Versailles, Li-PaRAD (France)} \usepackage{minted} \usemintedstyle{emacs} \newminted{f90}{fontsize=\footnotesize} @@ -29,13 +30,20 @@ \definecolor{darkpink}{rgb}{0.7,0.0,0.7} \newcommand{\coord }{{\bf r}_1, \dots, {\bf r}_N } \newcommand{\dcoord }{\dd {\bf r}_1 \dots \dd{\bf r}_N } +\usepackage[backend=biber,style=alphabetic,autocite=plain,sorting=none]{biblatex} +\addbibresource{verificarlo.bib} +\usepackage{graphicx} +\usepackage[many]{tcolorbox} +\usepackage{tikz} +\usetikzlibrary{tikzmark,positioning} +\definecolor{grey}{RGB}{170,170,170} \usetheme{trex} -\author{Anthony Scemama} -\date{12/03/2021} -\title{Library development within TREX} +\author{Anthony Scemama\(^1\), Pablo de Oliveira Castro\(^2\), Cedric Valensi\(^2\), William Jalby\(^2\)} +\date{02/07/2021} +\title{TREX : an innovative view of HPC usage applied to Quantum Monte Carlo simulations} \hypersetup{ - pdfauthor={Anthony Scemama}, - pdftitle={Library development within TREX}, + pdfauthor={Anthony Scemama\(^1\), Pablo de Oliveira Castro\(^2\), Cedric Valensi\(^2\), William Jalby\(^2\)}, + pdftitle={TREX : an innovative view of HPC usage applied to Quantum Monte Carlo simulations}, pdfkeywords={}, pdfsubject={}, pdfcreator={Emacs 26.3 (Org mode 9.4)}, @@ -44,22 +52,7 @@ \maketitle -\begin{frame}[label={sec:org52bec56}]{Quantum chemistry} -\begin{columns} -\begin{column}{0.25\textwidth} -\begin{center} -\includegraphics[width=\textwidth]{./dirac_4.jpg} -\end{center} -\end{column} -\begin{column}{0.75\textwidth} -\begin{center} -\includegraphics[width=\textwidth]{./dirac2.png} -\end{center} -\end{column} -\end{columns} -\end{frame} - -\begin{frame}[label={sec:org2a0da55}]{Quantum chemistry} +\begin{frame}[label={sec:org0538bed}]{Quantum chemistry} \begin{columns} \begin{column}{0.6\textwidth} 
\begin{exampleblock}{} @@ -97,7 +90,29 @@ \end{columns} \end{frame} -\begin{frame}[label={sec:org7ad98d0}]{TREX: Targeting REal chemical accuracy at the EXascale} +\begin{frame}[label={sec:orgfcf3270}]{The TREX CoE} +\begin{columns} +\begin{column}{0.75\textwidth} +\begin{center} +\includegraphics[width=\textwidth]{./TREX2.png} +\end{center} +\end{column} +\begin{column}{0.25\textwidth} +\begin{exampleblock}{Codes} +\begin{itemize} +\item CHAMP +\item QMC=Chem +\item TurboRVB +\item NECI +\item Quantum Package +\item GammCor +\end{itemize} +\end{exampleblock} +\end{column} +\end{columns} +\end{frame} + +\begin{frame}[label={sec:org42c62d5}]{TREX: Targeting REal chemical accuracy at the EXascale} \begin{columns} \begin{column}{0.4\textwidth} \begin{center} @@ -109,8 +124,10 @@ \begin{exampleblock}{Objective: Make codes ready for exascale} How: Instead of re-writing codes, provide libraries \begin{itemize} -\item One library for exchanging information between codes (\alert{TREXIO}) -\item One library for high-performance (\alert{QMCkl}) +\item A library for exchanging information between codes (\alert{TREXIO}) +\(\Longrightarrow\) Enables HTC +\item A library for high-performance (\alert{QMCkl}) +\(\Longrightarrow\) Enables HPC \end{itemize} \end{exampleblock} \begin{exampleblock}{QMC: Quantum Monte Carlo methods} @@ -124,7 +141,63 @@ How: Instead of re-writing codes, provide libraries \end{columns} \end{frame} -\begin{frame}[label={sec:orgd075e20}]{Quantum Monte Carlo (QMC)} +\begin{frame}[label={sec:orgc86670a}]{I/O library (TREXIO)} +\begin{columns} +\begin{column}{0.4\textwidth} +\begin{exampleblock}{Before} +\begin{center} +\includegraphics[width=.9\linewidth]{interfaces.png} +\end{center} +\end{exampleblock} +\end{column} +\begin{column}{0.6\textwidth} +\begin{exampleblock}{After} +\begin{center} +\includegraphics[width=.9\linewidth]{interfaces2.png} +\end{center} +\end{exampleblock} +\end{column} +\end{columns} + +(BSD license) \\ 
+\url{https://github.com/trex-coe/trexio} +\end{frame} + +\begin{frame}[label={sec:orgc4c0bc0}]{I/O library (TREXIO)} +\begin{columns} +\begin{column}{0.50\textwidth} +\begin{exampleblock}{Front end} +\begin{itemize} +\item Definition of an API to read/write wave functions +\item C-compatible API: Easy bindings in other languages +\end{itemize} +\end{exampleblock} +\begin{exampleblock}{Content of the files} +\begin{itemize} +\item File is self-contained: no external knowledge needed to compute +\(\Psi(r_1,\dots,r_n)\) (normalization factors, basis set +parameters, \emph{etc}) +\item Strong conventions (atomic units, ordering of cartesian orbitals, \emph{etc}) +\end{itemize} +\end{exampleblock} +\end{column} +\begin{column}{0.5\textwidth} +\begin{center} +\includegraphics[width=0.7\textwidth]{./api.png} +\end{center} +\begin{exampleblock}{Back end} +\begin{itemize} +\item HDF5: Efficient I/O +\item Text: debugging, fallback when HDF5 can't be installed +\end{itemize} +\end{exampleblock} +Source code generated from a config file. 
+\end{column} +\end{columns} +\end{frame} + + +\begin{frame}[label={sec:org52e95d7}]{Quantum Monte Carlo (QMC)} \alert{Problem}: Stochastic resolution of the Schr\"odinger equation for $N$ electrons \begin{eqnarray} E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)} @@ -150,7 +223,7 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)} \end{columns} \end{frame} -\begin{frame}[label={sec:org933f7ec}]{Quantum Monte Carlo (QMC)} +\begin{frame}[label={sec:org4cf1738}]{Quantum Monte Carlo (QMC)} \begin{columns} \begin{column}{0.4\textwidth} \begin{itemize} @@ -168,10 +241,7 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)} \end{columns} \end{frame} - - - -\begin{frame}[label={sec:org8b6768c}]{QMC kernel library (QMCkl)} +\begin{frame}[label={sec:org2aa57d2}]{QMC kernel library (QMCkl)} \begin{block}{Computational kernels} \begin{itemize} \item QMCkl will contain the main kernels of QMC methods @@ -183,13 +253,11 @@ for different \item problem sizes \item requested accuracy (reduced precision) \end{itemize} -\item Kernels will be scheduled with the StarPU runtime \end{itemize} \end{block} \end{frame} - -\begin{frame}[label={sec:org24ef7da}]{QMC kernel library (QMCkl)} +\begin{frame}[label={sec:org91f9a85}]{QMC kernel library (QMCkl)} \begin{block}{Two implementations} \begin{itemize} \item \emph{Documentation} : easy to read and understand, not necessarily efficient @@ -202,28 +270,38 @@ for different \begin{itemize} \item The code can stay easy to understand by the physicists/chemists Performance-related aspects are delegated to the library -\item Changing architecture requires only linking with another -version of the library +\item Scientists can use their preferred language +\item Scientists don't lose control on their codes +\item Codes don't die when the architecture changes \item Scientific code development does not break the performance \item Better re-use of the optimization effort among the community \end{itemize} \end{block} 
\end{frame} -\begin{frame}[label={sec:org8e1f375}]{Design strategy} -\begin{enumerate} -\item Kernel extraction: QMC specialists agree on the -mathematical expression of the problem -\item A mini-application is written to find the optimal data layout -with HPC experts from real-size examples -\item The kernel is written in the documentation library -\item The documentation library is linked in a QMC code to check correctness -\item HPC experts provide an HPC version of the kernel -\item The HPC library is linked in the QMC codes of the CoE -\end{enumerate} +\begin{frame}[label={sec:org9898ced}]{HPC library} +\begin{itemize} +\item Same API as the documentation library +\item Optimization is guided by analysis with \alert{MAQAO}\footnote{https://maqao.org}. +\item Propose performance-critical choices in the API design (data +structures, memory management, \emph{etc}) +\item Both CPU and GPU versions of the kernels +\item Task parallelism with StarPU\footnote{C. Augonnet et al, doi:10.1002/cpe.1631} to schedule kernels on CPU and GPU and +handle asynchronous CPU-GPU transfers +\end{itemize} \end{frame} -\begin{frame}[label={sec:orge2ab500}]{Our first application : 3-body Jastrow factor} +\begin{frame}[label={sec:orgd465d61}]{Efficiently guiding the developer} +\begin{center} +\includegraphics[width=\textwidth]{./maqao1.png} +\end{center} +\end{frame} +\begin{frame}[label={sec:org2cc8e19}]{Extensive/automatic testing of different configurations} +\begin{center} +\includegraphics[width=\textwidth]{./maqao2.png} +\end{center} +\end{frame} +\begin{frame}[label={sec:org6328ebd}]{First application : 3-body Jastrow factor} \newcommand{\Jeen}{J_{\text{een}}} \newcommand{\Nel}{N_{\text{elec}}} \newcommand{\Nat}{N_{\text{nucl}}} @@ -255,18 +333,151 @@ with HPC experts from real-size examples \item Gradient and Laplacian are also required \item Up to \(20\times\) faster than in the original code \item \(\sim 80\%\) of the AVX-512 peak is reached -\item Using a DGEMM kernel 
\(\Longrightarrow\) also efficient on GPU +\item Expressed with a DGEMM kernel \(\Longrightarrow\) also efficient on GPU \end{itemize} \end{column} \end{columns} + + +\end{frame} +\begin{frame}[fragile]{Numerical analysis with Verificarlo} + + + \textbf{Verificarlo} is a tool for assessing the precision of floating point operations. + It can be used to : + + \begin{columns} + \column{0.3\textwidth} + {\centering + \includegraphics[width=80px, keepaspectratio]{img/verificarlo.png} + }\\% + + {\footnotesize + \url{https://github.com/verificarlo/verificarlo} GPL v3 \\ + } + \column{0.7\textwidth} + + \begin{itemize} + \item \textbf{Find numerical bugs} in codes \footnotemark[1] + \begin{itemize} + \item Stochastic arithmetic to simulate round-off and cancellations + \item Localization techniques to pinpoint source of errors + \end{itemize} + + \item \textbf{Optimize precision} \footnotemark[2] + \begin{itemize} + \item Simulate custom formats for mixed precision \\(float, bf16) + \item Tune precision in math library calls + \end{itemize} + \end{itemize} + + \end{columns} +\footnotetext[1]{ +C. 
Denis \textit{et al.} \href{https://dx.doi.org/10.1109/ARITH.2016.31}{doi:10.1109/ARITH.2016.31} +} +\footnotetext[2]{ +Y Chatelain \textit{et al.} \href{https://dx.doi.org/10.1007/978-3-030-29400-7\_34}{doi:10.1007/978-3-030-29400-7\_34} +} + +\end{frame} + +\begin{frame}[fragile]{The Verificarlo pipeline} + \begin{itemize} + \item Each Floating-Point (FP) operation may introduce a $\delta$ error + $$ z = fl[x+y] = (x+y)(1+\delta) $$ + \item When chaining multiple operations, errors can accumulate and snowball + \item \structure{Monte Carlo Arithmetic key principle} + \begin{itemize} + \item Make $\delta$ a \structure{random variable} + \item Use a Monte Carlo simulation to empirically estimate the FP error distribution + \end{itemize} + \end{itemize} + + \begin{center} + \includegraphics[width=.8\textwidth]{img/verificarlo_pipeline.png} + \end{center} +\end{frame} + +\begin{frame}{Continuous-Integration precision tracking} + +\begin{itemize} + \item Each push to \structure{QMCkl} triggers a Verificarlo analysis. 
+ \item QMCkl kernels unit tests are augmented with probes: + \begin{itemize} + \item track a scalar value precision + \item ensure that a target precision is reached + \end{itemize} +\end{itemize} +\vspace{2cm}\vfill + +vfc\_probe(\tikzmark{kernel}"Sherman-Morrison", \tikzmark{var}"residual", res) \\ +vfc\_probe\_assert("Sherman-Morrison", "res", res, \tikzmark{target}1e-7) + +\begin{tikzpicture}[ + remember picture, + overlay, + expl/.style={draw=orange,fill=orange!30,rounded corners,text width=3cm}, + arrow/.style={red!80!black,ultra thick,->,>=latex} +] +\node[expl] + (kernelex) + at (2,3cm) + {Kernel name}; +\node[expl] + (varex) + at (7,3cm) + {Variable name}; +\node[expl] + (targetex) + at (12,3cm) + {Target precision}; +\draw[arrow] + (kernelex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=1.7cm]{pic cs:kernel}); +\draw[arrow] + (varex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=1cm]{pic cs:var}); +\draw[arrow] + (targetex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=.5cm]{pic cs:target}); +\end{tikzpicture} +\end{frame} +\begin{frame}[label={sec:orga0c3982}]{Verificarlo CI} +\begin{columns} +\begin{column}{0.5\textwidth} +\begin{exampleblock}{Compare runs} +\begin{center} +\includegraphics[width=0.85\textwidth]{./img/cmp-runs.png} +\end{center} +\begin{itemize} +\item Track precision of kernels over commits +\item Shows significant digits \(s\), standard deviation \(\sigma\), +variable distribution +\end{itemize} +\end{exampleblock} +\end{column} +\begin{column}{0.5\textwidth} +\begin{exampleblock}{Inspect runs} +\begin{center} +\includegraphics[width=0.85\textwidth]{./img/inspect-runs.png} +\end{center} +\begin{itemize} +\item Focus in depth on one particular run +\item Compare multiple implementations of the same kernel +\end{itemize} +\end{exampleblock} +\end{column} +\end{columns} \end{frame} -\begin{frame}[label={sec:org5b9dcc8}]{Links} -\begin{itemize} -\item TREX web site : \url{https://trex-coe.eu} -\item QMCkl documentation : 
\url{https://trex-coe.github.io/qmckl} -\item QMCkl repository : \url{https://github.com/trex-coe/qmckl} -\end{itemize} +\begin{frame}[label={sec:org6ac4d38}]{Useful links} +\begin{center} +\begin{tabular}{ll} +TREX web site & \url{https://trex-coe.eu}\\ +TREXIO & \url{https://github.com/trex-coe/trexio}\\ +QMCkl & \url{https://github.com/trex-coe/qmckl}\\ +QMCkl documentation & \url{https://trex-coe.github.io/qmckl}\\ +MAQAO & \url{http://www.maqao.org}\\ +Verificarlo & \url{https://github.com/verificarlo/verificarlo}\\ +\end{tabular} +\end{center} \end{frame} \end{document} \ No newline at end of file diff --git a/verificarlo.tex b/verificarlo.tex new file mode 100644 index 0000000..7ec3a1c --- /dev/null +++ b/verificarlo.tex @@ -0,0 +1,101 @@ +\end{frame} +\begin{frame}[fragile]{Numerical analysis with Verificarlo} + + + \textbf{Verificarlo} is a tool for assessing the precision of floating point operations. + It can be used to : + + \begin{columns} + \column{0.3\textwidth} + {\centering + \includegraphics[width=80px, keepaspectratio]{img/verificarlo.png} + }\\% + + {\footnotesize + \url{https://github.com/verificarlo/verificarlo} GPL v3 \\ + } + \column{0.7\textwidth} + + \begin{itemize} + \item \textbf{Find numerical bugs} in codes \footnotemark[1] + \begin{itemize} + \item Stochastic arithmetic to simulate round-off and cancellations + \item Localization techniques to pinpoint source of errors + \end{itemize} + + \item \textbf{Optimize precision} \footnotemark[2] + \begin{itemize} + \item Simulate custom formats for mixed precision \\(float, bf16) + \item Tune precision in math library calls + \end{itemize} + \end{itemize} + + \end{columns} +\footnotetext[1]{ +C. 
Denis \textit{et al.} \href{https://dx.doi.org/10.1109/ARITH.2016.31}{doi:10.1109/ARITH.2016.31} +} +\footnotetext[2]{ +Y Chatelain \textit{et al.} \href{https://dx.doi.org/10.1007/978-3-030-29400-7\_34}{doi:10.1007/978-3-030-29400-7\_34} +} + +\end{frame} + +\begin{frame}[fragile]{The Verificarlo pipeline} + \begin{itemize} + \item Each Floating-Point (FP) operation may introduce a $\delta$ error + $$ z = fl[x+y] = (x+y)(1+\delta) $$ + \item When chaining multiple operations, errors can accumulate and snowball + \item \structure{Monte Carlo Arithmetic key principle} + \begin{itemize} + \item Make $\delta$ a \structure{random variable} + \item Use a Monte Carlo simulation to empirically estimate the FP error distribution + \end{itemize} + \end{itemize} + + \begin{center} + \includegraphics[width=.8\textwidth]{img/verificarlo_pipeline.png} + \end{center} +\end{frame} + +\begin{frame}{Continuous-Integration precision tracking} + +\begin{itemize} + \item Each push to \structure{QMCkl} triggers a Verificarlo analysis. 
+ \item QMCkl kernels unit tests are augmented with probes: + \begin{itemize} + \item track a scalar value precision + \item ensure that a target precision is reached + \end{itemize} +\end{itemize} +\vspace{2cm}\vfill + +vfc\_probe(\tikzmark{kernel}"Sherman-Morrison", \tikzmark{var}"residual", res) \\ +vfc\_probe\_assert("Sherman-Morrison", "res", res, \tikzmark{target}1e-7) + +\begin{tikzpicture}[ + remember picture, + overlay, + expl/.style={draw=orange,fill=orange!30,rounded corners,text width=3cm}, + arrow/.style={red!80!black,ultra thick,->,>=latex} +] +\node[expl] + (kernelex) + at (2,3cm) + {Kernel name}; +\node[expl] + (varex) + at (7,3cm) + {Variable name}; +\node[expl] + (targetex) + at (12,3cm) + {Target precision}; +\draw[arrow] + (kernelex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=1.7cm]{pic cs:kernel}); +\draw[arrow] + (varex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=1cm]{pic cs:var}); +\draw[arrow] + (targetex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=.5cm]{pic cs:target}); +\end{tikzpicture} + +