final version
This commit is contained in:
parent
22a66f5d6b
commit
c57746aabf
BIN
interfaces.png
Normal file
BIN
interfaces.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 43 KiB |
BIN
interfaces2.png
Normal file
BIN
interfaces2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 72 KiB |
BIN
maqao1.png
Normal file
BIN
maqao1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 322 KiB |
BIN
maqao2.png
Normal file
BIN
maqao2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 370 KiB |
215
scemama.org
215
scemama.org
@ -1,8 +1,9 @@
|
||||
#+TITLE: Library development within TREX
|
||||
#+DATE: 12/03/2021
|
||||
#+AUTHOR: Anthony Scemama
|
||||
#+TITLE: TREX : an innovative view of HPC usage applied to Quantum Monte Carlo simulations
|
||||
#+DATE: 02/07/2021
|
||||
#+AUTHOR: Anthony Scemama$^1$, Pablo de Oliveira Castro$^2$, Cedric Valensi$^2$, William Jalby$^2$
|
||||
|
||||
#+LaTeX_HEADER: \institute{Lab. Chimie et Physique Quantiques, IRSAMC, UPS/CNRS, Toulouse (France)}
|
||||
#+LaTeX_HEADER: \institute{$^1$University of Toulouse/CNRS, LCPQ (France) \\
|
||||
#+LaTeX_HEADER: $^2$University of Versailles, Li-PaRAD (France)}
|
||||
#+LATEX_CLASS: beamer
|
||||
#+LaTeX_CLASS_OPTIONS:[aspectratio=169]
|
||||
#+BEAMER_THEME: trex
|
||||
@ -20,12 +21,21 @@
|
||||
#+LaTeX_HEADER: \definecolor{darkpink}{rgb}{0.7,0.0,0.7}
|
||||
#+LaTeX_HEADER: \newcommand{\coord }{{\bf r}_1, \dots, {\bf r}_N }
|
||||
#+LaTeX_HEADER: \newcommand{\dcoord }{\dd {\bf r}_1 \dots \dd{\bf r}_N }
|
||||
|
||||
#+LaTeX_HEADER: \usepackage[backend=biber,style=alphabetic,autocite=plain,sorting=none]{biblatex}
|
||||
#+LaTeX_HEADER: \addbibresource{verificarlo.bib}
|
||||
#+LaTeX_HEADER: \usepackage{graphicx}
|
||||
#+LaTeX_HEADER: \usepackage[many]{tcolorbox}
|
||||
#+LaTeX_HEADER: \usepackage{tikz}
|
||||
#+LaTeX_HEADER: \usetikzlibrary{tikzmark,positioning}
|
||||
#+LaTeX_HEADER: \definecolor{grey}{RGB}{170,170,170}
|
||||
|
||||
#+EXPORT_EXCLUDE_TAGS: noexport
|
||||
|
||||
#+startup: beamer
|
||||
#+options: H:1 toc:nil
|
||||
|
||||
* Quantum chemistry
|
||||
* Quantum chemistry :noexport:
|
||||
|
||||
#+LATEX: \begin{columns}
|
||||
#+LATEX: \begin{column}{0.25\textwidth}
|
||||
@ -68,6 +78,24 @@
|
||||
#+LATEX: \end{column}
|
||||
#+LATEX: \end{columns}
|
||||
|
||||
* The TREX CoE
|
||||
#+LATEX: \begin{columns}
|
||||
#+LATEX: \begin{column}{0.75\textwidth}
|
||||
#+ATTR_LATEX: :width \textwidth
|
||||
[[./TREX2.png]]
|
||||
#+LATEX: \end{column}
|
||||
#+LATEX: \begin{column}{0.25\textwidth}
|
||||
#+LATEX: \begin{exampleblock}{Codes}
|
||||
- CHAMP
|
||||
- QMC=Chem
|
||||
- TurboRVB
|
||||
- NECI
|
||||
- Quantum Package
|
||||
- GammCor
|
||||
#+LATEX: \end{exampleblock}
|
||||
#+LATEX: \end{column}
|
||||
#+LATEX: \end{columns}
|
||||
|
||||
* TREX: Targeting REal chemical accuracy at the EXascale
|
||||
|
||||
#+LATEX: \begin{columns}
|
||||
@ -79,8 +107,10 @@
|
||||
#+LATEX: \begin{column}{0.6\textwidth}
|
||||
#+LATEX: \begin{exampleblock}{Objective: Make codes ready for exascale}
|
||||
How: Instead of re-writing codes, provide libraries
|
||||
- One library for exchanging information between codes (*TREXIO*)
|
||||
- One library for high-performance (*QMCkl*)
|
||||
- A library for exchanging information between codes (*TREXIO*)
|
||||
$\Longrightarrow$ Enables HTC
|
||||
- A library for high-performance (*QMCkl*)
|
||||
$\Longrightarrow$ Enables HPC
|
||||
#+LATEX: \end{exampleblock}
|
||||
#+LATEX: \begin{exampleblock}{QMC: Quantum Monte Carlo methods}
|
||||
- Highly accurate
|
||||
@ -90,6 +120,97 @@
|
||||
#+LATEX: \end{column}
|
||||
#+LATEX: \end{columns}
|
||||
|
||||
* I/O library (TREXIO)
|
||||
|
||||
#+LATEX: \begin{columns}
|
||||
#+LATEX: \begin{column}{0.4\textwidth}
|
||||
#+LATEX: \begin{exampleblock}{Before}
|
||||
#+BEGIN_SRC dot :output file :file interfaces.png
|
||||
digraph G {
|
||||
QP [label="Quantum Package"];
|
||||
QMCCHEM [label="QMC=Chem"];
|
||||
Turbo [label="TurboRVB"];
|
||||
QP -> NECI;
|
||||
NECI -> GammCor [style="dotted"];
|
||||
NECI -> QMCCHEM [style="dotted"] ;
|
||||
QP -> QMCCHEM;
|
||||
QP -> CHAMP;
|
||||
QP -> GammCor [style="dotted"];
|
||||
QP -> Turbo [style="dotted"];
|
||||
NECI -> Turbo [style="dotted"];
|
||||
NECI -> CHAMP [style="dotted"];
|
||||
QMCCHEM -> GammCor [style="dotted"];
|
||||
CHAMP -> GammCor [style="dotted"];
|
||||
Turbo -> GammCor [style="dotted"];
|
||||
}
|
||||
#+END_SRC
|
||||
#+RESULTS:
|
||||
[[file:interfaces.png]]
|
||||
#+LATEX: \end{exampleblock}
|
||||
#+LATEX: \end{column}
|
||||
#+LATEX: \begin{column}{0.6\textwidth}
|
||||
#+LATEX: \begin{exampleblock}{After}
|
||||
#+BEGIN_SRC dot :output file :file interfaces2.png
|
||||
digraph G {
|
||||
layout=circo;
|
||||
External [label="External codes"];
|
||||
QP [label="Quantum Package"];
|
||||
QMCCHEM [label="QMC=Chem"];
|
||||
Turbo [label="TurboRVB"];
|
||||
TREX [label="TREXIO File", shape="box"];
|
||||
CHAMP -> TREX;
|
||||
GammCor -> TREX;
|
||||
NECI -> TREX;
|
||||
QMCCHEM -> TREX;
|
||||
QP -> TREX;
|
||||
Turbo -> TREX;
|
||||
External -> TREX;
|
||||
|
||||
TREX -> CHAMP;
|
||||
TREX -> GammCor;
|
||||
TREX -> NECI;
|
||||
TREX -> QMCCHEM;
|
||||
TREX -> QP;
|
||||
TREX -> Turbo;
|
||||
TREX -> External;
|
||||
}
|
||||
#+END_SRC
|
||||
#+RESULTS:
|
||||
[[file:interfaces2.png]]
|
||||
#+LATEX: \end{exampleblock}
|
||||
#+LATEX: \end{column}
|
||||
#+LATEX: \end{columns}
|
||||
|
||||
(BSD license) \\
|
||||
https://github.com/trex-coe/trexio
|
||||
|
||||
* I/O library (TREXIO)
|
||||
|
||||
#+LATEX: \begin{columns}
|
||||
#+LATEX: \begin{column}{0.50\textwidth}
|
||||
#+LATEX: \begin{exampleblock}{Front end}
|
||||
- Definition of an API to read/write wave functions
|
||||
- C-compatible API: Easy bindings in other languages
|
||||
#+LATEX: \end{exampleblock}
|
||||
#+LATEX: \begin{exampleblock}{Content of the files}
|
||||
- File is self-contained: no external knowledge needed to compute
|
||||
$\Psi(r_1,\dots,r_n)$ (normalization factors, basis set
|
||||
parameters, /etc/)
|
||||
- Strong conventions (atomic units, ordering of cartesian orbitals, /etc/)
|
||||
#+LATEX: \end{exampleblock}
|
||||
#+LATEX: \end{column}
|
||||
#+LATEX: \begin{column}{0.5\textwidth}
|
||||
#+ATTR_LATEX: :width 0.7\textwidth
|
||||
[[./api.png]]
|
||||
#+LATEX: \begin{exampleblock}{Back end}
|
||||
- HDF5: Efficient I/O
|
||||
- Text: debugging, fallback when HDF5 can't be installed
|
||||
#+LATEX: \end{exampleblock}
|
||||
Source code generated from a config file.
|
||||
#+LATEX: \end{column}
|
||||
#+LATEX: \end{columns}
|
||||
|
||||
|
||||
* Quantum Monte Carlo (QMC)
|
||||
|
||||
#+BEGIN_SRC latex
|
||||
@ -132,9 +253,6 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
|
||||
[[./Qmc.png]]
|
||||
#+LATEX: \end{column}
|
||||
#+LATEX: \end{columns}
|
||||
|
||||
|
||||
|
||||
|
||||
* QMC kernel library (QMCkl)
|
||||
|
||||
@ -146,9 +264,7 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
|
||||
- architectures
|
||||
- problem sizes
|
||||
- requested accuracy (reduced precision)
|
||||
- The sequence of kernels will be scheduled with the StarPU runtime
|
||||
|
||||
|
||||
* QMC kernel library (QMCkl)
|
||||
|
||||
** Two implementations
|
||||
@ -159,24 +275,15 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
|
||||
** Advantages
|
||||
- The code can stay easy to understand by the physicists/chemists
|
||||
Performance-related aspects are delegated to the library
|
||||
- Changing architecture requires only linking with another
|
||||
version of the library
|
||||
- Scientists can use their preferred language
|
||||
- Scientists don't lose control over their codes
|
||||
- Codes don't die when the architecture changes
|
||||
- Scientific code development does not break the performance
|
||||
- Better re-use of the optimization effort among the community
|
||||
|
||||
* Literate programming :noexport:
|
||||
|
||||
#+BEGIN_quote
|
||||
Literate programming is a programming paradigm introduced by Donald
|
||||
Knuth in which a computer program is given an explanation of its
|
||||
logic in a natural language, such as English, interspersed with
|
||||
snippets of macros and traditional source code, from which
|
||||
compilable source code can be generated. (Wikipedia)
|
||||
#+END_quote
|
||||
|
||||
* Documentation library :noexport:
|
||||
Literate programming with org-mode:
|
||||
- Here, comments are more important than code
|
||||
- Comments are more important than code
|
||||
- Can add graphics, \LaTeX formulas, tables, etc
|
||||
- Documentation always synchronized with the code
|
||||
- Some routines can be generated by embedded scripts
|
||||
@ -185,7 +292,24 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
|
||||
- The API is C-compatible: QMCkl appears like a C library
|
||||
$\Longrightarrow$ can be used in all other languages
|
||||
|
||||
* Design strategy
|
||||
* HPC library
|
||||
- Same API as the documentation library
|
||||
- Optimization is guided by analysis with *MAQAO*\footnote{https://maqao.org}.
|
||||
- Propose performance-critical choices in the API design (data
|
||||
structures, memory management, /etc/)
|
||||
- Both CPU and GPU versions of the kernels
|
||||
- Task parallelism with StarPU\footnote{C. Augonnet et al, doi:10.1002/cpe.1631} to schedule kernels on CPU and GPU and
|
||||
handle asynchronous CPU-GPU transfers
|
||||
|
||||
* Efficiently guiding the developer
|
||||
|
||||
#+ATTR_LATEX: :width \textwidth
|
||||
[[./maqao1.png]]
|
||||
* Extensive/automatic testing of different configurations
|
||||
|
||||
#+ATTR_LATEX: :width \textwidth
|
||||
[[./maqao2.png]]
|
||||
* Design strategy :noexport:
|
||||
|
||||
1. Kernel extraction: QMC specialists agree on the
|
||||
mathematical expression of the problem
|
||||
@ -195,8 +319,8 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
|
||||
4. The documentation library is linked in a QMC code to check correctness
|
||||
5. HPC experts provide an HPC version of the kernel
|
||||
6. The HPC library is linked in the QMC codes of the CoE
|
||||
|
||||
* Our first application : 3-body Jastrow factor
|
||||
|
||||
* First application : 3-body Jastrow factor
|
||||
|
||||
#+LATEX: \newcommand{\Jeen}{J_{\text{een}}}
|
||||
#+LATEX: \newcommand{\Nel}{N_{\text{elec}}}
|
||||
@ -231,12 +355,39 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
|
||||
#+LATEX: \end{column}
|
||||
#+LATEX: \end{columns}
|
||||
|
||||
|
||||
#+INCLUDE: "verificarlo.tex" export latex
|
||||
* Verificarlo CI
|
||||
|
||||
* Links
|
||||
#+LATEX: \begin{columns}
|
||||
#+LATEX: \begin{column}{0.5\textwidth}
|
||||
#+LATEX: \begin{exampleblock}{Compare runs}
|
||||
#+ATTR_LATEX: :width 0.85\textwidth
|
||||
[[./img/cmp-runs.png]]
|
||||
- Track precision of kernels over commits
|
||||
- Shows significant digits $s$, standard deviation $\sigma$,
|
||||
variable distribution
|
||||
#+LATEX: \end{exampleblock}
|
||||
#+LATEX: \end{column}
|
||||
#+LATEX: \begin{column}{0.5\textwidth}
|
||||
#+LATEX: \begin{exampleblock}{Inspect runs}
|
||||
#+ATTR_LATEX: :width 0.85\textwidth
|
||||
[[./img/inspect-runs.png]]
|
||||
- Focus in depth on one particular run
|
||||
- Compare multiple implementations of the same kernel
|
||||
#+LATEX: \end{exampleblock}
|
||||
#+LATEX: \end{column}
|
||||
#+LATEX: \end{columns}
|
||||
|
||||
- TREX web site : https://trex-coe.eu
|
||||
- QMCkl documentation : https://trex-coe.github.io/qmckl
|
||||
- QMCkl repository : https://github.com/trex-coe/qmckl
|
||||
|
||||
* Useful links
|
||||
|
||||
| TREX web site | https://trex-coe.eu |
|
||||
| TREXIO | https://github.com/trex-coe/trexio |
|
||||
| QMCkl | https://github.com/trex-coe/qmckl |
|
||||
| QMCkl documentation | https://trex-coe.github.io/qmckl |
|
||||
| MAQAO | http://www.maqao.org |
|
||||
| Verificarlo | https://github.com/verificarlo/verificarlo |
|
||||
|
||||
* Export :noexport:
|
||||
#+BEGIN_SRC elisp :output none
|
||||
|
323
scemama.tex
323
scemama.tex
@ -1,4 +1,4 @@
|
||||
% Created 2021-06-30 Wed 12:12
|
||||
% Created 2021-07-01 Thu 15:48
|
||||
% Intended LaTeX compiler: pdflatex
|
||||
\documentclass[aspectratio=169]{beamer}
|
||||
\usepackage[utf8]{inputenc}
|
||||
@ -14,7 +14,8 @@
|
||||
\usepackage{amssymb}
|
||||
\usepackage{capt-of}
|
||||
\usepackage{hyperref}
|
||||
\institute{Lab. Chimie et Physique Quantiques, IRSAMC, UPS/CNRS, Toulouse (France)}
|
||||
\institute{$^1$University of Toulouse/CNRS, LCPQ (France) \\
|
||||
$^2$University of Versailles, Li-PaRAD (France)}
|
||||
\usepackage{minted}
|
||||
\usemintedstyle{emacs}
|
||||
\newminted{f90}{fontsize=\footnotesize}
|
||||
@ -29,13 +30,20 @@
|
||||
\definecolor{darkpink}{rgb}{0.7,0.0,0.7}
|
||||
\newcommand{\coord }{{\bf r}_1, \dots, {\bf r}_N }
|
||||
\newcommand{\dcoord }{\dd {\bf r}_1 \dots \dd{\bf r}_N }
|
||||
\usepackage[backend=biber,style=alphabetic,autocite=plain,sorting=none]{biblatex}
|
||||
\addbibresource{verificarlo.bib}
|
||||
\usepackage{graphicx}
|
||||
\usepackage[many]{tcolorbox}
|
||||
\usepackage{tikz}
|
||||
\usetikzlibrary{tikzmark,positioning}
|
||||
\definecolor{grey}{RGB}{170,170,170}
|
||||
\usetheme{trex}
|
||||
\author{Anthony Scemama}
|
||||
\date{12/03/2021}
|
||||
\title{Library development within TREX}
|
||||
\author{Anthony Scemama\(^1\), Pablo de Oliveira Castro\(^2\), Cedric Valensi\(^2\), William Jalby\(^2\)}
|
||||
\date{02/07/2021}
|
||||
\title{TREX : an innovative view of HPC usage applied to Quantum Monte Carlo simulations}
|
||||
\hypersetup{
|
||||
pdfauthor={Anthony Scemama},
|
||||
pdftitle={Library development within TREX},
|
||||
pdfauthor={Anthony Scemama\(^1\), Pablo de Oliveira Castro\(^2\), Cedric Valensi\(^2\), William Jalby\(^2\)},
|
||||
pdftitle={TREX : an innovative view of HPC usage applied to Quantum Monte Carlo simulations},
|
||||
pdfkeywords={},
|
||||
pdfsubject={},
|
||||
pdfcreator={Emacs 26.3 (Org mode 9.4)},
|
||||
@ -44,22 +52,7 @@
|
||||
|
||||
\maketitle
|
||||
|
||||
\begin{frame}[label={sec:org52bec56}]{Quantum chemistry}
|
||||
\begin{columns}
|
||||
\begin{column}{0.25\textwidth}
|
||||
\begin{center}
|
||||
\includegraphics[width=\textwidth]{./dirac_4.jpg}
|
||||
\end{center}
|
||||
\end{column}
|
||||
\begin{column}{0.75\textwidth}
|
||||
\begin{center}
|
||||
\includegraphics[width=\textwidth]{./dirac2.png}
|
||||
\end{center}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[label={sec:org2a0da55}]{Quantum chemistry}
|
||||
\begin{frame}[label={sec:org0538bed}]{Quantum chemistry}
|
||||
\begin{columns}
|
||||
\begin{column}{0.6\textwidth}
|
||||
\begin{exampleblock}{}
|
||||
@ -97,7 +90,29 @@
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[label={sec:org7ad98d0}]{TREX: Targeting REal chemical accuracy at the EXascale}
|
||||
\begin{frame}[label={sec:orgfcf3270}]{The TREX CoE}
|
||||
\begin{columns}
|
||||
\begin{column}{0.75\textwidth}
|
||||
\begin{center}
|
||||
\includegraphics[width=\textwidth]{./TREX2.png}
|
||||
\end{center}
|
||||
\end{column}
|
||||
\begin{column}{0.25\textwidth}
|
||||
\begin{exampleblock}{Codes}
|
||||
\begin{itemize}
|
||||
\item CHAMP
|
||||
\item QMC=Chem
|
||||
\item TurboRVB
|
||||
\item NECI
|
||||
\item Quantum Package
|
||||
\item GammCor
|
||||
\end{itemize}
|
||||
\end{exampleblock}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[label={sec:org42c62d5}]{TREX: Targeting REal chemical accuracy at the EXascale}
|
||||
\begin{columns}
|
||||
\begin{column}{0.4\textwidth}
|
||||
\begin{center}
|
||||
@ -109,8 +124,10 @@
|
||||
\begin{exampleblock}{Objective: Make codes ready for exascale}
|
||||
How: Instead of re-writing codes, provide libraries
|
||||
\begin{itemize}
|
||||
\item One library for exchanging information between codes (\alert{TREXIO})
|
||||
\item One library for high-performance (\alert{QMCkl})
|
||||
\item A library for exchanging information between codes (\alert{TREXIO})
|
||||
\(\Longrightarrow\) Enables HTC
|
||||
\item A library for high-performance (\alert{QMCkl})
|
||||
\(\Longrightarrow\) Enables HPC
|
||||
\end{itemize}
|
||||
\end{exampleblock}
|
||||
\begin{exampleblock}{QMC: Quantum Monte Carlo methods}
|
||||
@ -124,7 +141,63 @@ How: Instead of re-writing codes, provide libraries
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[label={sec:orgd075e20}]{Quantum Monte Carlo (QMC)}
|
||||
\begin{frame}[label={sec:orgc86670a}]{I/O library (TREXIO)}
|
||||
\begin{columns}
|
||||
\begin{column}{0.4\textwidth}
|
||||
\begin{exampleblock}{Before}
|
||||
\begin{center}
|
||||
\includegraphics[width=.9\linewidth]{interfaces.png}
|
||||
\end{center}
|
||||
\end{exampleblock}
|
||||
\end{column}
|
||||
\begin{column}{0.6\textwidth}
|
||||
\begin{exampleblock}{After}
|
||||
\begin{center}
|
||||
\includegraphics[width=.9\linewidth]{interfaces2.png}
|
||||
\end{center}
|
||||
\end{exampleblock}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
|
||||
(BSD license) \\
|
||||
\url{https://github.com/trex-coe/trexio}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[label={sec:orgc4c0bc0}]{I/O library (TREXIO)}
|
||||
\begin{columns}
|
||||
\begin{column}{0.50\textwidth}
|
||||
\begin{exampleblock}{Front end}
|
||||
\begin{itemize}
|
||||
\item Definition of an API to read/write wave functions
|
||||
\item C-compatible API: Easy bindings in other languages
|
||||
\end{itemize}
|
||||
\end{exampleblock}
|
||||
\begin{exampleblock}{Content of the files}
|
||||
\begin{itemize}
|
||||
\item File is self-contained: no external knowledge needed to compute
|
||||
\(\Psi(r_1,\dots,r_n)\) (normalization factors, basis set
|
||||
parameters, \emph{etc})
|
||||
\item Strong conventions (atomic units, ordering of cartesian orbitals, \emph{etc})
|
||||
\end{itemize}
|
||||
\end{exampleblock}
|
||||
\end{column}
|
||||
\begin{column}{0.5\textwidth}
|
||||
\begin{center}
|
||||
\includegraphics[width=0.7\textwidth]{./api.png}
|
||||
\end{center}
|
||||
\begin{exampleblock}{Back end}
|
||||
\begin{itemize}
|
||||
\item HDF5: Efficient I/O
|
||||
\item Text: debugging, fallback when HDF5 can't be installed
|
||||
\end{itemize}
|
||||
\end{exampleblock}
|
||||
Source code generated from a config file.
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}[label={sec:org52e95d7}]{Quantum Monte Carlo (QMC)}
|
||||
\alert{Problem}: Stochastic resolution of the Schr\"odinger equation for $N$ electrons
|
||||
\begin{eqnarray}
|
||||
E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
|
||||
@ -150,7 +223,7 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[label={sec:org933f7ec}]{Quantum Monte Carlo (QMC)}
|
||||
\begin{frame}[label={sec:org4cf1738}]{Quantum Monte Carlo (QMC)}
|
||||
\begin{columns}
|
||||
\begin{column}{0.4\textwidth}
|
||||
\begin{itemize}
|
||||
@ -168,10 +241,7 @@ E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
|
||||
|
||||
|
||||
\begin{frame}[label={sec:org8b6768c}]{QMC kernel library (QMCkl)}
|
||||
\begin{frame}[label={sec:org2aa57d2}]{QMC kernel library (QMCkl)}
|
||||
\begin{block}{Computational kernels}
|
||||
\begin{itemize}
|
||||
\item QMCkl will contain the main kernels of QMC methods
|
||||
@ -183,13 +253,11 @@ for different
|
||||
\item problem sizes
|
||||
\item requested accuracy (reduced precision)
|
||||
\end{itemize}
|
||||
\item Kernels will be scheduled with the StarPU runtime
|
||||
\end{itemize}
|
||||
\end{block}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}[label={sec:org24ef7da}]{QMC kernel library (QMCkl)}
|
||||
\begin{frame}[label={sec:org91f9a85}]{QMC kernel library (QMCkl)}
|
||||
\begin{block}{Two implementations}
|
||||
\begin{itemize}
|
||||
\item \emph{Documentation} : easy to read and understand, not necessarily efficient
|
||||
@ -202,28 +270,38 @@ for different
|
||||
\begin{itemize}
|
||||
\item The code can stay easy to understand by the physicists/chemists
|
||||
Performance-related aspects are delegated to the library
|
||||
\item Changing architecture requires only linking with another
|
||||
version of the library
|
||||
\item Scientists can use their preferred language
|
||||
\item Scientists don't lose control over their codes
|
||||
\item Codes don't die when the architecture changes
|
||||
\item Scientific code development does not break the performance
|
||||
\item Better re-use of the optimization effort among the community
|
||||
\end{itemize}
|
||||
\end{block}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[label={sec:org8e1f375}]{Design strategy}
|
||||
\begin{enumerate}
|
||||
\item Kernel extraction: QMC specialists agree on the
|
||||
mathematical expression of the problem
|
||||
\item A mini-application is written to find the optimal data layout
|
||||
with HPC experts from real-size examples
|
||||
\item The kernel is written in the documentation library
|
||||
\item The documentation library is linked in a QMC code to check correctness
|
||||
\item HPC experts provide an HPC version of the kernel
|
||||
\item The HPC library is linked in the QMC codes of the CoE
|
||||
\end{enumerate}
|
||||
\begin{frame}[label={sec:org9898ced}]{HPC library}
|
||||
\begin{itemize}
|
||||
\item Same API as the documentation library
|
||||
\item Optimization is guided by analysis with \alert{MAQAO}\footnote{https://maqao.org}.
|
||||
\item Propose performance-critical choices in the API design (data
|
||||
structures, memory management, \emph{etc})
|
||||
\item Both CPU and GPU versions of the kernels
|
||||
\item Task parallelism with StarPU\footnote{C. Augonnet et al, doi:10.1002/cpe.1631} to schedule kernels on CPU and GPU and
|
||||
handle asynchronous CPU-GPU transfers
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[label={sec:orge2ab500}]{Our first application : 3-body Jastrow factor}
|
||||
\begin{frame}[label={sec:orgd465d61}]{Efficiently guiding the developer}
|
||||
\begin{center}
|
||||
\includegraphics[width=\textwidth]{./maqao1.png}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
\begin{frame}[label={sec:org2cc8e19}]{Extensive/automatic testing of different configurations}
|
||||
\begin{center}
|
||||
\includegraphics[width=\textwidth]{./maqao2.png}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
\begin{frame}[label={sec:org6328ebd}]{First application : 3-body Jastrow factor}
|
||||
\newcommand{\Jeen}{J_{\text{een}}}
|
||||
\newcommand{\Nel}{N_{\text{elec}}}
|
||||
\newcommand{\Nat}{N_{\text{nucl}}}
|
||||
@ -255,18 +333,151 @@ with HPC experts from real-size examples
|
||||
\item Gradient and Laplacian are also required
|
||||
\item Up to \(20\times\) faster than in the original code
|
||||
\item \(\sim 80\%\) of the AVX-512 peak is reached
|
||||
\item Using a DGEMM kernel \(\Longrightarrow\) also efficient on GPU
|
||||
\item Expressed with a DGEMM kernel \(\Longrightarrow\) also efficient on GPU
|
||||
\end{itemize}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
|
||||
|
||||
\end{frame}
|
||||
\begin{frame}[fragile]{Numerical analysis with Verificarlo}
|
||||
|
||||
|
||||
\textbf{Verificarlo} is a tool for assessing the precision of floating point operations.
|
||||
It can be used to :
|
||||
|
||||
\begin{columns}
|
||||
\column{0.3\textwidth}
|
||||
{\centering
|
||||
\includegraphics[width=80px, keepaspectratio]{img/verificarlo.png}
|
||||
}\\%
|
||||
|
||||
{\footnotesize
|
||||
\url{https://github.com/verificarlo/verificarlo} GPL v3 \\
|
||||
}
|
||||
\column{0.7\textwidth}
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Find numerical bugs} in codes \footnotemark[1]
|
||||
\begin{itemize}
|
||||
\item Stochastic arithmetic to simulate round-off and cancellations
|
||||
\item Localization techniques to pinpoint source of errors
|
||||
\end{itemize}
|
||||
|
||||
\item \textbf{Optimize precision} \footnotemark[2]
|
||||
\begin{itemize}
|
||||
\item Simulate custom formats for mixed precision \\(float, bf16)
|
||||
\item Tune precision in math library calls
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\end{columns}
|
||||
\footnotetext[1]{
|
||||
C. Denis \textit{et al.} \href{https://dx.doi.org/10.1109/ARITH.2016.31}{doi:10.1109/ARITH.2016.31}
|
||||
}
|
||||
\footnotetext[2]{
|
||||
Y. Chatelain \textit{et al.} \href{https://dx.doi.org/10.1007/978-3-030-29400-7\_34}{doi:10.1007/978-3-030-29400-7\_34}
|
||||
}
|
||||
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{The Verificarlo pipeline}
|
||||
\begin{itemize}
|
||||
\item Each Floating-Point (FP) operation may introduce a $\delta$ error
|
||||
$$ z = fl[x+y] = (x+y)(1+\delta) $$
|
||||
\item When chaining multiple operations, errors can accumulate and snowball
|
||||
\item \structure{Monte Carlo Arithmetic key principle}
|
||||
\begin{itemize}
|
||||
\item Make $\delta$ a \structure{random variable}
|
||||
\item Use a Monte Carlo simulation to empirically estimate the FP error distribution
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\begin{center}
|
||||
\includegraphics[width=.8\textwidth]{img/verificarlo_pipeline.png}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Continuous-Integration precision tracking}
|
||||
|
||||
\begin{itemize}
|
||||
\item Each push to \structure{QMCkl} triggers a Verificarlo analysis.
|
||||
\item QMCkl kernel unit tests are augmented with probes:
|
||||
\begin{itemize}
|
||||
\item track a scalar value precision
|
||||
\item ensure that a target precision is reached
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\vspace{2cm}\vfill
|
||||
|
||||
vfc\_probe(\tikzmark{kernel}"Sherman-Morrison", \tikzmark{var}"residual", res) \\
|
||||
vfc\_probe\_assert("Sherman-Morrison", "res", res, \tikzmark{target}1e-7)
|
||||
|
||||
\begin{tikzpicture}[
|
||||
remember picture,
|
||||
overlay,
|
||||
expl/.style={draw=orange,fill=orange!30,rounded corners,text width=3cm},
|
||||
arrow/.style={red!80!black,ultra thick,->,>=latex}
|
||||
]
|
||||
\node[expl]
|
||||
(kernelex)
|
||||
at (2,3cm)
|
||||
{Kernel name};
|
||||
\node[expl]
|
||||
(varex)
|
||||
at (7,3cm)
|
||||
{Variable name};
|
||||
\node[expl]
|
||||
(targetex)
|
||||
at (12,3cm)
|
||||
{Target precision};
|
||||
\draw[arrow]
|
||||
(kernelex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=1.7cm]{pic cs:kernel});
|
||||
\draw[arrow]
|
||||
(varex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=1cm]{pic cs:var});
|
||||
\draw[arrow]
|
||||
(targetex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=.5cm]{pic cs:target});
|
||||
\end{tikzpicture}
|
||||
\end{frame}
|
||||
\begin{frame}[label={sec:orga0c3982}]{Verificarlo CI}
|
||||
\begin{columns}
|
||||
\begin{column}{0.5\textwidth}
|
||||
\begin{exampleblock}{Compare runs}
|
||||
\begin{center}
|
||||
\includegraphics[width=0.85\textwidth]{./img/cmp-runs.png}
|
||||
\end{center}
|
||||
\begin{itemize}
|
||||
\item Track precision of kernels over commits
|
||||
\item Shows significant digits \(s\), standard deviation \(\sigma\),
|
||||
variable distribution
|
||||
\end{itemize}
|
||||
\end{exampleblock}
|
||||
\end{column}
|
||||
\begin{column}{0.5\textwidth}
|
||||
\begin{exampleblock}{Inspect runs}
|
||||
\begin{center}
|
||||
\includegraphics[width=0.85\textwidth]{./img/inspect-runs.png}
|
||||
\end{center}
|
||||
\begin{itemize}
|
||||
\item Focus in depth on one particular run
|
||||
\item Compare multiple implementations of the same kernel
|
||||
\end{itemize}
|
||||
\end{exampleblock}
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}[label={sec:org5b9dcc8}]{Links}
|
||||
\begin{itemize}
|
||||
\item TREX web site : \url{https://trex-coe.eu}
|
||||
\item QMCkl documentation : \url{https://trex-coe.github.io/qmckl}
|
||||
\item QMCkl repository : \url{https://github.com/trex-coe/qmckl}
|
||||
\end{itemize}
|
||||
\begin{frame}[label={sec:org6ac4d38}]{Useful links}
|
||||
\begin{center}
|
||||
\begin{tabular}{ll}
|
||||
TREX web site & \url{https://trex-coe.eu}\\
|
||||
TREXIO & \url{https://github.com/trex-coe/trexio}\\
|
||||
QMCkl & \url{https://github.com/trex-coe/qmckl}\\
|
||||
QMCkl documentation & \url{https://trex-coe.github.io/qmckl}\\
|
||||
MAQAO & \url{http://www.maqao.org}\\
|
||||
Verificarlo & \url{https://github.com/verificarlo/verificarlo}\\
|
||||
\end{tabular}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
\end{document}
|
101
verificarlo.tex
Normal file
101
verificarlo.tex
Normal file
@ -0,0 +1,101 @@
|
||||
\end{frame}
|
||||
\begin{frame}[fragile]{Numerical analysis with Verificarlo}
|
||||
|
||||
|
||||
\textbf{Verificarlo} is a tool for assessing the precision of floating point operations.
|
||||
It can be used to :
|
||||
|
||||
\begin{columns}
|
||||
\column{0.3\textwidth}
|
||||
{\centering
|
||||
\includegraphics[width=80px, keepaspectratio]{img/verificarlo.png}
|
||||
}\\%
|
||||
|
||||
{\footnotesize
|
||||
\url{https://github.com/verificarlo/verificarlo} GPL v3 \\
|
||||
}
|
||||
\column{0.7\textwidth}
|
||||
|
||||
\begin{itemize}
|
||||
\item \textbf{Find numerical bugs} in codes \footnotemark[1]
|
||||
\begin{itemize}
|
||||
\item Stochastic arithmetic to simulate round-off and cancellations
|
||||
\item Localization techniques to pinpoint source of errors
|
||||
\end{itemize}
|
||||
|
||||
\item \textbf{Optimize precision} \footnotemark[2]
|
||||
\begin{itemize}
|
||||
\item Simulate custom formats for mixed precision \\(float, bf16)
|
||||
\item Tune precision in math library calls
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\end{columns}
|
||||
\footnotetext[1]{
|
||||
C. Denis \textit{et al.} \href{https://dx.doi.org/10.1109/ARITH.2016.31}{doi:10.1109/ARITH.2016.31}
|
||||
}
|
||||
\footnotetext[2]{
|
||||
Y. Chatelain \textit{et al.} \href{https://dx.doi.org/10.1007/978-3-030-29400-7\_34}{doi:10.1007/978-3-030-29400-7\_34}
|
||||
}
|
||||
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}[fragile]{The Verificarlo pipeline}
|
||||
\begin{itemize}
|
||||
\item Each Floating-Point (FP) operation may introduce a $\delta$ error
|
||||
$$ z = fl[x+y] = (x+y)(1+\delta) $$
|
||||
\item When chaining multiple operations, errors can accumulate and snowball
|
||||
\item \structure{Monte Carlo Arithmetic key principle}
|
||||
\begin{itemize}
|
||||
\item Make $\delta$ a \structure{random variable}
|
||||
\item Use a Monte Carlo simulation to empirically estimate the FP error distribution
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\begin{center}
|
||||
\includegraphics[width=.8\textwidth]{img/verificarlo_pipeline.png}
|
||||
\end{center}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Continuous-Integration precision tracking}
|
||||
|
||||
\begin{itemize}
|
||||
\item Each push to \structure{QMCkl} triggers a Verificarlo analysis.
|
||||
\item QMCkl kernel unit tests are augmented with probes:
|
||||
\begin{itemize}
|
||||
\item track a scalar value precision
|
||||
\item ensure that a target precision is reached
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\vspace{2cm}\vfill
|
||||
|
||||
vfc\_probe(\tikzmark{kernel}"Sherman-Morrison", \tikzmark{var}"residual", res) \\
|
||||
vfc\_probe\_assert("Sherman-Morrison", "res", res, \tikzmark{target}1e-7)
|
||||
|
||||
\begin{tikzpicture}[
|
||||
remember picture,
|
||||
overlay,
|
||||
expl/.style={draw=orange,fill=orange!30,rounded corners,text width=3cm},
|
||||
arrow/.style={red!80!black,ultra thick,->,>=latex}
|
||||
]
|
||||
\node[expl]
|
||||
(kernelex)
|
||||
at (2,3cm)
|
||||
{Kernel name};
|
||||
\node[expl]
|
||||
(varex)
|
||||
at (7,3cm)
|
||||
{Variable name};
|
||||
\node[expl]
|
||||
(targetex)
|
||||
at (12,3cm)
|
||||
{Target precision};
|
||||
\draw[arrow]
|
||||
(kernelex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=1.7cm]{pic cs:kernel});
|
||||
\draw[arrow]
|
||||
(varex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=1cm]{pic cs:var});
|
||||
\draw[arrow]
|
||||
(targetex.south) to[out=-90,in=90] ([yshift=1.2ex, xshift=.5cm]{pic cs:target});
|
||||
\end{tikzpicture}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user