412 lines
14 KiB
Org Mode
412 lines
14 KiB
Org Mode
#+TITLE: TREX: an innovative view of HPC usage applied to Quantum Monte Carlo simulations
|
|
#+DATE: 02/07/2021
|
|
#+AUTHOR: Anthony Scemama$^1$, Pablo de Oliveira Castro$^2$, Cedric Valensi$^2$, William Jalby$^2$
|
|
|
|
#+LaTeX_HEADER: \institute{$^1$University of Toulouse/CNRS, LCPQ (France) \\
|
|
#+LaTeX_HEADER: $^2$University of Versailles, Li-PaRAD (France)}
|
|
#+LATEX_CLASS: beamer
|
|
#+LaTeX_CLASS_OPTIONS:[aspectratio=169]
|
|
#+BEAMER_THEME: trex
|
|
#+LaTeX_HEADER: \usepackage{minted}
|
|
#+LaTeX_HEADER: \usemintedstyle{emacs}
|
|
#+LaTeX_HEADER: \newminted{f90}{fontsize=\footnotesize}
|
|
#+LaTeX_HEADER: \usepackage[utf8]{inputenc}
|
|
#+LaTeX_HEADER: \usepackage[T1]{fontenc}
|
|
#+LaTeX_HEADER: \usepackage{hyperref}
|
|
#+LaTeX_HEADER: \usepackage{mathtools}
|
|
#+LaTeX_HEADER: \usepackage{physics}
|
|
#+LaTeX_HEADER: \definecolor{darkgreen}{rgb}{0.,0.6,0.}
|
|
#+LaTeX_HEADER: \definecolor{darkblue}{rgb}{0.,0.2,0.7}
|
|
#+LaTeX_HEADER: \definecolor{darkred}{rgb}{0.6,0.1,0.1}
|
|
#+LaTeX_HEADER: \definecolor{darkpink}{rgb}{0.7,0.0,0.7}
|
|
#+LaTeX_HEADER: \newcommand{\coord }{{\bf r}_1, \dots, {\bf r}_N }
|
|
#+LaTeX_HEADER: \newcommand{\dcoord }{\dd {\bf r}_1 \dots \dd{\bf r}_N }
|
|
|
|
#+LaTeX_HEADER: \usepackage[backend=biber,style=alphabetic,autocite=plain,sorting=none]{biblatex}
|
|
#+LaTeX_HEADER: \addbibresource{verificarlo.bib}
|
|
#+LaTeX_HEADER: \usepackage{graphicx}
|
|
#+LaTeX_HEADER: \usepackage[many]{tcolorbox}
|
|
#+LaTeX_HEADER: \usepackage{tikz}
|
|
#+LaTeX_HEADER: \usetikzlibrary{tikzmark,positioning}
|
|
#+LaTeX_HEADER: \definecolor{grey}{RGB}{170,170,170}
|
|
|
|
#+EXPORT_EXCLUDE_TAGS: noexport
|
|
|
|
#+startup: beamer
|
|
#+options: H:1 toc:nil
|
|
|
|
* Quantum chemistry :noexport:
|
|
|
|
#+LATEX: \begin{columns}
|
|
#+LATEX: \begin{column}{0.25\textwidth}
|
|
#+ATTR_LATEX: :width \textwidth
|
|
[[./dirac_4.jpg]]
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \begin{column}{0.75\textwidth}
|
|
#+ATTR_LATEX: :width \textwidth
|
|
[[./dirac2.png]]
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \end{columns}
|
|
|
|
* Quantum chemistry
|
|
|
|
#+LATEX: \begin{columns}
|
|
#+LATEX: \begin{column}{0.6\textwidth}
|
|
#+LATEX: \begin{exampleblock}{}
|
|
- Describing matter with quantum mechanics (Schrödinger's equation)
|
|
- Users: theoretical chemists and physicists
|
|
#+LATEX: \end{exampleblock}
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \begin{column}{0.4\textwidth}
|
|
#+ATTR_LATEX: :width \textwidth
|
|
[[./Water.png]]
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \end{columns}
|
|
|
|
#+LATEX: \begin{columns}
|
|
#+LATEX: \begin{column}{0.4\textwidth}
|
|
#+ATTR_LATEX: :width \textwidth
|
|
[[./casula.png]]
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \begin{column}{0.6\textwidth}
|
|
#+LATEX: \begin{exampleblock}{Implications for society}
|
|
| - Health | Drug design |
|
|
| - Electronics | Nano- and micro-electronics |
|
|
| - Materials | Carbon nanotubes, graphene, \dots |
|
|
| - Catalysis | Enzymatic reactions, petroleum |
|
|
#+LATEX: \end{exampleblock}
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \end{columns}
|
|
|
|
* The TREX CoE
|
|
#+LATEX: \begin{columns}
|
|
#+LATEX: \begin{column}{0.75\textwidth}
|
|
#+ATTR_LATEX: :width \textwidth
|
|
[[./TREX2.png]]
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \begin{column}{0.25\textwidth}
|
|
#+LATEX: \begin{exampleblock}{Codes}
|
|
- CHAMP
|
|
- QMC=Chem
|
|
- TurboRVB
|
|
- NECI
|
|
- Quantum Package
|
|
- GammCor
|
|
#+LATEX: \end{exampleblock}
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \end{columns}
|
|
|
|
* TREX: Targeting REal chemical accuracy at the EXascale
|
|
|
|
#+LATEX: \begin{columns}
|
|
#+LATEX: \begin{column}{0.4\textwidth}
|
|
#+ATTR_LATEX: :width \textwidth
|
|
[[./Curve.png]]
|
|
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \begin{column}{0.6\textwidth}
|
|
#+LATEX: \begin{exampleblock}{Objective: Make codes ready for exascale}
|
|
How: Instead of re-writing codes, provide libraries
|
|
- A library for exchanging information between codes (*TREXIO*)
|
|
$\Longrightarrow$ Enables HTC
|
|
- A library for high-performance (*QMCkl*)
|
|
$\Longrightarrow$ Enables HPC
|
|
#+LATEX: \end{exampleblock}
|
|
#+LATEX: \begin{exampleblock}{QMC: Quantum Monte Carlo methods}
|
|
- Highly accurate
|
|
- Massively parallelisable (multiple QMC trajectories)
|
|
- CPU intensive
|
|
#+LATEX: \end{exampleblock}
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \end{columns}
|
|
|
|
* I/O library (TREXIO)
|
|
|
|
#+LATEX: \begin{columns}
|
|
#+LATEX: \begin{column}{0.4\textwidth}
|
|
#+LATEX: \begin{exampleblock}{Before}
|
|
#+BEGIN_SRC dot :output file :file interfaces.png
|
|
digraph G {
|
|
QP [label="Quantum Package"];
|
|
QMCCHEM [label="QMC=Chem"];
|
|
Turbo [label="TurboRVB"];
|
|
QP -> NECI;
|
|
NECI -> GammCor [style="dotted"];
|
|
NECI -> QMCCHEM [style="dotted"] ;
|
|
QP -> QMCCHEM;
|
|
QP -> CHAMP;
|
|
QP -> GammCor [style="dotted"];
|
|
QP -> Turbo [style="dotted"];
|
|
NECI -> Turbo [style="dotted"];
|
|
NECI -> CHAMP [style="dotted"];
|
|
QMCCHEM -> GammCor [style="dotted"];
|
|
CHAMP -> GammCor [style="dotted"];
|
|
Turbo -> GammCor [style="dotted"];
|
|
}
|
|
#+END_SRC
|
|
#+RESULTS:
|
|
[[file:interfaces.png]]
|
|
#+LATEX: \end{exampleblock}
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \begin{column}{0.6\textwidth}
|
|
#+LATEX: \begin{exampleblock}{After}
|
|
#+BEGIN_SRC dot :output file :file interfaces2.png
|
|
digraph G {
|
|
layout=circo;
|
|
External [label="External codes"];
|
|
QP [label="Quantum Package"];
|
|
QMCCHEM [label="QMC=Chem"];
|
|
Turbo [label="TurboRVB"];
|
|
TREX [label="TREXIO File", shape="box"];
|
|
CHAMP -> TREX;
|
|
GammCor -> TREX;
|
|
NECI -> TREX;
|
|
QMCCHEM -> TREX;
|
|
QP -> TREX;
|
|
Turbo -> TREX;
|
|
External -> TREX;
|
|
|
|
TREX -> CHAMP;
|
|
TREX -> GammCor;
|
|
TREX -> NECI;
|
|
TREX -> QMCCHEM;
|
|
TREX -> QP;
|
|
TREX -> Turbo;
|
|
TREX -> External;
|
|
}
|
|
#+END_SRC
|
|
#+RESULTS:
|
|
[[file:interfaces2.png]]
|
|
#+LATEX: \end{exampleblock}
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \end{columns}
|
|
|
|
(BSD license) \\
|
|
https://github.com/trex-coe/trexio
|
|
|
|
* I/O library (TREXIO)
|
|
|
|
#+LATEX: \begin{columns}
|
|
#+LATEX: \begin{column}{0.50\textwidth}
|
|
#+LATEX: \begin{exampleblock}{Front end}
|
|
- Definition of an API to read/write wave functions
|
|
- C-compatible API: Easy bindings in other languages
|
|
#+LATEX: \end{exampleblock}
|
|
#+LATEX: \begin{exampleblock}{Content of the files}
|
|
- File is self-contained: no external knowledge needed to compute
|
|
$\Psi(r_1,\dots,r_n)$ (normalization factors, basis set
|
|
parameters, /etc/)
|
|
- Strong conventions (atomic units, ordering of cartesian orbitals, /etc/)
|
|
#+LATEX: \end{exampleblock}
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \begin{column}{0.5\textwidth}
|
|
#+ATTR_LATEX: :width 0.7\textwidth
|
|
[[./api.png]]
|
|
#+LATEX: \begin{exampleblock}{Back end}
|
|
- HDF5: Efficient I/O
|
|
- Text: debugging, fallback when HDF5 can't be installed
|
|
#+LATEX: \end{exampleblock}
|
|
Source code generated from a config file.
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \end{columns}
|
|
|
|
|
|
* Quantum Monte Carlo (QMC)
|
|
|
|
#+BEGIN_SRC latex
|
|
\alert{Problem}: Stochastic resolution of the Schr\"odinger equation for $N$ electrons
|
|
\begin{eqnarray}
|
|
E &= &\frac{\int \dcoord \Phi(\coord) {\cal H} \Phi(\coord)}
|
|
{\int \dcoord \Phi(\coord) \Phi(\coord)} \nonumber \\
|
|
&\sim & \sum \frac{ {\cal H}\Psi(\coord )}{\Psi(\coord )}
|
|
\text{, sampled with } (\Psi \times \Phi)
|
|
\nonumber
|
|
\end{eqnarray}
|
|
\begin{columns}
|
|
\begin{column}{.5\textwidth}
|
|
\begin{itemize}
|
|
\item[$\cal H $: ] Hamiltonian operator
|
|
\item[$E$: ] Energy
|
|
\end{itemize}
|
|
\end{column}
|
|
\begin{column}{.4\textwidth}
|
|
\begin{itemize}
|
|
\item[$\coord $: ] Electron coordinates
|
|
\item[$\Phi $: ] Almost exact wave function
|
|
\item[$\Psi $: ] Trial wave function
|
|
\end{itemize}
|
|
\end{column}
|
|
\end{columns}
|
|
#+END_SRC
|
|
|
|
* Quantum Monte Carlo (QMC)
|
|
|
|
#+LATEX: \begin{columns}
|
|
#+LATEX: \begin{column}{0.4\textwidth}
|
|
- Very low memory requirements (no integrals)
|
|
- Distribute walkers on different cores or compute nodes
|
|
- No blocking communication: near-ideal scaling
|
|
- Difficulty: parallelize within a QMC trajectory
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \begin{column}{0.6\textwidth}
|
|
#+ATTR_LATEX: :width \textwidth
|
|
[[./Qmc.png]]
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \end{columns}
|
|
|
|
* QMC kernel library (QMCkl)
|
|
|
|
** Computational kernels
|
|
- QMCkl will contain the main kernels of QMC methods
|
|
- Written together by QMC experts and HPC experts
|
|
- Multiple high performance implementations of the kernels, tuned
|
|
for different
|
|
- architectures
|
|
- problem sizes
|
|
- requested accuracy (reduced precision)
|
|
|
|
* QMC kernel library (QMCkl)
|
|
|
|
** Two implementations
|
|
- /Documentation/ : easy to read and understand, not necessarily efficient
|
|
- /High performance/ : efficient, but not necessarily readable by physicists/chemists
|
|
- Both /Documentation/ and /High performance/ have the same API.
|
|
|
|
** Advantages
|
|
- The code can stay easy for physicists/chemists to understand:
|
|
Performance-related aspects are delegated to the library
|
|
- Scientists can use their preferred language
|
|
- Scientists don't lose control over their codes
|
|
- Codes don't die when the architecture changes
|
|
- Scientific code development does not break the performance
|
|
- Better re-use of the optimization effort among the community
|
|
|
|
* Documentation library :noexport:
|
|
Literate programming with org-mode:
|
|
- Comments are more important than code
|
|
- Can add graphics, \LaTeX{} formulas, tables, /etc/
|
|
- Documentation always synchronized with the code
|
|
- Some routines can be generated by embedded scripts
|
|
- Most of the first report was generated from the documentation
|
|
- Kernels are implemented in Fortran for readability
|
|
- The API is C-compatible: QMCkl appears like a C library
|
|
$\Longrightarrow$ can be used in all other languages
|
|
|
|
* HPC library
|
|
- Same API as the documentation library
|
|
- Optimization is guided by analysis with *MAQAO*\footnote{https://maqao.org}.
|
|
- Propose performance-critical choices in the API design (data
|
|
structures, memory management, /etc/)
|
|
- Both CPU and GPU versions of the kernels
|
|
- Task parallelism with StarPU\footnote{C. Augonnet et al., doi:10.1002/cpe.1631} to schedule kernels on CPU and GPU and
|
|
handle asynchronous CPU-GPU transfers
|
|
|
|
* Efficiently guiding the developer
|
|
|
|
#+ATTR_LATEX: :width \textwidth
|
|
[[./maqao1.png]]
|
|
* Extensive/automatic testing of different configurations
|
|
|
|
#+ATTR_LATEX: :width \textwidth
|
|
[[./maqao2.png]]
|
|
* Design strategy :noexport:
|
|
|
|
1. Kernel extraction: QMC specialists agree on the
|
|
mathematical expression of the problem
|
|
2. A mini-application is written to find the optimal data layout
|
|
with HPC experts from real-size examples
|
|
3. The kernel is written in the documentation library
|
|
4. The documentation library is linked in a QMC code to check correctness
|
|
5. HPC experts provide an HPC version of the kernel
|
|
6. The HPC library is linked in the QMC codes of the CoE
|
|
|
|
* First application: 3-body Jastrow factor
|
|
|
|
#+LATEX: \newcommand{\Jeen}{J_{\text{een}}}
|
|
#+LATEX: \newcommand{\Nel}{N_{\text{elec}}}
|
|
#+LATEX: \newcommand{\Nat}{N_{\text{nucl}}}
|
|
#+LATEX: \newcommand{\Nord}{N_{\text{nord}}}
|
|
#+LATEX: \newcommand{\lmax}{p-k-2\delta_{k,0}}
|
|
#+LATEX: \newcommand{\br}{\mathbf{r}}
|
|
#+LATEX: \newcommand{\bR}{\mathbf{R}}
|
|
#+LATEX: \newcommand{\ttr}{\, \bar{\mathtt{r}}}
|
|
#+LATEX: \newcommand{\tR}{\, \bar{\mathtt{R}}}
|
|
#+LATEX: \newcommand{\tP}{\, \bar{\mathtt{P}}}
|
|
|
|
\[
|
|
\Jeen (\br,\bR) = \sum_{\alpha=1}^{\Nat} \sum_{i=1}^{\Nel} \sum_{j=1}^{i-1}
|
|
\sum_{p=2}^{\Nord} \sum_{k=0}^{p-1}
|
|
\sum_{l=0}^{\lmax} c_{lkp\alpha}
|
|
\left( {r}_{ij} \right)^k
|
|
\left[ \left( {R}_{i\alpha} \right)^l + \left( {R}_{j\alpha} \right)^l \right]
|
|
\left( {R}_{i\,\alpha} \, {R}_{j\alpha} \right)^{(p-k-l)/2}
|
|
\]
|
|
|
|
#+LATEX: \begin{columns}
|
|
#+LATEX: \begin{column}{0.5\textwidth}
|
|
#+ATTR_LATEX: :width \textwidth
|
|
[[./speedup.pdf]]
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \begin{column}{0.5\textwidth}
|
|
- Gradient and Laplacian are also required
|
|
- Up to $20\times$ faster than in the original code
|
|
- $\sim 80\%$ of the AVX-512 peak is reached
|
|
- Expressed with a DGEMM kernel $\Longrightarrow$ also efficient on GPU
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \end{columns}
|
|
|
|
|
|
#+INCLUDE: "verificarlo.tex" export latex
|
|
* Verificarlo CI
|
|
|
|
#+LATEX: \begin{columns}
|
|
#+LATEX: \begin{column}{0.5\textwidth}
|
|
#+LATEX: \begin{exampleblock}{Compare runs}
|
|
#+ATTR_LATEX: :width 0.85\textwidth
|
|
[[./img/cmp-runs.png]]
|
|
- Track precision of kernels over commits
|
|
- Shows significant digits $s$, standard deviation $\sigma$,
|
|
variable distribution
|
|
#+LATEX: \end{exampleblock}
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \begin{column}{0.5\textwidth}
|
|
#+LATEX: \begin{exampleblock}{Inspect runs}
|
|
#+ATTR_LATEX: :width 0.85\textwidth
|
|
[[./img/inspect-runs.png]]
|
|
- Focus in depth on one particular run
|
|
- Compare multiple implementations of the same kernel
|
|
#+LATEX: \end{exampleblock}
|
|
#+LATEX: \end{column}
|
|
#+LATEX: \end{columns}
|
|
|
|
|
|
* Useful links
|
|
|
|
| TREX web site | https://trex-coe.eu |
|
|
| TREXIO | https://github.com/trex-coe/trexio |
|
|
| QMCkl | https://github.com/trex-coe/qmckl |
|
|
| QMCkl documentation | https://trex-coe.github.io/qmckl |
|
|
| MAQAO | http://www.maqao.org |
|
|
| Verificarlo | https://github.com/verificarlo/verificarlo |
|
|
|
|
* Export :noexport:
|
|
#+BEGIN_SRC elisp :output none
|
|
(setq org-latex-listings 'minted)
|
|
(setq org-latex-custom-lang-environments
|
|
'(
|
|
(f90 "fortran")
|
|
))
|
|
(setq org-latex-minted-options
|
|
'(("frame" "lines")
|
|
("fontsize" "\\scriptsize")
|
|
("linenos" "")))
|
|
(setq org-latex-pdf-process
|
|
'("pdflatex -shell-escape -interaction nonstopmode -output-directory %o %f"
|
|
"pdflatex -shell-escape -interaction nonstopmode -output-directory %o %f"
|
|
"pdflatex -shell-escape -interaction nonstopmode -output-directory %o %f"))
|
|
(org-beamer-export-to-pdf)
|
|
#+END_SRC
|
|
|
|
#+RESULTS:
|
|
: /home/scemama/TEX/ISC2021/scemama.pdf
|