pres_intel/scemama.tex
2021-06-30 12:17:49 +02:00

272 lines
7.9 KiB
TeX

% Created 2021-06-30 Wed 12:12
% Intended LaTeX compiler: pdflatex
\documentclass[aspectratio=169]{beamer}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{graphicx}
\usepackage{grffile}
\usepackage{longtable}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{textcomp}
\usepackage{amssymb}
\usepackage{capt-of}
\usepackage{hyperref}
\institute{Lab. Chimie et Physique Quantiques, IRSAMC, UPS/CNRS, Toulouse (France)}
\usepackage{minted}
\usemintedstyle{emacs}
\newminted{f90}{fontsize=\footnotesize}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{hyperref}
\usepackage{mathtools}
\usepackage{physics}
\definecolor{darkgreen}{rgb}{0.,0.6,0.}
\definecolor{darkblue}{rgb}{0.,0.2,0.7}
\definecolor{darkred}{rgb}{0.6,0.1,0.1}
\definecolor{darkpink}{rgb}{0.7,0.0,0.7}
% Math-mode shorthand for the full list of electron coordinates r_1, ..., r_N.
% \mathbf replaces the obsolete {\bf ...} font switch.
\newcommand{\coord}{\mathbf{r}_1, \dots, \mathbf{r}_N}
% Matching integration measure dr_1 ... dr_N. \dd is the differential from the
% physics package; its argument is kept braced so that the package's spacing
% for "d<variable>" still applies.
\newcommand{\dcoord}{\dd{\mathbf{r}}_1 \dots \dd{\mathbf{r}}_N}
\usetheme{trex}
\author{Anthony Scemama}
\date{12/03/2021}
\title{Library development within TREX}
\hypersetup{
pdfauthor={Anthony Scemama},
pdftitle={Library development within TREX},
pdfkeywords={},
pdfsubject={},
pdfcreator={Emacs 26.3 (Org mode 9.4)},
pdflang={English}}
\begin{document}
\maketitle
\begin{frame}[label={sec:org52bec56}]{Quantum chemistry}
\begin{columns}
\begin{column}{0.25\textwidth}
\begin{center}
\includegraphics[width=\textwidth]{./dirac_4.jpg}
\end{center}
\end{column}
\begin{column}{0.75\textwidth}
\begin{center}
\includegraphics[width=\textwidth]{./dirac2.png}
\end{center}
\end{column}
\end{columns}
\end{frame}
\begin{frame}[label={sec:org2a0da55}]{Quantum chemistry}
\begin{columns}
\begin{column}{0.6\textwidth}
\begin{exampleblock}{}
\begin{itemize}
\item Describing matter with quantum mechanics (Schrödinger's equation)
\item Users: theoretical chemists and physicists
\end{itemize}
\end{exampleblock}
\end{column}
\begin{column}{0.4\textwidth}
\begin{center}
\includegraphics[width=\textwidth]{./Water.png}
\end{center}
\end{column}
\end{columns}
\begin{columns}
\begin{column}{0.4\textwidth}
\begin{center}
\includegraphics[width=\textwidth]{./casula.png}
\end{center}
\end{column}
\begin{column}{0.6\textwidth}
\begin{exampleblock}{Implications for society}
\begin{center}
\begin{tabular}{ll}
- Health & Drug design\\
- Electronics & Nano- and micro-electronics\\
- Materials & Carbon nanotubes, graphene, \dots{}\\
- Catalysis & Enzymatic reactions, petroleum\\
\end{tabular}
\end{center}
\end{exampleblock}
\end{column}
\end{columns}
\end{frame}
\begin{frame}[label={sec:org7ad98d0}]{TREX: Targeting REal chemical accuracy at the EXascale}
\begin{columns}
\begin{column}{0.4\textwidth}
\begin{center}
\includegraphics[width=\textwidth]{./Curve.png}
\end{center}
\end{column}
\begin{column}{0.6\textwidth}
\begin{exampleblock}{Objective: Make codes ready for exascale}
How: Instead of re-writing codes, provide libraries
\begin{itemize}
\item One library for exchanging information between codes (\alert{TREXIO})
\item One library for high performance (\alert{QMCkl})
\end{itemize}
\end{exampleblock}
\begin{exampleblock}{QMC: Quantum Monte Carlo methods}
\begin{itemize}
\item Highly accurate
\item Massively parallelisable (multiple QMC trajectories)
\item CPU intensive
\end{itemize}
\end{exampleblock}
\end{column}
\end{columns}
\end{frame}
\begin{frame}[label={sec:orgd075e20}]{Quantum Monte Carlo (QMC)}
% Slide: the energy written as a ratio of integrals over the electron
% coordinates, followed by its sampled estimate and a legend of the symbols.
% \coord and \dcoord are the coordinate-list macros from the preamble.
\alert{Problem}: Stochastic resolution of the Schr\"odinger equation for $N$ electrons
% align* (amsmath) replaces the obsolete eqnarray, which mis-spaces relations.
% The starred form is unnumbered, so the \nonumber markers are no longer needed.
\begin{align*}
E &= \frac{\int \dcoord \Phi(\coord) \mathcal{H} \Phi(\coord)}
{\int \dcoord \Phi(\coord) \Phi(\coord)} \\
&\sim \sum \frac{\mathcal{H}\Psi(\coord)}{\Psi(\coord)}
\text{, sampled with } (\Psi \times \Phi)
\end{align*}
% Legend, split over two columns; each symbol is used as the item label.
\begin{columns}
\begin{column}{.5\textwidth}
\begin{itemize}
\item[$\mathcal{H}$: ] Hamiltonian operator
\item[$E$: ] Energy
\end{itemize}
\end{column}
\begin{column}{.4\textwidth}
\begin{itemize}
\item[$\coord $: ] Electron coordinates
\item[$\Phi $: ] Almost exact wave function
\item[$\Psi $: ] Trial wave function
\end{itemize}
\end{column}
\end{columns}
\end{frame}
\begin{frame}[label={sec:org933f7ec}]{Quantum Monte Carlo (QMC)}
\begin{columns}
\begin{column}{0.4\textwidth}
\begin{itemize}
\item Very low memory requirements (no integrals)
\item Distribute walkers on different cores or compute nodes
\item No blocking communication: near-ideal scaling
\item Difficulty: parallelize within a QMC trajectory
\end{itemize}
\end{column}
\begin{column}{0.6\textwidth}
\begin{center}
\includegraphics[width=\textwidth]{./Qmc.png}
\end{center}
\end{column}
\end{columns}
\end{frame}
\begin{frame}[label={sec:org8b6768c}]{QMC kernel library (QMCkl)}
\begin{block}{Computational kernels}
\begin{itemize}
\item QMCkl will contain the main kernels of QMC methods
\item Written together by QMC experts and HPC experts
\item Multiple high performance implementations of the kernels, tuned
for different
\begin{itemize}
\item architectures
\item problem sizes
\item requested accuracy (reduced precision)
\end{itemize}
\item Kernels will be scheduled with the StarPU runtime
\end{itemize}
\end{block}
\end{frame}
\begin{frame}[label={sec:org24ef7da}]{QMC kernel library (QMCkl)}
\begin{block}{Two implementations}
\begin{itemize}
\item \emph{Documentation}: easy to read and understand, not necessarily efficient
\item \emph{High performance}: efficient, but not necessarily readable by physicists/chemists
\item Both \emph{Documentation} and \emph{High performance} have the same API.
\end{itemize}
\end{block}
\begin{block}{Advantages}
\begin{itemize}
\item The code can stay easy for physicists/chemists to understand:
performance-related aspects are delegated to the library
\item Changing architecture requires only linking with another
version of the library
\item Scientific code development does not break the performance
\item Better re-use of the optimization effort among the community
\end{itemize}
\end{block}
\end{frame}
\begin{frame}[label={sec:org8e1f375}]{Design strategy}
\begin{enumerate}
\item Kernel extraction: QMC specialists agree on the
mathematical expression of the problem
\item A mini-application is written with HPC experts to find the
optimal data layout from real-size examples
\item The kernel is written in the documentation library
\item The documentation library is linked in a QMC code to check correctness
\item HPC experts provide an HPC version of the kernel
\item The HPC library is linked in the QMC codes of the CoE
\end{enumerate}
\end{frame}
\begin{frame}[label={sec:orge2ab500}]{Our first application: 3-body Jastrow factor}
% Notation macros are defined inside the frame, so they stay local to this slide.
% Symbol for the Jastrow term displayed below.
\newcommand{\Jeen}{J_{\text{een}}}
% Upper limits of the sums: N_elec, N_nucl and N_nord.
\newcommand{\Nel}{N_{\text{elec}}}
\newcommand{\Nat}{N_{\text{nucl}}}
\newcommand{\Nord}{N_{\text{nord}}}
% Upper limit of the innermost sum over l.
\newcommand{\lmax}{p-k-2\delta_{k,0}}
% Bold r and R, the two arguments of \Jeen.
\newcommand{\br}{\mathbf{r}}
\newcommand{\bR}{\mathbf{R}}
% Barred typewriter symbols; defined here but not used on this slide.
\newcommand{\ttr}{\, \bar{\mathtt{r}}}
\newcommand{\tR}{\, \bar{\mathtt{R}}}
\newcommand{\tP}{\, \bar{\mathtt{P}}}
% Subscripts are written uniformly as i\alpha / j\alpha (a stray thin space
% in one of them has been removed).
\[
\Jeen (\br,\bR) = \sum_{\alpha=1}^{\Nat} \sum_{i=1}^{\Nel} \sum_{j=1}^{i-1}
\sum_{p=2}^{\Nord} \sum_{k=0}^{p-1}
\sum_{l=0}^{\lmax} c_{lkp\alpha}
\left( {r}_{ij} \right)^k
\left[ \left( {R}_{i\alpha} \right)^l + \left( {R}_{j\alpha} \right)^l \right]
\left( {R}_{i\alpha} \, {R}_{j\alpha} \right)^{(p-k-l)/2}
\]
% Left column: speed-up plot. Right column: key points.
\begin{columns}
\begin{column}{0.5\textwidth}
\begin{center}
\includegraphics[width=\textwidth]{./speedup.pdf}
\end{center}
\end{column}
\begin{column}{0.5\textwidth}
\begin{itemize}
\item Gradient and Laplacian are also required
\item Up to \(20\times\) faster than in the original code
\item \(\sim 80\%\) of the AVX-512 peak is reached
\item Using a DGEMM kernel \(\Longrightarrow\) also efficient on GPU
\end{itemize}
\end{column}
\end{columns}
\end{frame}
\begin{frame}[label={sec:org5b9dcc8}]{Links}
\begin{itemize}
\item TREX web site: \url{https://trex-coe.eu}
\item QMCkl documentation: \url{https://trex-coe.github.io/qmckl}
\item QMCkl repository: \url{https://github.com/trex-coe/qmckl}
\end{itemize}
\end{frame}
\end{document}