% Listing metadata: 272 lines, 7.9 KiB, TeX
% Created 2021-06-30 Wed 12:12
% Intended LaTeX compiler: pdflatex
\documentclass[aspectratio=169]{beamer}

% --- Packages (load order kept as generated; each package is loaded once) ---
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{graphicx}
\usepackage{grffile}
\usepackage{longtable}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{textcomp}
\usepackage{amssymb}
\usepackage{capt-of}
\usepackage{hyperref}

\institute{Lab. Chimie et Physique Quantiques, IRSAMC, UPS/CNRS, Toulouse (France)}

% --- Source-code listings ---
\usepackage{minted}
\usemintedstyle{emacs}
\newminted{f90}{fontsize=\footnotesize}

% --- Extra math support; physics supplies \dd, used by \dcoord below ---
\usepackage{mathtools}
\usepackage{physics}

% --- Colour palette ---
\definecolor{darkgreen}{rgb}{0.,0.6,0.}
\definecolor{darkblue}{rgb}{0.,0.2,0.7}
\definecolor{darkred}{rgb}{0.6,0.1,0.1}
\definecolor{darkpink}{rgb}{0.7,0.0,0.7}

% --- Notation shared by the slides ---
% \coord : the list of position vectors r_1, ..., r_N.
\newcommand*{\coord}{\mathbf{r}_1, \dots, \mathbf{r}_N}
% \dcoord: the matching product of differentials dr_1 ... dr_N.
% The braces directly after \dd are intentional: \dd takes the braced group
% as its argument, so the brace/spacing pattern is kept unchanged.
\newcommand*{\dcoord}{\dd {\mathbf{r}}_1 \dots \dd{\mathbf{r}}_N}

% --- Theme and title-page data ---
\usetheme{trex}
\author{Anthony Scemama}
\date{12/03/2021}
\title{Library development within TREX}

% --- PDF metadata ---
\hypersetup{
  pdfauthor={Anthony Scemama},
  pdftitle={Library development within TREX},
  pdfkeywords={},
  pdfsubject={},
  pdfcreator={Emacs 26.3 (Org mode 9.4)},
  pdflang={English}}

\begin{document}

\maketitle

% Slide: two images side by side (narrow left column, wide right column).
\begin{frame}[label={sec:org52bec56}]{Quantum chemistry}
  \begin{columns}
    \begin{column}{0.25\textwidth}
      \begin{center}
        \includegraphics[width=\textwidth]{./dirac_4.jpg}
      \end{center}
    \end{column}
    \begin{column}{0.75\textwidth}
      \begin{center}
        \includegraphics[width=\textwidth]{./dirac2.png}
      \end{center}
    \end{column}
  \end{columns}
\end{frame}

% Slide: two rows of columns -- text + image on top, image + table below.
\begin{frame}[label={sec:org2a0da55}]{Quantum chemistry}
  % Top row: summary bullets (left) and illustration (right).
  \begin{columns}
    \begin{column}{0.6\textwidth}
      \begin{exampleblock}{}
        \begin{itemize}
          \item Describing matter with quantum mechanics (Schrödinger's equation)
          \item Users: theoretical chemists and physicists
        \end{itemize}
      \end{exampleblock}
    \end{column}
    \begin{column}{0.4\textwidth}
      \begin{center}
        \includegraphics[width=\textwidth]{./Water.png}
      \end{center}
    \end{column}
  \end{columns}

  % Bottom row: illustration (left) and application domains (right).
  \begin{columns}
    \begin{column}{0.4\textwidth}
      \begin{center}
        \includegraphics[width=\textwidth]{./casula.png}
      \end{center}
    \end{column}
    \begin{column}{0.6\textwidth}
      \begin{exampleblock}{Implications for society}
        \begin{center}
          \begin{tabular}{ll}
            - Health & Drug design\\
            - Electronics & Nano- and micro-electronics\\
            - Materials & Carbon nanotubes, graphene, \dots{}\\
            - Catalysis & Enzymatic reactions, petroleum\\
          \end{tabular}
        \end{center}
      \end{exampleblock}
    \end{column}
  \end{columns}
\end{frame}

% Slide: project objective (libraries rather than rewrites) and QMC highlights.
\begin{frame}[label={sec:org7ad98d0}]{TREX: Targeting REal chemical accuracy at the EXascale}
  \begin{columns}
    \begin{column}{0.4\textwidth}
      \begin{center}
        \includegraphics[width=\textwidth]{./Curve.png}
      \end{center}
    \end{column}
    \begin{column}{0.6\textwidth}
      \begin{exampleblock}{Objective: Make codes ready for exascale}
        How: Instead of re-writing codes, provide libraries
        \begin{itemize}
          \item One library for exchanging information between codes (\alert{TREXIO})
          \item One library for high-performance (\alert{QMCkl})
        \end{itemize}
      \end{exampleblock}
      \begin{exampleblock}{QMC: Quantum Monte Carlo methods}
        \begin{itemize}
          \item Highly accurate
          \item Massively parallelisable (multiple QMC trajectories)
          \item CPU intensive
        \end{itemize}
      \end{exampleblock}
    \end{column}
  \end{columns}
\end{frame}

% Slide: the QMC energy expression followed by a legend of its symbols.
% Relies on the preamble macros \coord and \dcoord.
\begin{frame}[label={sec:orgd075e20}]{Quantum Monte Carlo (QMC)}
  \alert{Problem}: Stochastic resolution of the Schr\"odinger equation for $N$ electrons
  % Unnumbered two-row display, aligned on the relation symbols
  % (amsmath align* in place of eqnarray with \nonumber on every row).
  \begin{align*}
    E &= \frac{\int \dcoord \Phi(\coord) \mathcal{H} \Phi(\coord)}
              {\int \dcoord \Phi(\coord) \Phi(\coord)} \\
      &\sim \sum \frac{\mathcal{H}\Psi(\coord)}{\Psi(\coord)}
        \text{, sampled with } (\Psi \times \Phi)
  \end{align*}
  % Legend: each symbol is used as the item label.
  \begin{columns}
    \begin{column}{.5\textwidth}
      \begin{itemize}
        \item[$\mathcal{H}$: ] Hamiltonian operator
        \item[$E$: ] Energy
      \end{itemize}
    \end{column}
    \begin{column}{.4\textwidth}
      \begin{itemize}
        \item[$\coord $: ] Electron coordinates
        \item[$\Phi $: ] Almost exact wave function
        \item[$\Psi $: ] Trial wave function
      \end{itemize}
    \end{column}
  \end{columns}
\end{frame}

% Slide: parallelisation properties of QMC (text left, figure right).
\begin{frame}[label={sec:org933f7ec}]{Quantum Monte Carlo (QMC)}
  \begin{columns}
    \begin{column}{0.4\textwidth}
      \begin{itemize}
        \item Very low memory requirements (no integrals)
        \item Distribute walkers on different cores or compute nodes
        \item No blocking communication: near-ideal scaling
        \item Difficulty: parallelize within a QMC trajectory
      \end{itemize}
    \end{column}
    \begin{column}{0.6\textwidth}
      \begin{center}
        \includegraphics[width=\textwidth]{./Qmc.png}
      \end{center}
    \end{column}
  \end{columns}
\end{frame}

% Slide: what the QMCkl library contains and how its kernels are tuned.
\begin{frame}[label={sec:org8b6768c}]{QMC kernel library (QMCkl)}
  \begin{block}{Computational kernels}
    \begin{itemize}
      \item QMCkl will contain the main kernels of QMC methods
      \item Written together by QMC experts and HPC experts
      \item Multiple high performance implementations of the kernels, tuned
            for different
            % Nested list: the tuning dimensions.
            \begin{itemize}
              \item architectures
              \item problem sizes
              \item requested accuracy (reduced precision)
            \end{itemize}
      \item Kernels will be scheduled with the StarPU runtime
    \end{itemize}
  \end{block}
\end{frame}

% Slide: the two QMCkl implementations and the advantages of the split.
\begin{frame}[label={sec:org24ef7da}]{QMC kernel library (QMCkl)}
  \begin{block}{Two implementations}
    \begin{itemize}
      \item \emph{Documentation} : easy to read and understand, not necessarily efficient
      \item \emph{High performance} : efficient, but not necessarily readable by physicists/chemists
      \item Both \emph{Documentation} and \emph{High performance} have the same API.
    \end{itemize}
  \end{block}

  \begin{block}{Advantages}
    \begin{itemize}
      % Two sentences in one item; the full stop separates them.
      \item The code can stay easy to understand by the physicists/chemists.
            Performance-related aspects are delegated to the library
      \item Changing architecture requires only linking with another
            version of the library
      \item Scientific code development does not break the performance
      \item Better re-use of the optimization effort among the community
    \end{itemize}
  \end{block}
\end{frame}

% Slide: the six-step workflow from kernel extraction to HPC deployment.
\begin{frame}[label={sec:org8e1f375}]{Design strategy}
  \begin{enumerate}
    \item Kernel extraction: QMC specialists agree on the
          mathematical expression of the problem
    \item A mini-application is written to find the optimal data layout
          with HPC experts from real-size examples
    \item The kernel is written in the documentation library
    \item The documentation library is linked in a QMC code to check correctness
    \item HPC experts provide an HPC version of the kernel
    \item The HPC library is linked in the QMC codes of the CoE
  \end{enumerate}
\end{frame}

% Slide: definition of the three-body Jastrow term and the measured speed-up.
\begin{frame}[label={sec:orge2ab500}]{Our first application : 3-body Jastrow factor}
  % Notation macros defined inside this frame; all are used in the display below.
  \newcommand{\Jeen}{J_{\text{een}}}
  \newcommand{\Nel}{N_{\text{elec}}}
  \newcommand{\Nat}{N_{\text{nucl}}}
  \newcommand{\Nord}{N_{\text{nord}}}
  % Upper bound of the innermost sum over l.
  \newcommand{\lmax}{p-k-2\delta_{k,0}}
  \newcommand{\br}{\mathbf{r}}
  \newcommand{\bR}{\mathbf{R}}
  \[
    \Jeen (\br,\bR) = \sum_{\alpha=1}^{\Nat} \sum_{i=1}^{\Nel} \sum_{j=1}^{i-1}
    \sum_{p=2}^{\Nord} \sum_{k=0}^{p-1}
    \sum_{l=0}^{\lmax} c_{lkp\alpha}
    \left( {r}_{ij} \right)^k
    \left[ \left( {R}_{i\alpha} \right)^l + \left( {R}_{j\alpha} \right)^l \right]
    \left( {R}_{i\alpha} \, {R}_{j\alpha} \right)^{(p-k-l)/2}
  \]

  \begin{columns}
    \begin{column}{0.5\textwidth}
      \begin{center}
        \includegraphics[width=\textwidth]{./speedup.pdf}
      \end{center}
    \end{column}
    \begin{column}{0.5\textwidth}
      \begin{itemize}
        \item Gradient and Laplacian are also required
        \item Up to \(20\times\) faster than in the original code
        \item \(\sim 80\%\) of the AVX-512 peak is reached
        \item Using a DGEMM kernel \(\Longrightarrow\) also efficient on GPU
      \end{itemize}
    \end{column}
  \end{columns}
\end{frame}

% Slide: project web site, library documentation and source repository.
\begin{frame}[label={sec:org5b9dcc8}]{Links}
  \begin{itemize}
    \item TREX web site : \url{https://trex-coe.eu}
    \item QMCkl documentation : \url{https://trex-coe.github.io/qmckl}
    \item QMCkl repository : \url{https://github.com/trex-coe/qmckl}
  \end{itemize}
\end{frame}

\end{document}