diff --git a/Manuscript/Makefile b/Manuscript/Makefile index 850d0f4..4e3641d 100644 --- a/Manuscript/Makefile +++ b/Manuscript/Makefile @@ -1,4 +1,8 @@ TEX=stochastic_triples +BUILD=build -default: - latexmk -pdf stochastic_triples +%.pdf: %.svg + inkscape --export-type=pdf $< + +$(BUILD)/$(TEX).pdf: $(TEX).tex $(wildcard *.pdf) $(wildcard *.bib) buckets.pdf + latexmk -shell-escape -pdf -outdir=$(BUILD) $(TEX) diff --git a/Manuscript/stochastic_triples.bib b/Manuscript/stochastic_triples.bib index 4c9f605..0e60888 100644 --- a/Manuscript/stochastic_triples.bib +++ b/Manuscript/stochastic_triples.bib @@ -533,3 +533,34 @@ swh:1:dir:6d82ae7ac757c78d7720dd89dfa52d7a453d2f68;origin=https://github.com/Qua year = {2020}, publisher = {Royal Society of Chemistry}, doi = {10.1039/D0CP03800H}} + +@article{APeriodicEquaGallo2021, + author = {Gallo, Alejandro and Hummel, Felix and Irmler, Andreas and Gr\"{u}neis, Andreas}, + doi = {10.1063/5.0035425}, + issue = {6}, + journal = {The Journal of Chemical Physics}, + language = {en}, + month = {2}, + pages = {064106}, + publisher = {AIP Publishing}, + title = {A periodic equation-of-motion coupled-cluster implementation applied to F-centers in alkaline earth oxides}, + url = {http://dx.doi.org/10.1063/5.0035425}, + volume = {154}, + year = {2021} +} + +@article{Gaussian.based.James.2017, + author = {James McClain and Qiming Sun and Garnet Kin-Lic Chan and Timothy C. Berkelbach}, + doi = {10.1021/acs.jctc.7b00049}, + eprint = {1701.04832v1}, + file = {/home/gallo/Documents/papers/eee152ad779662aab5d80ea59fd35b26-James-McClain-and-Qi/document.pdf;/home/gallo/Documents/papers/eee152ad779662aab5d80ea59fd35b26-James-McClain-and-Qi/Gaussian-based-coupled-cluster-theory-for-the-ground-state-and-band-structure-of-solids-James-McClain-and-Qiming-Sun-and-Garnet-Kin-Lic-Chan-and-Timothy-C.-Berkelbach-a.pdf}, + issue = {3}, + journal = {Journal of Chemical Theory and Computation}, + month = {1}, + pages = {1209--1218}, + primaryclass = {cond-mat.mtrl-sci}, + title = {Gaussian-based coupled-cluster theory for the ground state and band structure of solids}, + url = {https://pubs.acs.org/doi/abs/10.1021/acs.jctc.7b00049}, + volume = {13}, + year = {2017} +} \ No newline at end of file diff --git a/Manuscript/stochastic_triples.tex b/Manuscript/stochastic_triples.tex index a554586..6259466 100644 --- a/Manuscript/stochastic_triples.tex +++ b/Manuscript/stochastic_triples.tex @@ -1,10 +1,12 @@ \documentclass[aip,jcp,reprint,noshowkeys,superscriptaddress]{revtex4-1} %\usepackage{graphicx,dcolumn,bm,xcolor,microtype,multirow,amscd,amsmath,amssymb,amsfonts,physics,longtable,wrapfig,bbold,siunitx,xspace} \usepackage{graphicx,xcolor,physics,siunitx,xspace} +\usepackage{orcidlink} \usepackage[version=4]{mhchem} \usepackage[utf8]{inputenc} \usepackage[T1]{fontenc} +\usepackage{svg} \usepackage{algorithm2e} \usepackage{hyperref} @@ -23,14 +25,8 @@ \newcommand{\fnt}{\footnotetext} \newcommand{\tabc}[1]{\multicolumn{1}{c}{#1}} -\newcommand{\titou}[1]{\textcolor{red}{#1}} -\newcommand{\toto}[1]{\textcolor{blue}{#1}} +\newcommand{\todo}[1]{\textcolor{blue}{#1}} \newcommand{\yann}[1]{\textcolor{purple}{#1}} -\newcommand{\trashPFL}[1]{\textcolor{r\ed}{\sout{#1}}} -\newcommand{\PFL}[1]{\titou{(\underline{\bf PFL}: #1)}} -\newcommand{\AS}[1]{\toto{(\underline{\bf AS}: #1)}} -\newcommand{\YANN}[1]{\yann{(\underline{\bf YANN}: #1)}} -\newcommand{\byann}[1]{\textcolor{purple}{\sout{#1}}} \usepackage{listings} \definecolor{codegreen}{rgb}{0.58,0.4,0.2} @@ -71,39 +67,38 @@ % addresses \newcommand{\LCPQ}{Laboratoire de Chimie et Physique Quantiques (UMR 5626), Universit\'e de Toulouse, CNRS, UPS, France} -\newcommand{\Vienna}{Vienna} +\newcommand{\Vienna}{% + Institute for Theoretical Physics, TU Wien, % + Wiedner Hauptstra{\ss}e 8--10/136, 1040 Vienna, Austria % chktex 8 +} \begin{document} \title{Stochastically accelerated perturbative triples correction in coupled cluster calculations} % Alphabetic order -\author{Yann \surname{Damour}} +\author{Yann \surname{Damour}\,\orcidlink{0000-0002-8868-9863}} \affiliation{\LCPQ} -\author{Alejandro \surname{Gallo}} +\author{Alejandro \surname{Gallo}\,\orcidlink{0000-0002-3744-4161}} % chktex 8 \affiliation{\Vienna} -\author{Andreas \surname{Irmler}} - \affiliation{\Vienna} -\author{Andreas \surname{Gr\"uneis}} - \affiliation{\Vienna} -\author{Anthony \surname{Scemama}} +\author{Anthony \surname{Scemama}\,\orcidlink{0000-0003-4955-7136}} \email{scemama@irsamc.ups-tlse.fr} \affiliation{\LCPQ} \begin{abstract} We introduce a novel algorithm that leverages stochastic sampling -techniques to approximate perturbative triples correction in the +techniques to approximate the perturbative triples correction in the coupled-cluster (CC) framework. By combining elements of randomness and determinism, our algorithm achieves a favorable balance between accuracy and computational cost. The main advantage of this algorithm is that it allows for -calculations to be stopped at any time, providing an unbiased +the calculation to be stopped at any time, providing an unbiased estimate, with a statistical error that goes to zero as the exact calculation is approached. We provide evidence that our semi-stochastic algorithm achieves substantial computational savings compared to traditional deterministic methods. -Specifically, we demonstrate that a precision of 0.5 milliHartree can +Specifically, we demonstrate that a precision of 0.5 millihartree can be attained with only 10\% of the computational effort required by the full calculation. This work opens up new avenues for efficient and accurate @@ -122,16 +117,23 @@ that were previously computationally prohibitive. \section{Introduction} \label{sec:introduction} -Coupled cluster (CC) theory is an accurate quantum mechanical approach widely used in computational chemistry and physics to describe the electronic structure of atoms, molecules, and materials.\cite{Cizek_1966,Cizek_1969,Paldus_1992} -It offers a systematic and rigorous framework for accurate predictions of molecular properties and reactions by accounting for electron correlation effects beyond the mean-field approximation. -CC theory starts with a parametrized wave function, typically referred to as the CC wave function, which is expressed as an exponential series of excitation operators acting on a reference: +Coupled cluster (CC) theory is an accurate quantum mechanical approach widely used in computational chemistry to describe the electronic structure of atoms and molecules.\cite{Cizek_1966,Cizek_1969,Paldus_1992} +In recent years, CC theories for both ground state and excited states have received considerable attention in the context of material +science due to its good balance between accuracy and computational cost.\cite{APeriodicEquaGallo2021,Gaussian.based.James.2017} +CC offers a systematic and rigorous framework for accurate predictions of +molecular properties and reactions by accounting for electron correlation +effects beyond the mean-field approximation. The CC framework starts with a +parameterized wave function, typically referred to as the CC wave function, +which is expressed as an exponential series of particle-hole excitation +operators acting on a reference state: \begin{equation} \ket{\Psi_{\text{CC}}} = e^{\hat{T}} \ket{\Phi} \end{equation} -where $\ket{\Phi}$ is the reference determinant, and $\hat{T}$ is the cluster operator representing single, double, triple, and higher excitations from the reference wave function.\cite{crawford_2000,bartlett_2007,shavitt_2009} +where $\ket{\Phi}$ is the reference determinant, and $\hat{T}$ is the cluster operator representing single, double, triple, and higher excitations on top of the reference wave function.\cite{crawford_2000,bartlett_2007,shavitt_2009} -Coupled Cluster with Singles and Doubles (CCSD) includes single and double excitations and represents the most commonly used variant of CC theory due to its favorable balance between accuracy and computational cost. -Coupled Cluster with Singles, Doubles, and perturbative Triples (CCSD(T)) incorporates a perturbative correction to the CCSD energy to account for some higher-order correlation effects, and stands as the gold standard of quantum chemistry.\cite{raghavachari_1989} +Coupled Cluster with Singles and Doubles (CCSD) includes single and double particle-hole excitations and represents the most commonly used variant of CC theory. CCSD is exact for two-electron systems and includes all terms +from third order perturbation theory and beyond. +Coupled Cluster with Singles, Doubles, and perturbative Triples (CCSD(T)) incorporates a perturbative correction to the CCSD energy to account for some higher-order correlation effects, and has been termed in the literature as the gold standard of quantum chemistry.\cite{raghavachari_1989} CCSD(T) has demonstrated exceptional accuracy and reliability, making it one of the preferred choices for benchmark calculations and highly accurate predictions. It has found successful applications in a diverse range of areas, including spectroscopy,\cite{villa_2011,watson_2016,vilarrubias_2020} reaction kinetics,\cite{dontgen_2015,castaneda_2012} and materials design,\cite{zhang_2019} and has played a pivotal role in advancing our understanding of complex chemical phenomena. @@ -145,7 +147,7 @@ Our algorithm strikes a balance between computational efficiency and accuracy, making calculations for larger basis sets more feasible without compromising precision. By incorporating stochastic sampling techniques, our approach provides an alternative avenue for approximating perturbative triples, relieving the computational burden inherent in traditional deterministic methods. This not only reduces the computational time to a more favorable level but also preserves the parallelism capabilities of CC calculations, ensuring efficient utilization of computational resources. -In the following sections of this paper, we will provide a brief introduction to the computation of perturbative triples in coupled cluster theory. We will explain the principles of our semi-stochastic algorithm, outlining its key features and advantages. Additionally, we will present implementation details, discussing the technical aspects and considerations involved in the algorithm's practical realization. To demonstrate the effectiveness and applicability of our approach, we finally present illustrative examples that showcase the algorithm's performance and compare it with the conventional algorithm. +In the following sections, we will provide a brief introduction to the computation of perturbative triples in coupled cluster theory. We will explain the principles of our semi-stochastic algorithm, outlining its key features and advantages. Additionally, we will present implementation details, discussing the technical aspects and considerations involved in the algorithm's practical realization. To demonstrate the effectiveness and applicability of our approach, we finally present illustrative examples that showcase the algorithm's performance and compare it with the conventional algorithm. %=================================================================% \section{Theoretical Background} @@ -157,7 +159,7 @@ The perturbative triples correction, E_{(T)} = \sum_{ijk\,abc} E_{ijk}^{abc}, \end{equation} is a sum of $N_{\text{o}}^3 \times N_{\text{v}}^3$ terms, where $N_{\text{o}}^3$ and $N_{\text{v}}^3$ denote the number of occupied and virtual molecular orbitals, respectively. -\yann{For a closed-shell reference with canonical orbitals,} each individual term is expressed as\cite{rendell_1991} +For a closed-shell reference with canonical orbitals, each individual term is expressed as\cite{rendell_1991} \begin{equation} E_{ijk}^{abc} = \frac{1}{3} \frac{(4 W_{ijk}^{abc} + W_{ijk}^{bca} + W_{ijk}^{cab}) @@ -170,8 +172,9 @@ W_{ijk}^{abc} & = P_{ijk}^{abc} \qty( \sum_d^{\text{virt}} \qty(bd|ai) t_{kj}^{c \sum_l^{\text{occ}} \qty(ck|jl) t_{ab}^{il}) \\ V_{ijk}^{abc} & = W_{ijk}^{abc} + \qty(bj|ck) t_i^a + \qty(ai|ck) t_j^b + \qty(ai|bj) t_k^c \end{align} -$P_{ijk}^{abc}$ is a permutation operator, $t$ are the CCSD amplitudes, and -the indices $i,j,k$ and $a,b,c$ refer to occupied and virtual orbitals, respectively. +where $P_{ijk}^{abc}$ is a sign-less permutation operator, $(t^a_i, t^{ab}_{ij})$ are the CCSD amplitudes, +\((pq|rs)\) are the two-electron coulomb integrals, +and the indices $i,j,k,l$ and $a,b,c,d$ refer to occupied and virtual orbitals, respectively. The bottleneck of the perturbative triples correction is the computation of the $W$ tensor which requires $\order{N_o^3 \times N_v^4}$ operations. @@ -181,7 +184,7 @@ multiplications,\cite{form_w_abc} which are among the most efficient operations executed on modern CPUs and accelerators.\cite{ma_2011,haidar_2015,dinapoli_2014,springer_2018} -In the algorithm proposed by Rendell\cite{rendell_1991}, for each given triplet $(abc)$, the sub-tensors $W^{abc}$ and $V^{abc}$ are computed and immediately utilized to calculate their contribution to $E^{abc}$. Here, we propose a similar approach but introduce a semi-stochastic algorithm to randomly select the triplets $(abc)$, circumventing the need to compute all contributions. +In the algorithm proposed by Rendell\cite{rendell_1991}, for each given triplet $(a,b,c)$, the sub-tensors $W^{abc}$ and $V^{abc}$ are computed and immediately utilized to calculate their contribution to $E^{abc}$. Here, we propose a similar approach but introduce a semi-stochastic algorithm to randomly select the triplets $(a,b,c)$, circumventing the need to compute all contributions. %=================================================================% \section{Semi-Stochastic Algorithm} @@ -196,7 +199,6 @@ The perturbative triples correction is expressed as a sum of corrections, each i E_{(T)} = \sum_{abc} E^{abc} \text{, where } E^{abc} = \sum_{ijk} E_{ijk}^{abc}. \end{equation} - Monte Carlo sampling is employed by selecting samples $E^{abc}$. The principal advantage of this formulation is that the number of triplet combinations $(a,b,c)$, given by $N_v^3$, is sufficiently small to allow for all contributions $E^{abc}$ to be stored in memory. The first time a triplet $(a,b,c)$ is drawn, its corresponding value $E^{abc}$ is computed and then stored. @@ -220,12 +222,23 @@ where $n^{abc}$ is the number of times the triplet $(a,b,c)$ was drawn with prob This approach effectively reduces the statistical error bars by approximately a factor of two for the same computational expense due to two primary reasons: i) the estimator exhibits reduced fluctuations, ii) triplet combinations with low-energy orbitals are significantly more likely to be selected than others, enhancing the efficiency of memoization. We employ the inverse transform sampling technique to select samples, where an array of pairs $\qty(P^{abc}, (a,b,c))$ is stored. -To further reduce the variance of the samples, this array is sorted in descending order based on $P^{abc}$ and subsequently partitioned into buckets, $B$. +To further reduce the variance of the samples, this array is sorted in descending order based on $P^{abc}$ and subsequently partitioned into buckets, $B$, as can be seen diagrammatically in Figure~\ref{fig:buckets}. Each bucket is designed such that the sum $\sum_{(a,b,c) \in B} P^{abc}$ within it is as uniform as possible across all buckets. -As each bucket is equiprobable, samples are defined as combinations of triplets, with one triplet drawn from each bucket. +As each bucket is equally probable, samples are defined as combinations of triplets, with one triplet drawn from each bucket. Should the values of $E^{abc}$ be skewed, this advanced refinement significantly diminishes the variance. +\begin{figure}[h] +\centering +\includegraphics{buckets.pdf} +\caption{% +Diagrammatic representation of the bucket partition of the triplets \(a,b,c\). +Every bucket \(B_i\) contains a number of triplets such that the sum +\(\sum_{(a,b,c)}P^{abc}\) remains as uniform as possible. +\label{fig:buckets} +} +\end{figure} + The total perturbative contribution is computed as the aggregate of contributions from various buckets: \begin{equation} @@ -237,7 +250,7 @@ At this juncture, there is no longer a necessity to evaluate \(E_B\) stochastica E_{(T)} = \sum_{B \in \mathcal{D}} E_B + \frac{1}{|\mathcal{S}|} \sum_{B \in \mathcal{S}} \left \langle \frac{E^B_{abc}}{P^{abc}} \right \rangle_{P^{abc}}. \end{equation} -Not all buckets are of equal size; the number of triplets per bucket decreases with the bucket's index. Consequently, the initial buckets transition into the deterministic set first, gradually reducing the stochastic contribution. When every triplet has been drawn, the exact value of $E_{(T)}$ is obtained, devoid of statistical error. +Not all buckets are of equal size; the number of triplets per bucket increases with the bucket's index. Consequently, the initial buckets transition into the deterministic set first, gradually reducing the stochastic contribution. When every triplet has been drawn, the exact value of $E_{(T)}$ is obtained, devoid of statistical error. To accelerate the completion of the buckets, each Monte Carlo iteration triggers the computation of the first non-computed triplet. This ensures that after $N$ drawings, the exact contribution from each bucket can be obtained. @@ -256,14 +269,14 @@ consists in the computation of a single component $E^{abc}$. The computation of the running average and statistical error is executed every second, for printing or for exiting when the statistical error gets below a given threshold. -The number of samples $N^{abc}$ of each triplet $(abc)$ is initialized to $-1$, to identify +The number of samples $N^{abc}$ of each triplet $(a,b,c)$ is initialized to $-1$, to identify the contributions that have not been already computed. -An outer \emph{for} loop runs over the maximum number of iteration, equal to +An outer \emph{for loop} runs over the maximum number of iteration, equal to the number of different triplets $N_{\text{triplets}}$. -Within a loop iteration, the index of the first non-computed triplet $(abc)$ is identified, and the task associated with its computation is sent to the task queue. +Within a loop iteration, the index of the first non-computed triplet $(a,b,c)$ is identified, and the task associated with its computation is sent to the task queue. As this triplet has never been drawn, $N^{abc}$ is set to zero. -Then, a triplet $(abc)$ is drawn randomly. +Then, a triplet $(a,b,c)$ is drawn randomly. If the $E^{abc}$ has not been computed (identified by $N^{abc}=-1$), the number of samples is set to zero and the task for the computation of this contribution is enqueued. In any case, $N^{abc}$ is then incremented. @@ -318,9 +331,9 @@ $t_0 \leftarrow \text{WallClockTime}()$ \; $\mathcal{N} \gets \frac{ \sum_{i=\text{First}(i_\text{bucket}+1)}^{N_\text{triplets}} \max(N^{abc}[i],0)} { 1 - \sum_{i=1}^{\text{Last}(i_\text{bucket})} P[i] }$ \; $E_{d} \gets \sum_{i=1}^{\text{Last}(i_\text{bucket})} E^{abc}[i]$ \; - $E_{s} \gets 1/\mathcal{N}\, \sum_{i=\text{First}(i_\text{bucket}+1)}^{N_\text{triplets}} + $E_{s} \gets \frac{1}{\mathcal{N}}\, \sum_{i=\text{First}(i_\text{bucket}+1)}^{N_\text{triplets}} \max(N^{abc}[i],0)\, E^{abc}[i]/P[i]$ \; - $E_{s^2} \gets 1/\mathcal{N}\, \sum_{i=\text{First}(i_\text{bucket}+1)}^{N_\text{triplets}} + $E_{s^2} \gets \frac{1}{\mathcal{N}}\, \sum_{i=\text{First}(i_\text{bucket}+1)}^{N_\text{triplets}} \max(N^{abc}[i],0)\, \qty(E^{abc}[i]/P[i])^2$ \; $E \gets E_{d} + E_{s}$ \; $\Delta E \gets \sqrt{ \qty(E_{s^2} - {E_{s}}^2) / \qty(\mathcal{N}-1) }$ \; @@ -336,8 +349,8 @@ $t_0 \leftarrow \text{WallClockTime}()$ \; \subsection{Convergence of the statistical error in benzene} In this section we illustrate the convergence of the statistical error of the perturbative triples correction as a function of the computational cost. -The benzene molecule serves as our reference system for conducting frozen-core CCSD(T) calculations with the cc-pVTZ and cc-pVQZ basis sets. -Essentially, this involves the correlation of 30 electrons using either 258 or 503 molecular orbitals. +The benzene molecule serves as our reference system for conducting frozen-core CCSD(T) calculations employing the cc-pVTZ and cc-pVQZ basis sets. +Essentially, this involves the correlation of 30 (\(N_\text{o} = 15\)) electrons using either 258 (\(N_\text{v} = 243\)) or 503 (\(N_\text{v} = 488\)) molecular orbitals. The calculations were performed on an AMD \textsc{Epyc} 7513 dual socket server (64 cores in total). \begin{figure} @@ -354,7 +367,7 @@ The calculations were performed on an AMD \textsc{Epyc} 7513 dual socket server Figure~\ref{fig:benzene} shows the convergence of the CCSD(T) energy as a function of the program execution time using the two basis sets. Notably, the exact CCSD(T) energy always falls within $2\sigma$, affirming the reliability of the statistical error. Figure~\ref{fig:benzene_err} displays the statistical error as a function of the percentage of computed contributions. -Noteworthy in both figures are the curve discontinuities, attributable to readjustments in the separation between the deterministic and stochastic components of the calculation. +Noteworthy in the figure are the curve discontinuities, attributable to readjustments in the separation between the deterministic and stochastic components of the calculation. These updates lead to revised estimates and a diminution in statistical error. Achieving chemical accuracy, defined as \SI{1.6}{\milli\hartree}, necessitates less than 1\% of the total contributions in both basis sets. @@ -497,16 +510,26 @@ This method holds significant potential for advancing computational studies in c %%%%%%%%%%%%%%%%%%%%%%% -\acknowledgements{ -This work was supported by the European Centre of Excellence in Exascale Computing TREX --- Targeting Real Chemical Accuracy at the Exascale. -This project has received funding from the European Union's Horizon 2020 — Research and Innovation program --- under grant agreement No.~952165. -This work was performed using HPC resourced from CALMIP (Toulouse) under allocations p18005 and p22001.} +\acknowledgements{% + The authors kindly acknowledge fruitful discussions with Andreas +Gr\"{u}neis, Andreas Irmler and Tobias Sch\"{a}fer. +This work was supported by the European Centre of Excellence in +Exascale Computing TREX --- Targeting Real Chemical Accuracy at the +Exascale. +This project has received funding from the European Union's Horizon +2020 — Research and Innovation program --- under grant agreement +No.~952165. +A. Gallo acknowledges support and funding from the European Research Council (ERC) (Grant Agreement No. 101087184). +This work was performed using HPC resourced from CALMIP (Toulouse) +under allocations p18005 and p22001.% +} %%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section*{Data availability statement} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -The data that supports the findings of this study are available within the article and its supplementary material. +The data that supports the findings of this study are available within +the article and its supplementary material. %%%%%%%%%%%%%%%%%%%%%%% \bibliography{stochastic_triples}