RSDFT-CIPSI-QMC/Manuscript/rsdft-cipsi-qmc.tex

\documentclass[aip,jcp,reprint,noshowkeys,superscriptaddress]{revtex4-1}
\usepackage{graphicx,dcolumn,bm,xcolor,microtype,multirow,amsmath,amssymb,amsfonts,physics,mhchem,xspace,subfigure}

\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}

\usepackage{txfonts}

\usepackage[
	colorlinks=true,
    citecolor=blue,
    breaklinks=true
	]{hyperref}
\urlstyle{same}

\newcommand{\alert}[1]{\textcolor{red}{#1}}
\newcommand{\eg}[1]{\textcolor{blue}{#1}}
\definecolor{darkgreen}{HTML}{009900}
\usepackage[normalem]{ulem}

\newcommand{\EPT}{E_{\text{PT2}}}

\newcommand{\LCT}{Laboratoire de Chimie Théorique (UMR 7616), Sorbonne Université, CNRS, Paris, France}
\newcommand{\ANL}{Argonne Leadership Computing Facility, Argonne National Laboratory, Argonne, IL 60439, United States}
\newcommand{\LCPQ}{Laboratoire de Chimie et Physique Quantiques (UMR 5626), Université de Toulouse, CNRS, UPS, France}


\begin{document}

\title{Enabling high accuracy diffusion Monte Carlo calculations with
  range-separated density functional theory and selected configuration interaction}

\author{Anthony Scemama}
\affiliation{\LCPQ}
\author{Emmanuel Giner}
\email{emmanuel.giner@lct.jussieu.fr}
\affiliation{\LCT}
\author{Anouar Benali}
\email{benali@anl.gov}
\affiliation{\ANL}
\author{Pierre-Fran\c{c}ois Loos}
\email{loos@irsamc.ups-tlse.fr}
\affiliation{\LCPQ}


\begin{abstract}
\end{abstract}

\maketitle


\section{Introduction}
\label{sec:intro}

The full configuration interaction (FCI) method within a finite atomic
basis set leads to an approximate solution of the Schrödinger
equation.
This solution is the eigenpair of an approximate Hamiltonian, which is
the projection of the exact Hamiltonian onto the finite basis of all
possible Slater determinants.
The FCI wave function can be interpreted as the constrained solution of the
true Hamiltonian, where the solution is forced to span the space
provided by the basis.
At the complete basis set (CBS) limit, the constraint vanishes and the
exact solution is obtained.
Hence the FCI method enables a systematic improvement of the
calculations by improving the quality of the basis set.
Nevertheless, its exponential scaling with the number of electrons and
with the size of the basis is prohibitive for large systems.
In recent years, the introduction of new algorithms\cite{Booth_2009}
and the
revival\cite{Abrams_2005,Bytautas_2009,Roth_2009,Giner_2013,Knowles_2015,Holmes_2016,Liu_2016}
of selected configuration interaction (sCI)
methods\cite{Bender_1969,Huron_1973,Buenker_1974} pushed the limits of
the sizes of the systems that could be computed at the FCI level, but
the scaling remains exponential unless some bias is introduced leading
to a loss of size extensivity.

The Diffusion Monte Carlo (DMC) method is a numerical scheme to obtain
the exact solution of the Schrödinger equation with an additional
constraint, imposing the solution to have the same nodal hypersurface
as a given trial wave function.
Within this so-called \emph{fixed-node} approximation,
the DMC energy associated with a given trial wave function is an upper
bound to the exact energy, and the latter is recovered only when the
nodes of the trial wave function coincide with the nodes of the exact
wave function.
The polynomial scaling with the number of electrons and with the size
of the trial wave function makes the DMC method attractive.
In addition, the total energies obtained are usually far below
those obtained with the FCI method in computationally tractable basis
sets because the constraints imposed by the fixed-node approximation
are less severe than the constraints imposed by the finite-basis
approximation.

The qualitative picture of the electronic structure of weakly
correlated systems, such as organic molecules near their equilibrium
geometry, is usually well represented with a single Slater
determinant. This feature is in part responsible for the success of
density functional theory (DFT) and coupled cluster.
DMC with a single-determinant trial wave function can be used as a
single-reference post-Hartree-Fock method, with an accuracy comparable
to coupled cluster.\cite{Dubecky_2014,Grossman_2002}
The favorable scaling of quantum Monte Carlo (QMC), its very low memory
requirements and its suitability for massively parallel architectures
make it a serious alternative for high-accuracy simulations on large
systems.

As it is not possible to minimize directly the DMC energy with respect
to the variational parameters of the trial wave function, the
fixed-node approximation is much more difficult to control than the
finite-basis approximation.
The conventional approach consists in multiplying the trial wave
function by a positive function, the \emph{Jastrow factor}, taking
account of the electron-electron cusp and the short-range correlation
effects. The wave function is then re-optimized within Variational
Monte Carlo (VMC) in the presence of the Jastrow factor and the nodal
surface is expected to be improved. Using this technique, it has been
shown that the chemical accuracy could be reached within
DMC.\cite{Petruzielo_2012}

Another approach consists in considering the DMC method as a
\emph{post-FCI method}. The trial wave function is obtained by
approaching the FCI with a selected configuration interaction (sCI)
method such as CIPSI for instance.\cite{Giner_2013,Caffarel_2016_2}
When the basis set is increased, the trial wave function gets closer
to the exact wave function, so the nodal surface can be systematically
improved.\cite{Caffarel_2016}
This technique has the advantage that using FCI nodes in a given basis
set is well defined, so the calculations are reproducible in a
black-box way without needing any expertise in QMC.
However, this technique cannot be applied to large systems because of
the exponential scaling of the size of the trial wave function.
Extrapolation techniques have been used to estimate the DMC energies
obtained with FCI wave functions,\cite{Scemama_2018} and other authors
have used a combination of the two approaches where highly truncated
CIPSI trial wave functions are re-optimized in VMC under the presence
of a Jastrow factor to keep the number of determinants
small,\cite{Giner_2016} and where the consistency between the
different wave functions is kept by imposing a constant energy
difference between the estimated FCI energy and the variational energy
of the CI wave function.\cite{Dash_2018,Dash_2019}

Nevertheless, finding a robust protocol to obtain high-accuracy
calculations which can be reproduced systematically, and which is
applicable to large systems with a multi-configurational character is
still an active field of research. The present paper falls
within this context.


\section{Combining CIPSI with range-separated DFT}
\label{sec:rsdft-cipsi}

In single-determinant DMC calculations, the degrees of freedom used to
reduce the fixed-node error are the molecular orbitals on which the
Slater determinant is built.
Different molecular orbitals can be chosen:
Hartree-Fock (HF), Kohn-Sham (KS), natural (NO) orbitals of a
correlated wave function, or orbitals optimized under the
presence of a Jastrow factor.
The nodal surfaces obtained with the KS determinant are in general
better than those obtained with the HF determinant,\cite{Per_2012} and
of comparable quality to those obtained with a Slater determinant
built with NOs.\cite{Wang_2019} Orbitals obtained in the presence
of a Jastrow factor are generally superior to KS
orbitals.\cite{Filippi_2000,Scemama_2006,HaghighiMood_2017,Ludovicy_2019}

The description of electron correlation within DFT is very different
from correlated methods.
In DFT, one solves a mean field problem with a modified potential
incorporating the effects of electron correlation, whereas in
correlated methods the real Hamiltonian is used and the
electron-electron interactions are considered.
Nevertheless, as the orbitals are one-electron functions,
the procedure of orbital optimization in the presence of the
Jastrow factor can be interpreted as a self-consistent field procedure
with an effective Hamiltonian,\cite{Filippi_2000} similarly to DFT.
So DFT can be viewed as a very cheap way of introducing the effect of
correlation in the parameters determining the nodal surface. But in
the general case, even at the complete basis set limit a fixed-node
error will remain because the single-determinant ansatz does not
have enough freedom to describe the exact nodal surface.
If one wants to obtain the exact energy in the CBS limit, a
multi-determinant parameterization of the wave function is required.

\subsection{CIPSI}

Beyond the single-determinant representation, the best
multi-determinant wave function one can obtain is the FCI. FCI is
a \emph{post-Hartree-Fock} method, and there is a continuous
connection between the Hartree-Fock and FCI wave functions.
Multiple paths exist: one can for example use
CI methods increasing the maximum degree of excitation (CISD, CISDT,
CISDTQ, \emph{etc}), or use increasingly large complete active space
(CAS) wave functions until all the orbitals are in the active space.
Selected CI methods take a shorter path between the Hartree-Fock
determinant and the FCI wave function by increasing iteratively the
number of determinants on which the wave function is expanded,
selecting the determinants which are expected to contribute the most
to the FCI eigenvector. At every iteration, the lowest eigenpair is
extracted from the CI matrix expressed in the determinant subspace,
and the FCI energy can be estimated by computing a second-order
perturbative correction (PT2) to the variational energy, $\EPT$.
The magnitude of $\EPT$ is a
measure of the distance to the exact eigenvalue, and is an adjustable
parameter controlling the quality of the wave function.
Within the \emph{Configuration interaction using a perturbative
selection made iteratively} (CIPSI)\cite{Huron_1973} method, the PT2
correction is computed along with the determinant selection. So the
magnitude of $\EPT$ can be made the only parameter of the algorithm,
and we choose this parameter as the convergence criterion.

Considering that the perturbatively corrected energy is a reliable
estimate of the FCI energy, using a fixed value of the PT2 correction
as a stopping criterion enforces a constant distance of all the
calculations to the FCI energy. In this work, we target chemical
accuracy, so all the CIPSI selections were made such that $|\EPT| <
1$~m$E_h$.


\subsection{Range-separated DFT}
\label{sec:rsdft}

Following the seminal work of Savin,\cite{Savin_1996,Toulouse_2004}
the Coulomb electron-electron interaction is split into a short-range
(sr) and a long-range (lr) interaction as
\begin{equation}
  \frac{1}{r_{ij}} = w_{\text{ee}}^{\text{lr}, \mu}(r_{ij}) + \qty(
  \frac{1}{r_{ij}} - w_{\text{ee}}^{\text{lr}, \mu}(r_{ij}) ),
\end{equation}
where
\begin{equation}
  w_{\text{ee}}^{\text{lr},\mu}(r_{ij}) = \frac{\erf \qty( \mu\, r_{ij})}{r_{ij}}.
\end{equation}
The main idea is to treat the short-range electron-electron
interaction with DFT, and the long-range interaction with wave
function theory.
The parameter $\mu$ controls the range of the separation, and allows
one to go continuously from the Kohn-Sham Hamiltonian ($\mu=0$) to
the FCI Hamiltonian ($\mu = \infty$).

The universal density functional is decomposed as
\begin{equation}
  \mathcal{F}[n] = \mathcal{F}^{\mathrm{lr},\mu}[n] + \bar{E}_{\mathrm{Hxc}}^{\mathrm{sr,}\mu}[n],
  \label{Fdecomp}
\end{equation}
where $n$ is a one-particle density,
$\mathcal{F}^{\mathrm{lr},\mu}$ is a long-range universal density
functional and $\bar{E}_{\mathrm{Hxc}}^{\mathrm{sr,}\mu}$ is the
complementary short-range Hartree-exchange-correlation (Hxc) density
functional.
One obtains the following expression for the ground-state
electronic energy
\begin{equation}
  \label{min_rsdft} E_0= \min_{\Psi} \left\{
\left
  \langle\Psi|\hat{T}+\hat{W}_\mathrm{{ee}}^{\mathrm{lr},\mu}+\hat{V}_{\mathrm{ne}}|\Psi\right
\rangle
+ \bar{E}^{\mathrm{sr},\mu}_{\mathrm{Hxc}}[n_\Psi]\right\}
\end{equation}
with $\hat{T}$ the kinetic energy operator,
$\hat{W}_{\mathrm{ee}}^{\mathrm{lr},\mu}$ the long-range
electron-electron interaction,
$n_\Psi$ the one-particle density associated with $\Psi$,
and $\hat{V}_{\mathrm{ne}}$ the electron-nucleus potential.
The minimizing multi-determinant wave function $\Psi^\mu$ 
can be determined by the self-consistent eigenvalue equation
\begin{equation}
  \label{rs-dft-eigen-equation}
  \hat{H}^\mu[n_{\Psi^{\mu}}] \ket{\Psi^{\mu}}= \mathcal{E}^{\mu} \ket{\Psi^{\mu}},
\end{equation}
with the long-range interacting Hamiltonian
\begin{equation}
  \label{H_mu}
  \hat{H}^\mu[n_{\Psi^{\mu}}] = \hat{T}+\hat{W}_{\mathrm{ee}}^{\mathrm{lr},\mu}+\hat{V}_{\mathrm{ne}}+ \hat{\bar{V}}_{\mathrm{Hxc}}^{\mathrm{sr},\mu}[n_{\Psi^{\mu}}],
\end{equation}
where
$\hat{\bar{V}}_{\mathrm{Hxc}}^{\mathrm{sr},\mu}$
is the complementary short-range Hartree-exchange-correlation
potential operator.
Once $\Psi^{\mu}$ has been calculated, the electronic ground-state
energy is obtained by
\begin{equation}
  \label{E-rsdft}
  E_0=  \mel{\Psi^{\mu}}{\hat{T}+\hat{W}_\mathrm{{ee}}^{\mathrm{lr},\mu}+\hat{V}_{\mathrm{ne}}}{\Psi^{\mu}}+\bar{E}^{\mathrm{sr},\mu}_{\mathrm{Hxc}}[n_{\Psi^\mu}].
\end{equation}

Note that, for $\mu=0$, the long-range interaction vanishes,
$w_{\mathrm{ee}}^{\mathrm{lr},\mu=0}(r_{12}) = 0$, and thus
range-separated DFT (RS-DFT) reduces to standard KS-DFT and $\Psi^\mu$
is the KS determinant. For $\mu\to\infty$, the long-range
interaction becomes the standard Coulomb interaction,
$w_{\mathrm{ee}}^{\mathrm{lr},\mu\to\infty}(r_{12}) = 1/r_{12}$, and
thus RS-DFT reduces to standard wave-function theory and $\Psi^\mu$ is
the FCI wave function.

\begin{figure*}
  \centering
  \includegraphics[width=0.7\linewidth]{algorithm.pdf}
  \caption{Algorithm showing the generation of the RS-DFT wave
    function.}
  \label{fig:algo}
\end{figure*}

Hence we have a continuous path connecting the KS determinant to the
FCI wave function, and as the KS nodes are of higher quality than the
HF nodes, we expect that using wave functions built along this path
will always provide reduced fixed-node errors compared to the path
connecting HF to FCI using an increasing number of selected
determinants.

We can follow this path by performing FCI calculations using the
RS-DFT Hamiltonian with different values of $\mu$.  In this work, we
have used the CIPSI algorithm to perform approximate FCI calculations
with the RS-DFT Hamiltonians,\cite{GinPraFerAssSavTou-JCP-18} as shown
in figure~\ref{fig:algo}.  In the outer loop (red), a CIPSI selection
is performed with an RS-Hamiltonian parameterized using the current
density.  An inner loop (blue) is introduced to accelerate the
calculation, in which the set of determinants is kept fixed, and only
the diagonalization of the RS-Hamiltonian is performed iteratively.
The convergence of the algorithm was further improved
by introducing a direct inversion in the iterative subspace (DIIS)
step to extrapolate the density both in the outer and inner loops.
As before, the convergence criterion for CIPSI was set to $|\EPT| <
1$~m$E_h$.


\subsection{Approximations}
In this work, we use the short-range version of the
Perdew-Burke-Ernzerhof (PBE)~\cite{PerBurErn-PRL-96} exchange and
correlation functionals of Ref.~\onlinecite{GolWerStoLeiGorSav-CP-06}
(see also Refs.~\onlinecite{TouColSav-JCP-05,GolWerSto-PCCP-05}).


\subsection{RSDFT-CIPSI}


\begin{enumerate}
 \item Total energies and nodal quality:
  \begin{itemize}
   \item Facts: KS occupied orbitals closer to NOs than HF
   \item Even if exact functional, complete basis set, still approximated nodes for KS
   \item KS $\rightarrow$ exponentially fast convergence (as HF) with basis because of non-divergence of effective KS potential (cite Gill's paper)
   \item With correlation consistent basis set, FCI nodes (which include correlation) are better than KS
   \item With FCI, good limit at CBS $\Rightarrow$ exact energy
   \item But slow convergence with basis set because of divergence of the e-e interaction not well represented in atom centered basis set
   \item Exponential increase of number of Slater determinants
   \item Cite RS-DFT papers: there exists a hybrid scheme combining fast convergence with respect to basis set (non divergent basis set) and short expansion in SCI (cite Ferté's paper)
   \item Question: does such a scheme provide better nodal quality ?
   \item In RSDFT we cannot optimize energy with $\mu$ , but in FNDMC
   \item Factual stuffs: with optimal $\mu$, lower FNDMC energy than HF/KS/FCI
    \begin{itemize}
     \item  less determinants $\Rightarrow$ large systems
     \item  only one parameter to optimize $\Rightarrow$ deterministic
     \item $\Rightarrow$ reproducible
    \end{itemize}
   \item with the optimal $\mu$:
    \begin{itemize}
     \item Direct optimization of FNDMC with one parameter
     \item Do we improve energy differences ?
     \item system dependent
     \item basis set dependent: $\mu \rightarrow \infty$ when $\mathcal{B}\rightarrow \text{CBS}$
     \item large wave functions
    \end{itemize}
    \begin{itemize}
      \item plot $N_{\text{det}}$ as a function of $\mu$
    \end{itemize}
  \end{itemize}
\end{enumerate}


% Overlap with reoptimized
% Plot Ndets as a function of mu


\section{Influence of the range-separation parameter on the fixed-node
  error}
\label{sec:mu-dmc}
\begin{table}
  \caption{Fixed-node energies of the water molecule.}
  \label{tab:h2o-dmc}
  \centering
  \begin{tabular}{crlrl}
    \hline
              &  \multicolumn{2}{c}{BFD-VDZ} &  \multicolumn{2}{c}{BFD-VTZ}\\
    $\mu$     &  $N_{\text{det}}$  &  E(DMC)      &  $N_{\text{det}}$  &  E(DMC)\\
    \hline
    $0.00$    &  $11$         &  $-17.253\,59(6)$  &  $23$           &  $-17.256\,74(7)$\\
    $0.20$    &  $23$         &  $-17.253\,73(7)$  &  $23$           &  $-17.256\,73(8)$\\
    $0.30$    &  $53$         &  $-17.253\,4(2)$   &  $219$          &  $-17.253\,7(5)$\\
    $0.50$    &  $1\,442$     &  $-17.253\,9(2)$   &  $1\,699$       &  $-17.257\,7(2)$\\
    $0.75$    &  $3\,213$     &  $-17.255\,1(2)$   &  $13\,362$      &  $-17.258\,4(3)$\\
    $1.00$    &  $6\,743$     &  $-17.256\,6(2)$   &  $25\,673$      &  $-17.261\,0(2)$\\
    $1.75$    &  $54\,540$    &  $-17.259\,5(3)$   &  $207\,475$     &  $-17.263\,5(2)$\\
    $2.50$    &  $51\,691$    &  $-17.259\,4(3)$   &  $858\,123$     &  $-17.264\,3(3)$\\
    $3.80$    &  $103\,059$   &  $-17.258\,7(3)$   &  $1\,621\,513$  &  $-17.263\,7(3)$\\
    $5.70$    &  $102\,599$   &  $-17.257\,7(3)$   &  $1\,629\,655$  &  $-17.263\,2(3)$\\
    $8.50$    &  $101\,803$   &  $-17.257\,3(3)$   &  $1\,643\,301$  &  $-17.263\,3(4)$\\
    $\infty$  &  $200\,521$   &  $-17.256\,8(6)$   &  $1\,631\,982$  &  $-17.263\,9(3)$\\
    \hline
  \end{tabular}
\end{table}

\begin{figure}
  \centering
  \includegraphics[width=\columnwidth]{h2o-dmc.pdf}
  \caption{Fixed-node energies of the water molecule for different
    values of $\mu$.}
  \label{fig:h2o-dmc}
\end{figure}

\begin{figure}
  \centering
  \includegraphics[width=\columnwidth]{f2-dmc.pdf}
  \caption{Fixed-node energies of difluorine for different
    values of $\mu$.}
  \label{fig:f2-dmc}
\end{figure}
The water molecule was taken at the equilibrium
geometry,\cite{Caffarel_2016} and RSDFT-CIPSI wave functions were
generated with BFD pseudopotentials and the corresponding double-zeta
basis set using multiple values of the range-separation parameter
$\mu$. The convergence criterion for stopping the CIPSI calculation
was set to 1~m$E_h$ on the PT2 correction. Then, these wave functions
were used as trial wave functions for FN-DMC calculations, and the
corresponding energies are shown in table~\ref{tab:h2o-dmc} and
figure~\ref{fig:h2o-dmc}.

Using FCI trial wave functions gives FN-DMC energies which are lower
than the energies obtained with a single Kohn-Sham determinant:
3~m$E_h$ at the double-zeta level and 7~m$E_h$ at the triple-zeta
level. Interestingly, with the double-zeta basis one can obtain a
FN-DMC energy 2.5~m$E_h$ lower than the energy obtained with the FCI
trial wave function, using the RSDFT-CIPSI with a range-separation
parameter $\mu=1.75$. This can be explained by the inability of the
basis set to properly describe short-range correlation, shifting
the nodes from their optimal position. Using DFT to take account of
short-range correlation frees the determinant expansion from describing
short-range effects, and enables a better placement of the nodes.
At the triple-zeta level, the short-range correlations can be better
described, and the improvement due to DFT is insignificant. However,
it is important to note that the same FN-DMC energy can be
obtained with a CI expansion which is eight times smaller when sr-DFT
is introduced. One can also remark that the minimum has been
shifted towards the FCI, which is consistent with the fact that
in the CBS limit we expect the minimum of the FN-DMC energy to be
obtained for the FCI wave function, at $\mu=\infty$.


\begin{figure}
  \centering
  \includegraphics[width=\columnwidth]{overlap.pdf}
  \caption{Overlap of the RSDFT-CIPSI wave functions with the
    wave function reoptimized in the presence of a Jastrow factor.}
  \label{fig:overlap}
\end{figure}
\section{Computational details}
\label{sec:comp-details}

All the calculations were made using BFD
pseudopotentials\cite{Burkatzki_2008} with the associated double, triple
and quadruple zeta basis sets.
CCSD(T) and DFT calculations were made with
\emph{Gaussian09},\cite{g16} using an unrestricted Hartree-Fock
determinant as a reference for open-shell systems. All the CIPSI
calculations and range-separated CIPSI calculations were made with
\emph{Quantum Package}.\cite{Garniron_2019,qp2_2020}
Quantum Monte Carlo calculations were made with QMC=Chem,\cite{scemama_2013}
in the determinant localization approximation.\cite{Zen_2019}

In the determinant localization approximation, only the determinantal
component of the trial wave function is present in the expression of
the wave function on which the pseudopotential is localized. Hence,
the pseudopotential operator does not depend on the Jastrow factor, as
is the case in all-electron calculations. This improves the
reproducibility of the results, as they depend only on parameters
optimized in a deterministic framework.


\section{Atomization energy benchmarks}
\label{sec:atomization}

Atomization energies are challenging for post-Hartree-Fock methods
because their calculation requires a perfect balance in the
description of atoms and molecules. Basis sets used in molecular
calculations are atom-centered, so they are always better adapted to
atoms than molecules and atomization energies usually tend to be
underestimated.
In the context of FN-DMC calculations, the nodal surface is imposed by
the trial wave function which is expanded on an atom-centered basis
set. So we expect the fixed-node error to be also related to the basis
set incompleteness error.
Increasing the size of the basis set improves the description of
the density and of electron correlation, but also reduces the
imbalance in the quality of the description of the atoms and the
molecule, leading to more accurate atomization energies.

Another important feature required to get accurate atomization
energies is size-extensivity, since the numbers of correlated electrons
in the isolated atoms are different from the number of correlated
electrons in the molecule.
In the context of selected CI calculations, when the variational energy is
extrapolated to the FCI energy\cite{Holmes_2017} there is obviously no
size-consistency error. But when the selected wave function is used
as a reference for post-Hartree-Fock methods or QMC calculations,
there is a residual size-consistency error originating from the
truncation of the determinant space.

% Invariance with m_s

QMC calculations can be made size-consistent by extrapolating the
FN-DMC energy to estimate the energy obtained with the FCI as a trial
wave function.\cite{Scemama_2018,Scemama_2018b} Alternatively, the
size-consistency error can be reduced by choosing the number of
selected determinants such that the sum of the PT2 corrections on the
atoms is equal to the PT2 correction of the molecule, enforcing that
the variational dissociation potential energy surface (PES) is
parallel to the perturbatively corrected PES, which is an accurate
estimate of the FCI PES.\cite{Giner_2015}

Another source of size-consistency error in QMC calculations may originate
from the Jastrow factor. Usually, the Jastrow factor contains
one-electron, two-electron and one-nucleus-two-electron terms.
The problematic part is the two-electron term, whose simplest form can
be expressed as
\begin{equation}
  J_\text{ee} = \sum_i \sum_{j<i} \frac{a r_{ij}}{1 + b r_{ij}}.
\end{equation}
$a$ is determined by cusp conditions, and $b$ is obtained by energy
or variance minimization.\cite{Coldwell_1977,Umrigar_2005}
One can easily see that this parameterization of the two-body
interaction is not size-consistent. The dissociation of a
heteronuclear diatomic molecule $AB$ with a parameter $b_{AB}$
will lead to two different two-body Jastrow factors, one on each atom,
each with its own optimal value, $b_A$ and $b_B$. To remove the
size-consistency error on a PES using this ansatz for $J_\text{ee}$,
one needs to impose that the parameters of $J_\text{ee}$ are fixed:
$b_A = b_B = b_{AB}$.

When pseudopotentials are used in a QMC calculation, it is common
practice to localize the pseudopotential on the complete wave
function. If the wave function is not size-consistent, neither is the
locality approximation. Recently, the determinant localization
approximation was introduced.\cite{Zen_2019} This approximation
consists in removing the Jastrow factor from the wave function on
which the pseudopotential is localized.
The great advantage of this approximation is that the FN-DMC energy
within this approximation only depends on the parameters of the
determinantal component. Using a size-inconsistent Jastrow factor, or
a non-optimal Jastrow factor, will not introduce an additional
size-consistency error in FN-DMC calculations, although it will
reduce the statistical errors by reducing the variance of the local energy.


The energy computed within density functional theory is extensive, and
as it is a mean-field method the convergence to the complete basis set
(CBS) limit is relatively fast. Hence, DFT methods are very well adapted to
the calculation of atomization energies, especially with small basis
sets, but going to the CBS limit will converge to biased atomization
energies because of the use of approximate density functionals.

On the other hand, the convergence of the FCI energies to the CBS
limit will be slower because of the description of short-range electron
correlation with atom-centered functions, but ultimately the exact
energy will be reached.


The 55 molecules of the benchmark for the Gaussian-1
theory\cite{Pople_1989,Curtiss_1990} were chosen to test the quality
of the RSDFT-CIPSI trial wave functions for energy differences.


%\begin{squeezetable}
\begin{table*}
  \caption{Mean absolute errors (MAE), mean signed errors (MSE) and
    standard deviations (RMSD) obtained with the different methods and
    basis sets.}
  \label{tab:mad}
  \begin{ruledtabular}
    \begin{tabular}{ll rrr rrr rrr}
Method           &  \(\mu\)     &  \phantom{}  &  VDZ-BFD                &  \phantom{}  &  \phantom{}  &  VTZ-BFD    &  \phantom{}  &  \phantom{}  &  VQZ-BFD    &  \phantom{}  \\
\phantom{}       &  \phantom{}  &  MAE         &  MSE                    &  RMSD          &  MAE         &  MSE        &  RMSD          &  MAE         &  MSE        &  RMSD          \\
\hline
PBE              &  0           &  5.02        &  -3.70                  &  6.04        &  4.57        &  1.00       &  5.32        &  5.31        &  0.79       &  6.27        \\
BLYP             &  0           &  9.53        &  -9.21                  &  7.91        &  5.58        &  -4.44      &  5.80        &  5.86        &  -4.47      &  6.43        \\
PBE0             &  0           &  11.20       &  -10.98                 &  8.68        &  6.40        &  -5.78      &  5.49        &  6.28        &  -5.65      &  5.08        \\
B3LYP            &  0           &  11.27       &  -10.98                 &  9.59        &  7.27        &  -5.77      &  6.63        &  6.75        &  -5.53      &  6.09        \\
\hline
CCSD(T)          &  \(\infty\)  &  24.10       &  -23.96                 &  13.03       &  9.11        &  -9.10      &  5.55        &  4.52        &  -4.38      &  3.60        \\
\hline
RSDFT-CIPSI      &  0           &  10.08       &  3.22                   &  30.51(*)    &  6.31        &  0.91       &  7.93        &  6.35        &  3.88       &  7.20        \\
\phantom{}       &  1/4         &  5.55        &  -4.66                  &  5.52        &  4.58        &  1.06       &  5.72        &  5.48        &  1.52       &  6.93        \\
\phantom{}       &  1/2         &  13.42       &  -13.27                 &  7.36        &  6.77        &  -6.71      &  4.56        &  6.35        &  -5.89      &  5.18        \\
\phantom{}       &  1           &  17.07       &  -16.92                 &  9.83        &  9.06        &  -9.06      &  5.88        &  ---         &  ---        &  ---         \\
\phantom{}       &  2           &  19.20       &  -19.05                 &  10.91       &  ---         &  ---        &  ---         &  ---         &  ---        &  ---         \\
\phantom{}       &  5           &  22.93       &  -22.79                 &  13.24       &  ---         &  ---        &  ---         &  ---         &  ---        &  ---         \\
\phantom{}       &  \(\infty\)  &  23.62       &  -23.48                 &  12.81       &  ---         &  ---        &  ---         &  ---         &  ---        &  ---         \\
\hline
DMC@RSDFT-CIPSI  &  0           &  5.07(44)    &  -4.08(\phantom{0.}44)  &  6.59        &  3.52(19)    &  -1.03(19)  &  4.39        &  3.16(26)    &  -0.12(26)  &  4.12        \\
\phantom{}       &  1/4         &  4.04(37)    &  -3.13(\phantom{0.}37)  &  4.88        &  3.39(77)    &  -0.59(77)  &  4.44        &  2.90(25)    &  0.25(25)   &  3.74        \\
\phantom{}       &  1/2         &  3.74(35)    &  -3.53(\phantom{0.}35)  &  4.03        &  2.46(18)    &  -1.72(18)  &  3.02        &  2.06(35)    &  -0.44(35)  &  2.74        \\
\phantom{}       &  1           &  5.42(29)    &  -5.14(\phantom{0.}29)  &  4.55        &  4.38(94)    &  -4.24(94)  &  5.11        &   ---        &   ---       &   ---        \\
\phantom{}       &  2           &  5.98(83)    &  -5.91(\phantom{0.}83)  &  4.79        &   ---        &   ---       &   ---        &   ---        &   ---       &   ---        \\
\phantom{}       &  5           &  6.18(84)    &  -6.13(\phantom{0.}84)  &  4.87        &   ---        &   ---       &   ---        &   ---        &   ---       &   ---        \\
\phantom{}       &  \(\infty\)  &  7.38(1.08)  &  -7.38(1.08)            &  5.67        &   ---        &   ---       &   ---        &   ---        &   ---       &   ---        \\
\phantom{}       &  Opt.        &  5.84(1.75)  &  -5.63(1.75)            &  4.79        &   ---        &   ---       &   ---        &   ---        &   ---       &   ---        \\
    \end{tabular}
  \end{ruledtabular}
\end{table*}
%\end{squeezetable}

\begin{figure}
  \centering
  \includegraphics[width=\columnwidth]{g2-dmc.pdf}
  \caption{Errors in the DMC atomization energies with the different
    trial wave functions. Each dot corresponds to an atomization
    energy.
    The boxes contain the data between first and third quartiles, and
    the line in the box represents the median. The outliers are shown
    with a cross.}
  \label{fig:g2-dmc-dz}
\end{figure}


%%---------------------------------------
\begin{acknowledgments}
An award of computer time was provided by the Innovative and Novel
Computational Impact on Theory and Experiment (INCITE) program. This
research has used resources of the Argonne Leadership Computing
Facility, which is a DOE Office of Science User Facility supported
under Contract DE-AC02-06CH11357.  AB was supported by the
U.S. Department of Energy, Office of Science, Basic Energy Sciences,
Materials Sciences and Engineering Division, as part of the
Computational Materials Sciences Program and Center for Predictive
Simulation of Functional Materials.
\end{acknowledgments}


\bibliography{rsdft-cipsi-qmc}

\end{document}