From 2404520fd26e4424636a52577eb67864f97a3acc Mon Sep 17 00:00:00 2001
From: Pierre-Francois Loos <pierrefrancois.loos@gmail.com>
Date: Sun, 29 Nov 2020 20:43:20 +0100
Subject: [PATCH] take 2 Mimi

---
 Manuscript/QUEST_WIREs.tex | 77 ++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 40 deletions(-)

diff --git a/Manuscript/QUEST_WIREs.tex b/Manuscript/QUEST_WIREs.tex
index c4c6958..77bea45 100644
--- a/Manuscript/QUEST_WIREs.tex
+++ b/Manuscript/QUEST_WIREs.tex
@@ -294,16 +294,15 @@ For the $m$th excited state (where $m = 0$ corresponds to the ground state), we
   E_{\text{var}}^{(m)} \approx E_\text{FCI}^{(m)} - \alpha^{(m)} E_{\text{rPT2}}^{(m)},
   \label{eqx}
 \end{equation}
-where $E_{\text{var}}^{(m)}$ and $E_{\text{rPT2}}^{(m)}$ are calculated with CIPSI and $E_\text{FCI}^{(m)}$ is the FCI energy
-to be extrapolated. This relation is valid in the regime of a sufficiently large number of determinants where the second-order perturbational
-correction largely dominates.
-However, in practice, due to the residual higher-order terms, the coefficient $\alpha^{(m)}$ deviates slightly from unity.
+where $E_{\text{var}}^{(m)}$ and $E_{\text{rPT2}}^{(m)}$ are calculated with CIPSI and $E_\text{FCI}^{(m)}$ is the FCI energy to be extrapolated. 
+This relation is valid in the regime of a sufficiently large number of determinants where the second-order perturbational correction largely dominates.
+In theory, the coefficient $\alpha^{(m)}$ should be equal to one but, in practice, due to the residual higher-order terms, it deviates slightly from unity.
 
 Using Eq.~\eqref{eqx} the estimated error on the CIPSI energy is calculated as
 \begin{equation}
   E_{\text{CIPSI}}^{(m)} - E_{\text{FCI}}^{(m)}
   = \qty(E_\text{var}^{(m)}+E_{\text{rPT2}}^{(m)}) - E_{\text{FCI}}^{(m)}
-  = \qty(1-\alpha^{(m)}) E_{\text{rPT2}}^{(m)},
+  = \qty(1-\alpha^{(m)}) E_{\text{rPT2}}^{(m)}
 \end{equation}
 and thus the extrapolated excitation energy associated with the $m$th
 state is given by
@@ -311,7 +310,7 @@ state is given by
   \Delta E_{\text{FCI}}^{(m)}
   = \qty[ E_\text{var}^{(m)} + E_{\text{rPT2}} + \qty(\alpha^{(m)}-1) E_{\text{rPT2}} ]
   - \qty[ E_\text{var}^{(0)} + E_{\text{rPT2}} + \qty(\alpha^{(0)}-1) E_{\text{rPT2}} ]
-  + \mathcal{O}\qty[{E_{\text{rPT2}}^2 }],
+  + \mathcal{O}\qty[{E_{\text{rPT2}}^2 }]
 \end{equation}
 which evidences that the error in $\Delta E_{\text{FCI}}^{(m)}$ can be expressed as $\qty(\alpha^{(m)}-\alpha^{(0)}) E_{\text{rPT2}} + \mathcal{O}\qty[{E_{\text{rPT2}}^2}]$.
 
@@ -325,65 +324,63 @@ E_{\text{rPT2}}^{(0)} \approx E_{\text{rPT2}}^{(m)}$, and
 by using a common set of state-averaged natural orbitals with equal weights for the ground and excited states.
 This last feature tends to make the values of $\alpha^{(0)}$ and $\alpha^{(m)}$ very close to each other, such that the error on the energy difference
 is decreased.
-In the ideal case where we would be able to fully correlate the CIPSI calculations associated with the ground and excited states, the fluctuations of
-$\Delta E_\text{CIPSI}^{(m)}(n)$ as a function of $n$ would completely vanish and the exact excitation energy would be obtained from the first CIPSI iterations.
+In the ideal case where one is able to fully correlate the CIPSI calculations associated with the ground and excited states, the fluctuations of
+$\Delta E_\text{CIPSI}^{(m)}(n)$ as a function of the iteration number $n$ would completely vanish and the exact excitation energy would be obtained from the first CIPSI iterations.
 Quite remarkably, in practice, numerical experience shows that the fluctuations with respect to the extrapolated value $\Delta E_\text{FCI}^{(m)}$ are small,
-zero-centered, almost independent of $n$ when not too close iteration
-numbers are considered, and display a Gaussian-like distribution.
-In addition, as stated just above, the fluctuations are found to be (very weakly) dependent on the iteration number $n$ (see Fig.~\ref{fig:histo}), so
-this dependence will not significantly alter our results and will not be considered here.
+zero-centered, and display a Gaussian-like distribution.
+In addition, as evidenced in Fig.~\ref{fig:histo}, these fluctuations are found to be only very weakly dependent on the iteration number $n$ (provided that the $n$ values considered are not too close to each other).
+Hence, this weak dependency does not significantly alter our results and will not be considered here.
+
 We thus introduce the following random variable
 \begin{equation}
-X^{(m)}= \frac{\Delta E_\text{CIPSI}^{(m)}(n)- \Delta E_\text{FCI}^{(m)}}{\sigma(n)}
+\label{eq:X}
+	X^{(m)}= \frac{\Delta E_\text{CIPSI}^{(m)}(n)- \Delta E_\text{FCI}^{(m)}}{\sigma(n)}
 \end{equation}
 where
 \begin{equation}
   \Delta E_\text{CIPSI}^{(m)}(n) = \qty[ E_\text{var}^{(m)}(n) +
   E_{\text{rPT2}}^{(m)}(n) ]
-  - \qty[ E_\text{var}^{(0)}(n) + E_{\text{rPT2}}^{(0)}(n) ],
+  - \qty[ E_\text{var}^{(0)}(n) + E_{\text{rPT2}}^{(0)}(n) ]
 \end{equation}
 and
 ${\sigma(n)}$ is a quantity proportional to the average fluctuations of $\Delta E_\text{CIPSI}^{(m)}$.
 A natural choice for $\sigma^2(n)$, playing here the role of a variance, is
 \begin{equation}
-\sigma^2(n) \propto \qty[E_{\text{rPT2}}^{(m)}(n)]^2 + \qty[E_{\text{rPT2}}^{(0)}(n)]^2,
+\sigma^2(n) \propto \qty[E_{\text{rPT2}}^{(m)}(n)]^2 + \qty[E_{\text{rPT2}}^{(0)}(n)]^2
 \end{equation}
-which vanishes in the large-$n$ limit as it should.
+which vanishes in the large-$n$ limit (as it should).
 
 %%% FIGURE 2 %%%
 \begin{figure}
 \centering
 \includegraphics[width=0.9\linewidth]{fig2/fig2}
-\caption{Histogram of the random variable $X^{(m)}$ (see, text). About 200 values of the transition energies
-for the 13 five- and six-membered ring molecules, both for the singlet and triplet transitions and for a number of CIPSI iterations, are used.
-The number $M$ of iterations kept is chosen according to the statistical test presented in the text.}
+\caption{Histogram of the random variable $X^{(m)}$ [see Eq.~\eqref{eq:X} in the main text for its definition]. 
+About 200 values of singlet and triplet excitation energies taken at various iteration numbers $n$ for the 13 five- and six-membered ring molecules have been considered to build the present histogram.
+The number $M$ of iterations kept for each calculation is chosen according to the statistical test presented in the text.}
 \label{fig:histo}
 \end{figure}
 
-The histogram of $X^{(m)}$ resulting from the excitation energies
-obtained at different values of the CIPSI iterations $n$
-and for the 13 five- and six-membered ring molecules, both for the singlet and triplet transitions,
-is shown in Fig.~\ref{fig:histo}. To avoid transient effects, only excitation energies at sufficiently large $n$ are retained in the data set.
-The criterion used to decide from which precise value of $n$ the data should be kept will be presented below. In our application, the total number
-of values employed to make the histogram is about 200. The dashed line of Fig.~\ref{fig:histo} represents the best Gaussian fit
-(in the sense of least-squares) reproducing the data.
-As seen, the distribution can be described by the Gaussian probability
+The histogram of $X^{(m)}$ resulting from the singlet and triplet excitation energies obtained at various iteration numbers $n$ for the 13 five- and six-membered ring molecules is shown in Fig.~\ref{fig:histo}.
+To avoid transient effects, only excitation energies at sufficiently large $n$ are retained in the data set.
+The statistical criterion used to decide from which precise value of $n$ the data should be kept is presented below. 
+In the present example, the total number of values employed to construct the histogram of Fig.~\ref{fig:histo} is about 200. 
+The dashed line represents the best (in a least-squares sense) Gaussian fit reproducing the data.
+As clearly seen from Fig.~\ref{fig:histo}, the distribution can be fairly well described by a Gaussian probability distribution
 \begin{equation}
-P\qty[X^{(m)}] \propto \exp[-\frac{{X^{(m)}}^2} {2{\sigma^{*}}^2} ]
+	P\qty[X^{(m)}] \propto \exp[-\frac{{X^{(m)}}^2} {2{\sigma^{*}}^2} ]
 \end{equation}
-where $\sigma^{*2}$ is some "universal" variance depending only
-on the way the correlated selection of both states is done, not on the molecule considered in our set.
+where $\sigma^{*2}$ is some ``universal'' variance depending only on the way the correlated selection of both states is done, not on the molecule considered in our set.
 
-An estimate of $\Delta E_{\text{FCI}}^{(m)}$ as the average excitation energy of $\Delta E_\text{CIPSI}^{(m)}$ is thus
-$$\Delta E_\text{FCI}^{(m)} = \frac{ \sum_{n=1}^M  \frac{\Delta E_\text{CIPSI}^{(m)}(n)} {\sigma(n)} }
-            { \sum_{n=1}^M  \frac{1}{\sigma(n)} },
-$$
-where $M$ is the number of data kept.
-Now, regarding the estimate of the error on $\Delta E_\text{FCI}^{(m)}$ some caution is required since, although the distribution is globally Gaussian-like
-(see Fig.~\ref{fig:histo}) there exists
-some significant departure from it and we need to take this feature into account.
+For each CIPSI calculation, an estimate of $\Delta E_{\text{FCI}}^{(m)}$ is thus
+\begin{equation}
+	\Delta E_\text{FCI}^{(m)} = \frac{ \sum_{n=1}^M  \frac{\Delta E_\text{CIPSI}^{(m)}(n)} {\sigma(n)} }
+            { \sum_{n=1}^M  \frac{1}{\sigma(n)} }
+\end{equation}
+where $M$ is the number of iterations that have been retained to compute the statistical quantities.
+Regarding the estimate of the error on $\Delta E_\text{FCI}^{(m)}$, some caution is required since, although the distribution is globally Gaussian-like
+(see Fig.~\ref{fig:histo}), there exists some significant deviation from it and we must take this feature into account.
 
-More precisely, we search for a confidence interval $\mathcal{I}$ such that the true value of the excitation energy $\Delta E_{\text{FCI}}^{(m)}$ lies within one standard deviation of $\Delta E_\text{CIPSI}^{(m)}$, i.e., $P\qty( \Delta E_{\text{FCI}}^{(m)} \in \qty[ \Delta E_\text{CIPSI}^{(m)} \pm \sigma ] \; \Big| \; \mathcal{G}) = 0.6827$.
+More precisely, we search for a confidence interval $\mathcal{I}$ such that the true value of the excitation energy $\Delta E_{\text{FCI}}^{(m)}$ lies within one standard deviation of $\Delta E_\text{CIPSI}^{(m)}$, i.e., $P\qty( \Delta E_{\text{FCI}}^{(m)} \in \qty[ \Delta E_\text{CIPSI}^{(m)} \pm \sigma ] \; \Big| \; \mathcal{G}) = p = 0.6827$.
 In a Bayesian framework, the probability that $\Delta E_{\text{FCI}}^{(m)}$ is in an interval $\mathcal{I}$ is
 \begin{equation}
    P\qty( \Delta E_{\text{FCI}}^{(m)} \in \mathcal{I} ) = P\qty( \Delta E_{\text{FCI}}^{(m)} \in I \Big| \mathcal{G}) \times P\qty(\mathcal{G})
@@ -400,7 +397,7 @@ The inverse of the cumulative distribution function of the $t$-distribution, $t_
    \beta = t_{\text{CDF}}^{-1} \qty[
    \frac{1}{2} \qty( 1 + \frac{0.6827}{P(\mathcal{G})}), M ]
 \end{equation}
-such that $P\qty( \Delta E_{\text{FCI}}^{(m)} \in \qty[ \Delta E_{\text{CIPSI}}^{(m)} \pm \beta \sigma ] ) = p = 0.6827$.
+such that $P\qty( \Delta E_{\text{FCI}}^{(m)} \in \qty[ \Delta E_{\text{CIPSI}}^{(m)} \pm \beta \sigma ] ) = p$.
 Only the last $M>2$ computed transition energies are considered. $M$ is chosen such that $P(\mathcal{G})>0.8$ and such that the error bar is minimal.
 If all the values of $P(\mathcal{G})$ are below $0.8$, $M$ is chosen such that $P(\mathcal{G})$ is maximal.
 A Python code associated with this procedure is provided in the {\SupInf}.