This commit is contained in:
Pierre-Francois Loos 2020-11-19 11:53:36 +01:00
parent 40ea13c584
commit f98b0d6fc2
4 changed files with 142 additions and 54 deletions

Binary file not shown.

View File

@ -1,13 +1,95 @@
%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/
%% Created for Pierre-Francois Loos at 2020-11-18 16:27:55 +0100
%% Created for Pierre-Francois Loos at 2020-11-19 11:36:40 +0100
%% Saved with string encoding Unicode (UTF-8)
@article{Svensson_1996a,
author = {Svensson, Mats and Humbel, St{\'e}phane and Froese, Robert D. J. and Matsubara, Toshiaki and Sieber, Stefan and Morokuma, Keiji},
date-added = {2020-11-19 11:33:34 +0100},
date-modified = {2020-11-19 11:35:04 +0100},
doi = {10.1021/jp962071j},
journal = {J. Phys. Chem.},
number = {50},
pages = {19357--19363},
title = {{ONIOM}: A Multilayered Integrated {MO} + {MM} Method for Geometry Optimizations and Single Point Energy Predictions. {A} Test for {Diels--Alder} Reactions and {Pt(P($t$-Bu)$_3$)$_2$} + {H$_2$} Oxidative Addition},
url = {https://doi.org/10.1021/jp962071j},
volume = {100},
year = {1996},
Bdsk-Url-1 = {https://doi.org/10.1021/jp962071j}}
@article{Svensson_1996b,
author = {Svensson, Mats and Humbel, St{\'e}phane and Morokuma, Keiji},
date-added = {2020-11-19 11:33:14 +0100},
date-modified = {2020-11-19 11:33:49 +0100},
doi = {10.1063/1.472235},
journal = {J. Chem. Phys.},
number = {9},
pages = {3654--3661},
title = {Energetics using the single point {IMOMO} (integrated molecular orbital+molecular orbital) calculations: Choices of computational levels and model system},
url = {https://doi.org/10.1063/1.472235},
volume = {105},
year = {1996},
Bdsk-Url-1 = {https://doi.org/10.1063/1.472235}}
@book{Angyan_2020,
author = {{\'A}ngy{\'a}n, J{\'a}nos and Dobson, John and Jansen, Georg and Gould, Tim},
date-added = {2020-11-19 10:04:56 +0100},
date-modified = {2020-11-19 10:05:10 +0100},
doi = {10.1039/9781782623861},
isbn = {978-1-78262-045-7},
pages = {P001--434},
publisher = {The Royal Society of Chemistry},
series = {Theoretical and Computational Chemistry Series},
subtitle = {An Introduction to Physical Models and Computational Methods},
title = {{London} Dispersion Forces in Molecules, Solids and Nano-structures},
url = {http://dx.doi.org/10.1039/9781782623861},
year = {2020},
Bdsk-Url-1 = {http://dx.doi.org/10.1039/9781782623861}}
@article{Helgaker_1989,
author = {Helgaker, T. and J{\o}rgensen, P. and Handy, N. C.},
date-added = {2020-11-19 09:59:36 +0100},
date-modified = {2020-11-19 09:59:43 +0100},
doi = {10.1007/BF00532006},
journal = {Theoret. Chim. Acta},
pages = {227--245},
title = {A Numerically Stable Procedure for Calculating {M{\o}ller--Plesset} Energy Derivatives, Derived Using the Theory of {Lagrangians}},
volume = {76},
year = {1989},
Bdsk-Url-1 = {https://doi.org/10.1007/BF00532006}}
@article{Koch_1990b,
author = {Koch, H. and Jensen, H. J. Aa. and J{\o}rgensen, P. and Helgaker, T.},
date-added = {2020-11-19 09:59:15 +0100},
date-modified = {2020-11-19 09:59:23 +0100},
journal = {J. Chem. Phys.},
pages = {3345--3350},
title = {Excitation Energies from the Coupled Cluster Singles and Doubles Linear Response Function ({CCSDLR}). {Applications} to {Be}, {CH$^+$}, {CO}, and {H$_2$O}},
volume = {93},
year = {1990}}
@article{Hattig_2003,
author = {H{\"a}ttig, Christof},
date-added = {2020-11-19 09:57:39 +0100},
date-modified = {2020-11-19 09:57:44 +0100},
doi = {10.1063/1.1564061},
journal = {J. Chem. Phys.},
number = {17},
pages = {7751--7761},
title = {Geometry Optimizations with the Coupled-Cluster Model {CC2} using the Resolution-of-the-Identity Approximation},
url = {https://doi.org/10.1063/1.1564061},
volume = {118},
year = {2003},
Bdsk-Url-1 = {https://doi.org/10.1063/1.1564061}}
@article{Adamo_2013,
author = {Adamo, C. and Jacquemin, D.},
date-added = {2020-11-18 16:26:09 +0100},
@ -207,14 +289,12 @@
@article{Scemama_2020,
author = {Scemama, Anthony and Giner, Emmanuel and Benali, Anouar and Loos, Pierre-Fran{\c c}ois},
date-added = {2020-11-04 21:14:14 +0100},
date-modified = {2020-11-19 10:13:08 +0100},
doi = {10.1063/5.0026324},
journal = {J. Chem. Phys.},
number = {17},
pages = {174107},
title = {Taming the fixed-node error in diffusion {Monte Carlo} via range separation},
url = {https://doi.org/10.1063/5.0026324},
volume = {153},
year = {2020},
Bdsk-Url-1 = {https://doi.org/10.1063/5.0026324}}
@ -1313,15 +1393,17 @@
year = {2018},
Bdsk-Url-1 = {http://dx.doi.org/10.1039/C8CP05554H}}
@article{Benali_2020,
author = {Benali, Anouar and Gasperich, Kevin and Jordan, Kenneth D. and Applencourt, Thomas and Luo, Ye and Bennett, M. Chandler and Krogel, Jaron T. and Shulenburger, Luke and Kent, Paul R. C. and Loos, Pierre-Fran{\c c}ois and Scemama, Anthony and Caffarel, Michel},
date-added = {2020-09-04 10:00:38 +0200},
date-modified = {2020-11-19 10:12:43 +0100},
doi = {10.1063/5.0021036},
journal = {J. Chem. Phys.},
pages = {184111},
title = {Towards a Systematic Improvement of the Fixed-Node Approximation in Diffusion {Monte Carlo} for Solids},
volume = {153},
year = {2020},
Bdsk-Url-1 = {https://doi.org/10.1063/5.0021036}}
@article{Li_2020,
author = {Li, Junhao and Yao, Yuan and Holmes, Adam A. and Otten, Matthew and Sun, Qiming and Sharma, Sandeep and Umrigar, C. J.},

View File

@ -82,16 +82,16 @@
\maketitle
\begin{abstract}
We describe our efforts of the past few years to create a large set of more than \alert{470} highly-accurate vertical excitation energies of various natures ($\pi \to \pis$, $n \to \pis$, double excitation, Rydberg, singlet, doublet, triplet, etc) for small- and medium-sized molecules.
We describe our efforts of the past few years to create a large set of more than 400 highly-accurate vertical excitation energies of various natures ($\pi \to \pis$, $n \to \pis$, double excitation, Rydberg, singlet, doublet, triplet, etc.) for small- and medium-sized molecules.
These values have been obtained using an incremental strategy which consists in combining high-order coupled cluster and selected configuration interaction calculations using increasingly large diffuse basis sets in order to reach high accuracy.
One of the key aspect of the so-called QUEST database of vertical excitations is that it does not rely on any experimental values, avoiding potential biases inherently linked to experiments and facilitating theoretical cross comparison processes
Following a composite protocol, we have been able to produce theoretical best estimate (TBEs) with the aug-cc-pVTZ basis set, as well as basis set corrected TBEs (i.e., near the complete basis set limit) for each of these transitions.
These TBEs have been employed to benchmark a large number of (lower-order) wave function methods such as CIS(D), ADC(2), STEOM-CCSD, CCSD, CCSDR(3), CCSDT-3, ADC(3), CC3, NEVPT2, and others.
In order to gather the huge number of data produced during the QUEST project, we have created a website where one can easily test and compare the accuracy of a given method with respect to various variables such as the molecule size or its family, the nature of the excited states, the size of the basis set, etc.
One of the key aspects of the so-called QUEST database of vertical excitations is that it does not rely on any experimental values, avoiding potential biases inherently linked to experiments and facilitating theoretical cross comparisons.
Following a composite protocol, we have been able to produce theoretical best estimates (TBEs) with the aug-cc-pVTZ basis set for each of these transitions, as well as basis set corrected TBEs (i.e., near the complete basis set limit) for some of them.
The TBEs/aug-cc-pVTZ have been employed to benchmark a large number of (lower-order) wave function methods such as CIS(D), ADC(2), STEOM-CCSD, CCSD, CCSDR(3), CCSDT-3, ADC(3), CC3, NEVPT2, and others.
In order to gather the huge amount of data produced during the QUEST project, we have created a website [\url{https://github.com/mveril/QUESTDB_website}] where one can easily test and compare the accuracy of a given method with respect to various variables such as the molecule size or its family, the nature of the excited states, the size of the basis set, etc.
%Add website address here
We hope that the present review will provide a useful summary of our work so far and foster new developments around excited-state methods.
% Please include a maximum of seven keywords
\keywords{Excited states, full configuration interaction, coupled-cluster excitation energies}
\keywords{excited states, benchmark, database, full configuration interaction, coupled cluster theory, excitation energies}
\end{abstract}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -100,19 +100,19 @@ We hope that the present review will provide a useful summary of our work so far
Nowadays, there exists a very large number of electronic structure computational approaches, more or less expensive depending on their overall accuracy, able to quantitatively predict the absolute and/or relative energies of electronic states in molecular systems \cite{SzaboBook,JensenBook,CramerBook,HelgakerBook}.
One important aspect of some of these theoretical methods is their ability to access the energies of electronic excited states, i.e., states that have higher total energies than the so-called ground (that is, lowest-energy) state.
The faithful description of excited states is particularly challenging from a theoretical point of view \cite{Roos_1996,Piecuch_2002,Dreuw_2005,Krylov_2006,Sneskov_2012,Gonzales_2012,Laurent_2013,Adamo_2013,Ghosh_2018,Blase_2020,Loos_2020a} and is key to a deeper understanding of photochemical and photophysical processes like absorption, fluorescence, phosphorescence or even chemoluminescence \cite{Bernardi_1996,Olivucci_2010,Robb_2007,Navizet_2011}.
One important aspect of some of these theoretical methods is their ability to access the energies of electronic excited states, i.e., states that have higher total energies than the so-called ground (that is, lowest-energy) state \cite{Roos_1996,Piecuch_2002,Dreuw_2005,Krylov_2006,Sneskov_2012,Gonzales_2012,Laurent_2013,Adamo_2013,Ghosh_2018,Blase_2020,Loos_2020a}.
The faithful description of excited states is particularly challenging from a theoretical point of view and is key to a deeper understanding of photochemical and photophysical processes like absorption, fluorescence, phosphorescence or even chemiluminescence \cite{Bernardi_1996,Olivucci_2010,Robb_2007,Navizet_2011}.
For a given level of theory, ground-state methods are usually more accurate than their excited-state analogs.
The reasons behind this are (at least) threefold: i) one might lack a proper variational principle for excited-state energies and one may have to rely on response theory formalisms which inherently introduce a ground-state ``bias'', iii) accurately modeling the electronic structure of excited states usually requires larger one-electron basis sets (including diffuse functions most of the times) than their ground-state counterpart, and iii) excited states can be governed by different amounts of dynamic/static correlations, present very different physical natures ($\pi \to \pis$, $n \to \pis$, charge transfer, double excitation, valence, Rydberg, singlet, doublet, triplet, etc), yet be very close in energy from one another.
Hence, designing excited-state methods able to tackle simultaneously and on an equal footing all these types of excited states at an affordable cost remain an open challenge in theoretical computational chemistry \cite{Piecuch_2002,Dreuw_2005,Krylov_2006,Gonzales_2012,Ghosh_2018,Blase_2020,Loos_2020a,Eriksen_2021}.
The reasons behind this are (at least) threefold: i) one might lack a proper variational principle for excited-state energies and one may have to rely on response theory \cite{Monkhorst_1977,Helgaker_1989,Koch_1990,Koch_1990b,Christiansen_1995b,Christiansen_1998b,Hattig_2003,Kallay_2004,Hattig_2005c} formalisms which inherently introduce a ground-state ``bias'', ii) accurately modeling the electronic structure of excited states usually requires larger one-electron basis sets (including diffuse functions most of the time) than their ground-state counterparts, and iii) excited states can be governed by different amounts of dynamic/static correlations, present very different physical natures ($\pi \to \pis$, $n \to \pis$, charge transfer, double excitation, valence, Rydberg, singlet, doublet, triplet, etc.), yet be very close in energy to one another.
Hence, designing excited-state methods able to tackle simultaneously and on an equal footing all these types of excited states at an affordable cost remains an open challenge in theoretical computational chemistry as evidenced by the large number of review articles on this particular subject \cite{Roos_1996,Piecuch_2002,Dreuw_2005,Krylov_2006,Sneskov_2012,Gonzales_2012,Laurent_2013,Adamo_2013,Ghosh_2018,Blase_2020,Loos_2020a}.
When designing a new theoretical model, the first feature that one might want to test is its overall accuracy, i.e., its ability to reproduce reference (or benchmark) values for a given system with well-defined setup (same geometry, basis set, etc).
These values can be absolute or relative energies, geometrical parameters, physical or chemical spectroscopic properties extracted from experiments, high-level theoretical calculations, or any combination of these.
These values can be absolute and/or relative energies, geometrical parameters, physical or chemical spectroscopic properties extracted from experiments, high-level theoretical calculations, or any combination of these.
To this end, the electronic structure community has designed over the years benchmark sets, i.e., sets of molecules for which one could (very) accurately compute theoretical estimates and/or access solid experimental data for given properties.
Regarding ground-state properties, two of the oldest and most employed sets are probably the Gaussian-1 and Gaussian-2 benchmark sets \cite{Pople_1989,Curtiss_1991,Curtiss_1997} developed by the group of Pople in the 1990s.
For example, the Gaussian-2 set gathers atomization energies, ionization energies, electron affinities, proton affinities, bond dissociation energies, and reaction barriers.
This set was subsequently extended and refined \cite{Curtiss_1998,Curtiss_2007}.
Another very useful set for the design of methods able to catch dispersion effects is the S22 benchmark set \cite{Jureka_2006} (and its extended S66 version \cite{Rezac_2011}) of Hobza and collaborators which provides benchmark interaction energies for weakly-interacting (non covalent) systems.
Another very useful set for the design of methods able to catch dispersion effects \cite{Angyan_2020} is the S22 benchmark set \cite{Jureka_2006} (and its extended S66 version \cite{Rezac_2011}) of Hobza and collaborators which provides benchmark interaction energies for weakly-interacting (non covalent) systems.
One could also mention the $GW$100 set \cite{vanSetten_2015,Krause_2015,Maggio_2016} (and its $GW$5000 extension \cite{Stuke_2020}) of ionization energies which has helped enormously the community to settle on the implementation of $GW$-type methods for molecular systems \cite{vanSetten_2013,Bruneval_2016,Caruso_2016,Govoni_2018}.
The extrapolated ab initio thermochemistry (HEAT) set designed to achieve high accuracy for enthalpies of formation of atoms and small molecules (without experimental data) is yet another successful example of benchmark set \cite{Tajti_2004,Bomble_2006,Harding_2008}.
More recently, the benchmark datasets provided by the \textit{Simons Collaboration on the Many-Electron Problem} have been extremely valuable to the community by providing, for example, highly-accurate ground state energies for hydrogen chains \cite{Motta_2017} as well as transition metal atoms and their ions and monoxides \cite{Williams_2020}.
@ -151,7 +151,7 @@ Their fundamental philosophy consists, roughly speaking, in retaining only the m
Originally developed in the late 1960's by Bender and Davidson \cite{Bender_1969} as well as Whitten and Hackmeyer \cite{Whitten_1969}, new efficient SCI algorithms have resurfaced recently.
Four examples are adaptive sampling CI (ASCI) \cite{Tubman_2016,Tubman_2018,Tubman_2020}, iCI \cite{Liu_2014,Liu_2016,Lei_2017,Zhang_2020}, semistochastic heat-bath CI (SHCI) \cite{Holmes_2016,Holmes_2017,Sharma_2017,Li_2018,Li_2020,Yao_2020}, and \textit{Configuration Interaction using a Perturbative Selection made Iteratively} (CIPSI) \cite{Huron_1973,Giner_2013,Giner_2015,Garniron_2019}.
These four flavors of SCI include a second-order perturbative (PT2) correction which is key to estimate the ``distance'' to the FCI solution (see below).
The QUEST set of excitation energies relies on the CIPSI algorithm, which is, from a historical point of view, one of the oldest SCI algorithm.
The SCI calculations performed for the QUEST set of excitation energies rely on the CIPSI algorithm, which is, from a historical point of view, one of the oldest SCI algorithms.
It was developed in 1973 by Huron, Rancurel, and Malrieu \cite{Huron_1973} (see also Refs.~\cite{Evangelisti_1983,Cimiraglia_1985,Cimiraglia_1987,Illas_1988,Povill_1992}).
Recently, the determinant-driven CIPSI algorithm has been efficiently implemented \cite{Garniron_2019} in the open-source programming environment QUANTUM PACKAGE by our group, enabling massively parallel computations \cite{Garniron_2017,Garniron_2018,Garniron_2019,Loos_2020e}.
CIPSI is also frequently employed to provide accurate trial wave functions for quantum Monte Carlo calculations in molecules \cite{Caffarel_2014,Caffarel_2016a,Caffarel_2016b,Giner_2013,Giner_2015,Scemama_2015,Scemama_2016,Scemama_2018,Scemama_2018b,Scemama_2019,Dash_2018,Dash_2019,Scemama_2020} and more recently for periodic solids \cite{Benali_2020}.
@ -161,7 +161,7 @@ The present article is organized as follows.
In Sec.~\ref{sec:tools}, we detail the specificities of our protocol by providing computational details regarding geometries, basis sets, (reference and benchmarked) computational methods, and a new way of estimating rigorously the extrapolation error in SCI calculations which is tested by computing additional FCI values for five- and six-membered rings.
We then describe in Sec.~\ref{sec:QUEST} the content of our five QUEST subsets providing for each of them the number of reference excitation energies, the nature and size of the molecules, the list of benchmarked methods, as well as other specificities.
A special emphasis is placed on our latest add-on, QUEST\#5, specifically designed for the present manuscript where we have considered, in particular but not only, larger molecules.
Section \ref{sec:TBE} discusses the generation of the TBEs, while Sec.~\ref{sec:bench} proposes a comprehensive benchmark of various methods on the entire QUEST set which is composed by more than \alert{470} excitations with, in addition, a specific analysis for each type of excited states.
Section \ref{sec:TBE} discusses the generation of the TBEs, while Sec.~\ref{sec:bench} proposes a comprehensive benchmark of various methods on the entire QUEST set which is composed of more than 400 excitations with, in addition, a specific analysis for each type of excited state.
Section \ref{sec:website} describes the features of the website that we have specifically designed to gather the entire data generated during these last few years.
Thanks to this website, one can easily test and compare the accuracy of a given method with respect to various variables such as the molecule size or its family, the nature of the excited states, the size of the basis set, etc.
Finally, we draw our conclusions in Sec.~\ref{sec:ccl} where we discuss, in particular, future projects aiming at expanding and improving the usability and accuracy of the QUEST database.
@ -174,7 +174,7 @@ Finally, we draw our conclusions in Sec.~\ref{sec:ccl} where we discuss, in part
%=======================
\subsection{Geometries}
%=======================
The GS structures of the molecules included in the QUEST dataset have been systematically optimized at the CC3/aug-cc-pVTZ level of theory, except for a very few cases.
The ground-state structures of the molecules included in the QUEST dataset have been systematically optimized at the CC3/aug-cc-pVTZ level of theory, except for a very few cases.
As shown in Refs.~\cite{Hattig_2005c,Budzak_2017}, CC3 provides extremely accurate ground- and excited-state geometries.
These optimizations have been performed using DALTON 2017 \cite{dalton} and CFOUR 2.1 \cite{cfour} applying default parameters.
For the open-shell derivatives belonging to QUEST\#4 \cite{Loos_2020c}, the geometries are optimized at the UCCSD(T)/aug-cc-pVTZ level using the GAUSSIAN16 program \cite{Gaussian16} and applying the ``tight'' convergence threshold.
@ -198,8 +198,8 @@ These basis sets are available from the \href{https://www.basissetexchange.org}{
In order to compute reference vertical energies, we have designed different strategies depending on the actual nature of the transition and the size of the system.
For small molecules (typically 1--3 non-hydrogen atoms), we mainly resort to SCI methods which can provide near-FCI excitation energies for compact basis sets.
Obviously, the smaller the molecule, the larger the basis we can afford.
For larger systems (\ie, 4--6 non-hydrogen atom), one cannot afford SCI calculations anymore expect in a few exceptions, and we then rely on LR-CC theory (LR-CCSDT and LR-CCSDTQ typically \cite{Kucharski_1991,Kallay_2003,Kallay_2004,Hirata_2000,Hirata_2004}) to obtain accurate transition energies.
In the following, we will omit the prefix LR for the sake of clarity, as equivalent values would be obtained with the equation-of-motion (EOM) formalism.
For larger systems (\ie, 4--6 non-hydrogen atoms), one cannot afford SCI calculations anymore except in a few special occasions, and we then rely on LR-CC theory (LR-CCSDT and LR-CCSDTQ typically \cite{Kucharski_1991,Kallay_2003,Kallay_2004,Hirata_2000,Hirata_2004}) to obtain accurate transition energies.
In the following, we will omit the prefix LR for the sake of clarity, as equivalent values would be obtained with the equation-of-motion (EOM) formalism \cite{Rowe_1968,Stanton_1993}.
The CC calculations are performed with several codes.
For closed-shell molecules, CC3 \cite{Christiansen_1995b,Koch_1997} calculations are achieved with DALTON \cite{dalton} and CFOUR \cite{cfour}.
@ -220,7 +220,7 @@ These extrapolated total energies (simply labeled as $E_\text{FCI}$ in the remai
Depending on the set, we estimated the extrapolation error via different techniques.
For example, in Ref.~\cite{Loos_2020b}, we estimated the extrapolation error by the difference between the transition energies obtained with the largest SCI wave function and the FCI extrapolated value.
This definitely cannot be viewed as a true error bar, but it provides a rough idea of the quality of the FCI extrapolation and estimate.
Below, we provide a much cleaner way of estimating the extrapolation error in SCI methods, and we adopt this scheme for the five- and six-membered rings.
Below, we provide a much cleaner way of estimating the extrapolation error in SCI methods, and we adopt this scheme for the five- and six-membered rings considered in the QUEST\#3 subset.
The particularity of the current implementation is that the selection step and the PT2 correction are computed \textit{simultaneously} via a hybrid semistochastic algorithm \cite{Garniron_2017,Garniron_2019}.
Moreover, a renormalized version of the PT2 correction (dubbed rPT2) has been recently implemented for a more efficient extrapolation to the FCI limit \cite{Garniron_2019}.
We refer the interested reader to Ref.~\cite{Garniron_2019} where one can find all the details regarding the implementation of the CIPSI algorithm.
@ -264,6 +264,7 @@ The definition of the active space considered for each system as well as the num
%------------------------------------------------
In this section, we present our scheme to estimate the extrapolation error in SCI calculations.
This new protocol is then applied to five- and six-membered ring molecules where SCI calculations are particularly challenging even for small basis sets.
Note that the present method only applies to ``state-averaged'' SCI calculations where ground- and excited-state energies are produced during the same calculation with the same set of molecular orbitals, not to ``state-specific'' calculations where one computes solely the energy of a single state (like conventional ground-state calculations).
For the $m$th excited state (where $m = 0$ corresponds to the ground state), we usually estimate its FCI energy $E_{\text{FCI}}^{(m)}$ by performing a linear extrapolation of its variational energy $E_\text{var}^{(m)}$ as a function of its rPT2 correction $E_{\text{rPT2}}^{(m)}$ as follows
\begin{equation}
@ -329,7 +330,7 @@ In this case, the error bar is estimated via the extrapolation distance, \ie, th
This strategy has been considered in some of our previous works \cite{Loos_2020b,Loos_2020c,Loos_2020e}.
The deviation from the CCSDT excitation energies for the same set of excitations are depicted in Fig.~\ref{fig:errors}, where the red dots correspond to the excitation energies and error bars estimated via the present method, and the blue dots correspond to the excitation energies obtained via a three-point linear fit and error bars estimated via the extrapolation distance.
These results contain a good balance between well-behaved and ill-behaved cases.
For example, cyclopentadiene and furan correspond to well-behaved scenarios where the two flavors of the extrapolations for the excitation energy yield nearly identical estimates and the error bars associated with these two methods nicely overlap.
For example, cyclopentadiene and furan correspond to well-behaved scenarios where the two flavors of extrapolations yield nearly identical estimates and the error bars associated with these two methods nicely overlap.
In these cases, one can observe that our method based on Gaussian random variables provides almost systematically smaller error bars.
Even in less idealistic situations (like in imidazole, pyrrole, and thiophene), the results are very satisfactory and stable.
The six-membered rings represent much more challenging cases for SCI methods, and even for these systems the newly-developed method provides realistic error bars, and allows one to easily detect problematic events (like pyridine for instance).
@ -340,8 +341,7 @@ A selection of these results can be found in the {\SupInf}.
%%% TABLE I %%%
\begin{table}
\centering
\caption{Singlet and triplet excitation energies (in eV) obtained at the CC3, CCSDT, and CIPSI levels of theory with the 6-31+G(d) basis set for various five- and six-membered rings.
The error bars reported in parenthesis correspond to one standard deviation.}
\caption{Singlet and triplet excitation energies (in eV) obtained at the CC3, CCSDT, and CIPSI levels of theory with the 6-31+G(d) basis set for various five- and six-membered rings.}
\label{tab:cycles}
\begin{threeparttable}
\begin{tabular}{lccccc}
@ -380,6 +380,7 @@ Triazine & $^1A_1''(n \ra \pis)$ & 4.85 & 4.84 & 4.77(13)& 5.12(51) \\
\end{tabular}
\begin{tablenotes}
\item $^a$ Excitation energies and error bars estimated via the present method based on Gaussian random variables (see Sec.~\ref{sec:error}).
The error bars reported in parentheses correspond to one standard deviation.
\item $^b$ Excitation energies obtained via a three-point linear fit using the three largest CIPSI variational wave functions, and error bars estimated via the extrapolation distance, \ie, the difference in excitation energies obtained with the three-point linear extrapolation and the largest CIPSI wave function.
\end{tablenotes}
\end{threeparttable}
@ -389,8 +390,7 @@ Triazine & $^1A_1''(n \ra \pis)$ & 4.85 & 4.84 & 4.77(13)& 5.12(51) \\
\begin{figure}
\centering
\includegraphics[width=\linewidth]{fig2}
\caption{Deviation from the CCSDT excitation energies for the lowest singlet and triplet excitation energies (in eV) of five- and six-membered rings obtained at the CIPSI/6-31+G(d) level of theory. Red dots: excitation energies and error bars estimated via the present method (see Sec.~\ref{sec:error}). Blue dots: excitation energies obtained via a three-point linear fit using the three largest CIPSI wave functions, and error bars estimated via the extrapolation distance, \ie, the difference in excitation energies obtained with the three-point linear extrapolation and the largest CIPSI wave function.
The error bars corresponds to one standard deviation.}
\caption{Deviation from the CCSDT excitation energies for the lowest singlet and triplet excitation energies (in eV) of five- and six-membered rings obtained at the CIPSI/6-31+G(d) level of theory. Red dots: excitation energies and error bars estimated via the present method (see Sec.~\ref{sec:error}). Blue dots: excitation energies obtained via a three-point linear fit using the three largest CIPSI wave functions, and error bars estimated via the extrapolation distance, \ie, the difference in excitation energies obtained with the three-point linear extrapolation and the largest CIPSI wave function.}
\label{fig:errors}
\end{figure}
@ -402,10 +402,10 @@ Triazine & $^1A_1''(n \ra \pis)$ & 4.85 & 4.84 & 4.77(13)& 5.12(51) \\
%=======================
\subsection{Overview}
%=======================
The QUEST database gathers more than \alert{470} highly-accurate excitation energies of various natures (valence, Rydberg, $n \ra \pis$, $\pi \ra \pis$, singlet, doublet, triplet, and double excitations) for molecules ranging from diatomics to molecules as large as naphthalene (see Fig.~\ref{fig:molecules}).
This set is also diverse chemically, with organic and inorganic systems, open and closed shell systems, acyclic and cyclic systems, pure hydrocarbons and various heteroatoms, etc.
The QUEST database gathers more than 400 highly-accurate excitation energies of various natures (valence, Rydberg, $n \ra \pis$, $\pi \ra \pis$, singlet, doublet, triplet, and double excitations) for molecules ranging from diatomics to molecules as large as naphthalene (see Fig.~\ref{fig:molecules}).
This set is also chemically diverse, with organic and inorganic systems, open- and closed-shell compounds, acyclic and cyclic systems, pure hydrocarbons and various heteroatoms, etc.
Each of the five subsets making up the QUEST dataset is detailed below.
Throughout the present article, we report several statistical indicators: the mean signed error (MSE), mean absolute error (MAE), root-mean square error (RMSE), and standard deviation of the errors (SDE).
Throughout the present article, we report several statistical indicators: the mean signed error (MSE), mean absolute error (MAE), root-mean-square error (RMSE), and standard deviation of the errors (SDE), as well as the maximum positive [Max(+)] and maximum negative [Max($-$)] errors.
%%% FIGURE 3 %%%
\begin{figure}
@ -421,7 +421,7 @@ Throughout the present article, we report several statistical indicators: the me
%=======================
The QUEST\#1 benchmark set \cite{Loos_2018a} consists of 110 vertical excitation energies (as well as oscillator strengths) from 18 molecules with sizes ranging from one to three non-hydrogen atoms (water, hydrogen sulfide, ammonia, hydrogen chloride, dinitrogen, carbon monoxide, acetylene, ethylene, formaldehyde, methanimine, thioformaldehyde, acetaldehyde, cyclopropene, diazomethane, formamide, ketene, nitrosomethane, and the smallest
streptocyanine). For this set, we provided two sets of TBEs: i) one obtained within the frozen-core approximation and the aug-cc-pVTZ basis set, and ii) another one including further corrections for basis set incompleteness and ``all electron'' effects.
For the former set, we systematically employed FCI/aug-cc-pVTZ values to define our TBEs but for a few cases.
For the former set, we systematically employed FCI/aug-cc-pVTZ values to define our TBEs, except for a few cases.
For the latter set, both the ``all electron'' correlation and the basis set corrections were systematically obtained at the CC3 level of theory and with the d-aug-cc-pV5Z basis for the nine smallest molecules, and slightly more compact basis sets for the larger compounds.
Our TBE/aug-cc-pVTZ reference excitation energies were employed to benchmark a series of popular excited-state wave function methods partially or fully accounting for double and triple excitations, namely CIS(D), CC2, CCSD, STEOM-CCSD, CCSDR(3), CCSDT-3, CC3, ADC(2), and ADC(3).
Our main conclusions were that i) ADC(2) and CC2 show strong similarities in terms of accuracy, ii) STEOM-CCSD is, on average, as accurate as CCSD, the latter overestimating transition energies, iii) CC3 is extremely accurate (with a mean absolute error of only $\sim 0.03$ eV) and that although slightly less accurate than CC3, CCSDT-3 could be used as a reliable reference for benchmark studies, and iv) ADC(3) was found to be significantly less accurate than CC3 by overcorrecting ADC(2) excitation energies.
@ -525,23 +525,32 @@ We refer the interested reader to the {\SupInf} for a detailed discussion of eac
\section{Theoretical best estimates}
\label{sec:TBE}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We discuss in this section the generation of the TBEs obtained with the aug-cc-pVTZ basis as well as oscillator strengths for most transitions.
The exhaustive list of TBEs can be found in Table \ref{tab:TBEs} alongside various specifications:
We discuss in this section the generation of the TBEs obtained with the aug-cc-pVTZ basis.
The exhaustive list of TBEs can be found in Table \ref{tab:TBE} alongside various specifications: the molecule's name, the excitation, its nature (valence, Rydberg, or charge transfer), its oscillator strength (when spatially- and spin-allowed), and its percentage of single excitations $\%T_1$ (computed at the CC3 level).
All these quantities are computed with the same aug-cc-pVTZ basis.
Importantly, we also report the composite approach considered to compute the TBEs (see column ``Method'').
Following an ONIOM-like strategy \cite{Svensson_1996a,Svensson_1996b}, the TBEs are computed as ``A/SB + [B/TB - B/SB]'', where A/SB is the excitation energy computed with a method A in a smaller basis (SB), and B/SB and B/TB are excitation energies computed with a method B in the small basis and target basis TB = aug-cc-pVTZ, respectively.
In terms of numbers, the QUEST database is composed of 488 excitation energies, 434 of them being considered as ``safe'' (\ie, chemically-accurate for the considered basis set and geometry), 291 singlet, 197 triplet, 361 valence, and 125 Rydberg excited states. Among these, 135 transitions correspond to $n \ra \pis$ excitations, 198 to $\pi \ra \pis$, and 13 are doubly-excited states. In terms of molecular size, 146 excitations are obtained in molecules having between 1 and 3 non-hydrogen atoms, 97 excitations from 4 non-hydrogen atom compounds, 177 from molecules composed of 5 or 6 non-hydrogen atoms, and, finally, 68 excitations are obtained from systems with 7 to 10 non-hydrogen atoms.
\begin{ThreePartTable}
\scriptsize
\centering
\begin{longtable}{clccccclc}
\caption{Theoretical best estimates (TBEs, in eV), oscillator strengths $f$, and percentage of single excitations $\%T_1$ involved in the transition (computed at the CC3 level) for the entire QUEST database.
``Method'' provides the protocol employed to compute the TBEs, ``\# atoms'' corresponds to the number of non-H atoms.
``Method'' provides the protocol employed to compute the TBEs.
The nature of the excitation is also provided: V, R, and CT stand for valence, Rydberg, and charge transfer, respectively.
All quantities are obtained with the aug-cc-pVTZ basis.
All quantities are obtained with the aug-cc-pVTZ basis (AVXZ stands for aug-cc-pVXZ).
\label{tab:TBE}}
\\
\hline
\thead{\#} &\thead{Molecule} & \thead{Excitation} & \thead{Nature} & \thead{$\%T_1$} & \thead{$f$} & \thead{TBE} & \thead{Method} & \thead{Safe?}\\
\hline
\endfirsthead
\multicolumn{9}{c}{\tablename\ \thetable\ -- \textit{Continued from previous page}} \\
\hline
\thead{\#} &\thead{Molecule} & \thead{Excitation} & \thead{Nature} & \thead{$\%T_1$} & \thead{$f$} & \thead{TBE} & \thead{Method} & \thead{Safe?}\\
\hline
\endhead
\hline \multicolumn{9}{r}{\textit{Continued on next page}} \\
\endfoot
@ -578,10 +587,10 @@ All quantities are obtained with the aug-cc-pVTZ basis.
29 & & $^3A_2 (n \ra 3s)$ & R & 98 & & 6.31 & FCI/AVTZ & Y \\
30 & Aza-naphthalene & $^1B_{3g} (n \ra \pi^*)$ & V & 88 & & 3.14 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & Y \\
31 & & $^1B_{2u} (\pi \ra \pi^*)$ & V & 86 & 0.19 & 4.28 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & Y \\
32 & & $^1B_{1u} (n \ra \pi^*)$ & V & 88 & n.d. & 4.34 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & Y \\
32 & & $^1B_{1u} (n \ra \pi^*)$ & V & 88 & --- & 4.34 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & Y \\
33 & & $^1B_{2g} (n \ra \pi^*)$ & V & 87 & & 4.55 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & Y \\
34 & & $^1B_{2g} (n \ra \pi^*)$ & V & 84 & & 4.89 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & Y \\
35 & & $^1B_{1u} (n \ra \pi^*)$ & V & 82 & n.d. & 5.24 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & N \\
35 & & $^1B_{1u} (n \ra \pi^*)$ & V & 82 & --- & 5.24 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & N \\
36 & & $^1A_u (n \ra \pi^*)$ & V & 83 & & 5.34 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & Y \\
37 & & $^1B_{3u} (\pi \ra \pi^*)$ & V & 88 & 0.028 & 5.68 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & N \\
38 & & $^1A_g (\pi \ra \pi^*)$ & V & 85 & & 5.8 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & Y \\
@ -884,7 +893,7 @@ All quantities are obtained with the aug-cc-pVTZ basis.
335 & & $^1A' ()$ & R & 92 & 0.038 & 6.27 & CCSDTQ/AVDZ + [CCSDT/AVTZ - CCSDT/AVDZ] & Y \\
336 & & $^3A'' (n \ra \pi^*)$ & V & 99 & & 0.88 & FCI/AVTZ & Y \\
337 & & $^3A' (\pi \ra \pi^*)$ & V & 98 & & 5.61 & FCI/AVTZ & Y \\
338 & Octatetraene & $^1B_u (\pi \ra \pi^*)$ & V & 91 & & 4.78 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & Y \\
338 & Octatetraene & $^1B_u (\pi \ra \pi^*)$ & V & 91 & --- & 4.78 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & Y \\
339 & & $^1A_g (\pi \ra \pi^*)$ & V & 63 & & 4.9 & CCSDT/6-31+G(d) + [CC3/AVTZ - CC3/6-31+G(d)] & N \\
340 & & $^3B_u (\pi \ra \pi^*)$ & V & 97 & & 2.36 & CC3/AVTZ & N \\
341 & & $^3A_g (\pi \ra \pi^*)$ & V & 98 & & 3.73 & CC3/AVTZ & N \\
@ -1040,14 +1049,11 @@ All quantities are obtained with the aug-cc-pVTZ basis.
\section{Benchmarks}
\label{sec:bench}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
In this section, we report a comprehensive benchmark of various lower-order methods on the entire QUEST set which is composed by more than \alert{470} excitations.
In this section, we report a comprehensive benchmark of various lower-order methods on the entire QUEST dataset.
Statistical quantities are reported in Table \ref{tab:stat}.
We also provide a specific analysis for each type of excited state.
Hence, the MSE and MAE are reported for various types of excited states and molecular sizes.
The distribution of the errors in vertical excitation energies (with respect to the TBE/aug-cc-pVTZ reference values) are represented in Fig.~\ref{fig:QUEST_stat} for all the single excitations of the entire QUEST database.
% En SI, au moins des histogrammes pour toutes les approches.
% Un graphe avec les MAE en fonction des subsets pour une même méthode, sûrement pour la taille.
The distribution of the errors in vertical excitation energies (with respect to the TBE/aug-cc-pVTZ reference values) is represented in Fig.~\ref{fig:QUEST_stat} for all the safe excitations of the entire QUEST database.
\begin{sidewaystable}
\scriptsize
@ -1099,12 +1105,12 @@ MAE & & 0.22 & 0.16 & 0.22 & 0.11 & 0.12 & 0.05 & 0.04 & 0.02 & 0.20 & 0.22
\end{sidewaystable}
\begin{figure}
\includegraphics[width=\textwidth]{histograms}
\centering
\includegraphics[width=0.9\textwidth]{histograms}
\caption{Distribution of the error (in eV) in excitation energies (with respect to TBE/aug-cc-pVTZ values) for various methods for the entire QUEST database.
Only the ``safe'' TBEs are considered (see Table \ref{tab:TBE}).
See Table \ref{tab:stat} for the values of the corresponding statistical quantities.
% The boxes contain the data between first and third quartiles, and the line in the box represents the median.
% The outliers are shown as dots.
QC and TM indicate that Q-CHEM and TURBOMOLE scaling factors are considered, respectively.
\label{fig:QUEST_stat}}
\end{figure}

Binary file not shown.