3
0
mirror of https://github.com/triqs/dft_tools synced 2024-10-31 19:23:45 +01:00

Add statistic tools

- binning, jackknife, autocorrelation, observable.
- DRAFT only : in development, debug. Doc to be written.
This commit is contained in:
tayral 2014-02-11 21:06:19 +01:00 committed by Olivier Parcollet
parent e182762bf2
commit 87dc9aeaa5
7 changed files with 657 additions and 0 deletions

View File

@ -0,0 +1,2 @@
all_tests()

View File

@ -0,0 +1,62 @@
#include <triqs/arrays.hpp>
#include <triqs/statistics.hpp>
#include "./correlated_gaussian.hpp"
#include <iostream>
#define TEST(X) std::cout << BOOST_PP_STRINGIZE((X)) << " ---> "<< (X) <<std::endl<<std::endl;
using namespace triqs::statistics;
void test_0(){
//very simple test to test that everything works
observable<double> X;
X<<1.0;
X<<-1.0;
X<<.5;
X<<.0;
std::cout << average_and_error(X) << std::endl;
std::cout << average_and_error(X*X) << std::endl;
}
void test_1(int argc, char ** argv){
int N=100000, L=100;
if(argc==3){
N = atoi(argv[1]); //size
L = atoi(argv[2]); //autocorrelation time
}
std::cout << "N = " << N << std::endl;
std::cout << "L = " << L << std::endl;
int seed= 1567;
std::vector<double> A(N);
correlated_gaussian_vector(A, seed, L);
double intrinsic_variance = 1;
TEST( autocorrelation_time(A));
TEST( autocorrelation_time_from_binning(A,intrinsic_variance));
TEST( autocorrelation_time_from_binning(A));
}
void test_2(int argc, char ** argv){
int N=10000, L=40;
if(argc==3){
N = atoi(argv[1]); //size
L = atoi(argv[2]); //autocorrelation time
}
int seed= 1567;
double avg = 2;
std::vector<double> A(N);
correlated_gaussian_vector(A, seed, L, avg);
observable<double> V;
for (auto & x:A) V << x;
TEST(autocorrelation_time(V));
TEST(autocorrelation_time(V*V));
}
int main(int argc, char ** argv){
try{ test_0(); } catch(std::exception const & e){ std::cerr << e.what() << std::endl;}
try{ test_1(argc,argv); } catch(std::exception const & e){ std::cerr << e.what() << std::endl;}
try{ test_2(argc,argv); } catch(std::exception const & e){ std::cerr << e.what() << std::endl;}
}

View File

@ -0,0 +1,14 @@
0.125 +/- 0.426956
0.0763889 +/- 0.174719
N = 100000
L = 100
(autocorrelation_time(A)) ---> 89
(autocorrelation_time_from_binning(A,intrinsic_variance)) ---> 80.2806
(autocorrelation_time_from_binning(A)) ---> 106.764
(autocorrelation_time(V)) ---> 41
(autocorrelation_time(V*V)) ---> 40

View File

@ -0,0 +1,32 @@
#pragma once
#include <boost/random/variate_generator.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/normal_distribution.hpp>
using uint = unsigned int;
template <typename TimeSeries> void boost_independent_gaussian_vector(TimeSeries& t, int seed) {
boost::variate_generator<boost::mt19937, boost::normal_distribution<>> generator((boost::mt19937(seed)),
(boost::normal_distribution<>()));
for (size_t i = 0; i < t.size(); ++i) t[i] = generator();
}
template <typename TimeSeries> void correlated_gaussian_vector(TimeSeries& t, int seed, size_t correlation_length) {
boost_independent_gaussian_vector(t, seed);
TimeSeries B(t.size());
B[0] = t[0];
double f = exp(-1. / correlation_length);
for (size_t i = 1; i < t.size(); i++) B[i] = f * B[i - 1] + sqrt(1 - f * f) * t[i];
t = B;
}
template <typename TimeSeries> void correlated_gaussian_vector(TimeSeries& t, int seed, size_t correlation_length, double avg) {
boost_independent_gaussian_vector(t, seed);
TimeSeries B(t.size());
B[0] = t[0];
double f = exp(-1. / correlation_length);
for (size_t i = 1; i < t.size(); i++) B[i] = f * B[i - 1] + sqrt(1 - f * f) * t[i];
t = B;
for (size_t i = 1; i < t.size(); i++) t[i] = t[i] + avg;
}

View File

@ -0,0 +1,64 @@
#include <triqs/arrays.hpp>
#include <triqs/statistics.hpp>
#include <iostream>
using triqs::statistics::observable;
using namespace triqs::arrays;
template <typename T>
std::ostream &operator<<(std::ostream &out, std::vector<T> const &v) {
for (auto const &x : v)
out << x << " ";
return out;
}
int main() {
try {
{
auto A = observable<double>{};
for (int i = 0; i < 1000; ++i)
A << 6;
std::cout << average_and_error(A) << std::endl;
}
{
auto A = observable<array<double, 2> >{};
for (int i = 0; i < 1000; ++i)
A << array<double, 2>{ { i, 2 * i }, { 3 * i, 4 * i } };
for (int i = 0; i < 1000; ++i)
A << 2 * array<double, 2>{ { i, 2 * i }, { 3 * i, 4 * i } };
std::cout << average(A) << std::endl;
std::cout << average_and_error(A) << std::endl;
}
{
observable<double> A, B;
for (int i = 0; i < 1000; ++i) {
A << i;
B << 5;
}
// operations
auto ab = A / B;
//auto ab_j = make_jackknife(A) / make_jackknife(B);
double r = eval(ab, 1);
std::cout << "eval A/B in 1 " << r << std::endl;
r = eval(cos(A), 1);
std::cout << r << " == " << std::cos(1) << std::endl;
std::cout << "<A/B> = "<< average(A / B) << std::endl;
std::cout << average_and_error(A / B) << std::endl;
std::cout << average_and_error(cos(A)) << std::endl;
std::cout << average_and_error(cos(A) / B) << std::endl;
}
}
TRIQS_CATCH_AND_ABORT;
}

23
triqs/statistics.hpp Normal file
View File

@ -0,0 +1,23 @@
/*******************************************************************************
*
* TRIQS: a Toolbox for Research in Interacting Quantum Systems
*
* Copyright (C) 2011-2013 by O. Parcollet
*
* TRIQS is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* TRIQS is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* TRIQS. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#pragma once
#include "./statistics/statistics.hpp"

View File

@ -0,0 +1,460 @@
/*******************************************************************************
*
* TRIQS: a Toolbox for Research in Interacting Quantum Systems
*
* Copyright (C) 2014 by T. Ayral, O. Parcollet
*
* TRIQS is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* TRIQS is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* TRIQS. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#pragma once
#include <triqs/utility/c14.hpp>
#include <triqs/utility/exceptions.hpp>
#include <triqs/utility/tuple_tools.hpp>
#include <type_traits>
#include <vector>
#include <cmath>
#include <boost/iterator/iterator_facade.hpp>
namespace triqs {
namespace statistics {
// trait to find out if T models the concept TimeSeries
template <typename T> struct is_time_series : std::false_type {};
template <typename T> struct is_time_series<T&> : is_time_series<T> {};
template <typename T> struct is_time_series<T&&> : is_time_series<T> {};
template <typename T> struct is_time_series<T const> : is_time_series<T> {};
template <typename T> struct is_time_series<std::vector<T>> : std::true_type {};
/* *********************************************************
*
* Binning
*
* ********************************************************/
template <typename ValueType> class binned_series {
int bin_size;
std::vector<ValueType> binned;
public:
using value_type = ValueType;
template <typename TimeSeries>
binned_series(TimeSeries const& t, int bin_size_)
: bin_size(bin_size_), binned(t.size() / bin_size_, value_type{}) {
if (bin_size_ > t.size())
TRIQS_RUNTIME_ERROR << "bin size (" << bin_size_ << ") cannot be larger than size (" << t.size() << ") of time series";
for (int i = 0; i < size(); i++) {
for (int j = 0; j < bin_size; j++) binned[i] += t[i * bin_size + j];
binned[i] /= bin_size;
}
}
value_type operator[](int i) const { return binned[i]; }
int size() const { return binned.size(); }
using const_iterator = typename std::vector<ValueType>::const_iterator;
const_iterator begin() const { return binned.begin(); }
const_iterator end() const { return binned.end(); }
friend std::ostream& operator<<(std::ostream& out, binned_series const& s_) {
for (auto const& x : s_.binned) out << x << " ";
return out;
}
};
template <typename T> struct is_time_series<binned_series<T>> : std::true_type {};
/// Factory
template <typename TimeSeries>
binned_series<typename TimeSeries::value_type> make_binned_series(TimeSeries const& t, int bin_size) {
return {t, bin_size};
}
/* *********************************************************
*
* TS_observer: an implementation class
* Contains a ref or a value to a TS, and the implementation of the const_iterator
*
* ********************************************************/
template <typename Derived, typename TimeSeries> class ts_observer { // TimeSeries can be a T or a T &
protected:
TimeSeries ts;
public:
using value_type = typename std::remove_reference<TimeSeries>::type::value_type;
template <typename TS> ts_observer(TS&& t_) : ts(std::forward<TS>(t_)) {}
ts_observer(ts_observer const&) = default;
ts_observer(ts_observer&&) = default;
ts_observer& operator=(ts_observer const&) = delete;
ts_observer& operator=(ts_observer&&) = default;
int size() const { return ts.size(); }
// const_iterator
class const_iterator : public boost::iterator_facade<const_iterator, value_type, boost::forward_traversal_tag, value_type> {
friend class boost::iterator_core_access;
Derived const* t;
int u;
void increment() { ++u; }
value_type dereference() const { return (*t)[u]; }
bool equal(const_iterator const& other) const { return ((t == other.t) && (other.u == u)); }
public:
const_iterator(Derived const* m, bool at_end) : t(m) { u = (at_end ? m->size() : 0); }
};
const_iterator begin() const {
return {static_cast<Derived const*>(this), false};
}
const_iterator end() const {
return {static_cast<Derived const*>(this), true};
}
const_iterator cbegin() const { return begin(); }
const_iterator cend() const { return end(); }
// printing
friend std::ostream& operator<<(std::ostream& out, ts_observer const& s_) {
for (auto const& x : s_) out << x << " ";
return out;
}
};
/* *********************************************************
*
* Jackknife
*
* ********************************************************/
template <typename TimeSeries> class jackknife : public ts_observer<jackknife<TimeSeries>, TimeSeries> {
using B = ts_observer<jackknife<TimeSeries>, TimeSeries>;
typename B::value_type sum;
public:
template <typename TS> jackknife(TS&& t_) : B(std::forward<TS>(t_)) {
sum = typename B::value_type{0 * this->ts[0]};
auto si = this->ts.size();
for (int i = 0; i < si; i++) sum += this->ts[i];
}
typename B::value_type operator[](int i) const { return (sum - this->ts[i]) / (this->size() - 1); }
};
///
template <typename TimeSeries> jackknife<TimeSeries> make_jackknife(TimeSeries&& t) {
return {std::forward<TimeSeries>(t)};
}
template <typename T> struct is_time_series<jackknife<T>> : std::true_type {};
/* *********************************************************
*
* Observable
*
* ********************************************************/
template <typename T> class observable {
std::vector<T> _series;
public:
observable() { _series.reserve(1000); }
observable& operator<<(T x) { // copy and move : check speed ... or overload const &, &&
_series.push_back(std::move(x));
return *this;
}
template <typename A> observable& operator<<(A&& a) {
_series.emplace_back(std::forward<A>(a));
return *this;
}
// TimeSeries concept
using value_type = T;
int size() const { return _series.size(); }
T operator[](int i) const { return _series[i]; }
};
template <typename T> struct is_time_series<observable<T>> : std::true_type {};
/* *********************************************************
*
* Expressions
*
* ********************************************************/
// -------------- clef leaf evaluation ----------------------
struct repl_by_jack {};
struct bin_and_repl_by_jack {
int bin_size;
};
template <typename T> auto eval(observable<T> const& obs, repl_by_jack) DECL_AND_RETURN(make_jackknife(obs));
template <typename T>
auto eval(observable<T> const& obs, bin_and_repl_by_jack info)
DECL_AND_RETURN(make_jackknife(make_binned_series(obs), info.bin_size));
template <typename TS> auto eval(TS const& obs, int i) -> std::c14::enable_if_t<is_time_series<TS>::value, decltype(obs[i])> {
return obs[i];
}
// --------- Operations --------------------------
// The principle is :
// All operations between a time_series and anything results in a clef lazy expression
// This implements the case of binary/unary operators when there is no clef expression (which is already handled by clef operators
// hence this avoid ambiguity).
#define TRIQS_TS_OPERATION(TAG, OP) \
template <typename L, typename R> \
std::c14::enable_if_t<(is_time_series<L>::value || is_time_series<R>::value) && (!clef::is_any_lazy<L, R>::value), \
clef::expr_node_t<clef::tags::TAG, L, R>> operator OP(L&& l, R&& r) { \
return {clef::tags::TAG(), std::forward<L>(l), std::forward<R>(r)}; \
}
TRIQS_TS_OPERATION(plus, +);
TRIQS_TS_OPERATION(minus, -);
TRIQS_TS_OPERATION(multiplies, *);
TRIQS_TS_OPERATION(divides, / );
#undef TRIQS_TS_OPERATION
// Any function overloaded for clef should also accept object modelling is_time_series
// Here : define all math function defined for clef ...
#define AUX(r, data, elem) TRIQS_CLEF_EXTEND_FNT_LAZY(elem, is_time_series)
BOOST_PP_SEQ_FOR_EACH(AUX, nil, TRIQS_CLEF_STD_MATH_FNT_TO_MAKE_LAZY);
#undef AUX
#undef TRIQS_TS_FUNCTION
// ------------- Dress an expression as a time serie --------------------------
template <typename Expr> struct _immutable_time_series {
Expr expr;
using value_type = typename std::remove_reference<decltype(eval(expr, 0))>::type;
value_type operator[](int i) const { return eval(expr, i); }
int size() const { return get_size(expr); }
};
template <typename T> struct is_time_series<_immutable_time_series<T>> : std::true_type {};
// make_immutable_time_series (x) returns :
// x if it is already a time_series
// _immutable_time_series(x) if it is an expression
template <typename T>
std::c14::enable_if_t<clef::is_clef_expression<T>::value, _immutable_time_series<T>> make_immutable_time_series(T&& x) {
return {std::forward<T>(x)};
}
template <typename T> std::c14::enable_if_t<is_time_series<T>::value, T> make_immutable_time_series(T&& x) {
return std::forward<T>(x);
}
// ------------- Computation of the size --------------------------
// a function object that when called on x, returns nothing but :
// if x models TimeSeries : check if its sizes is equal to previously encountered and stores it
// otherwise do nothing
struct _get_size_visitor {
int res;
template <typename T> std::c14::enable_if_t<!is_time_series<T>::value> operator()(T const&) {}
template <typename T> std::c14::enable_if_t<is_time_series<T>::value> operator()(T const& obs) {
int i = obs.size();
if ((res * i != 0) && (res != i)) TRIQS_RUNTIME_ERROR << "Expression of time series with time mismatch";
res = i; // keep the result
}
};
template <typename T> int get_size(T const& x) {
auto l = _get_size_visitor{0};
clef::apply_on_each_leaf(l, x);
return l.res;
}
/* *********************************************************
*
* Average and error
*
* ********************************************************/
// ------------- A value and its error --------------------------
template <typename T> struct value_and_error_bar {
T value, error_bar; // error is variance : a T???? complex ??
friend std::ostream& operator<<(std::ostream& out, value_and_error_bar const& ve) {
return out << ve.value << " +/- " << ve.error_bar;
}
};
// ------------- empirical average and variance --------------------------
template <typename TimeSeries> typename TimeSeries::value_type empirical_average(TimeSeries const& t) {
auto si = t.size();
if (si == 0) return typename TimeSeries::value_type{};
auto sum = t[0];
for (int i = 1; i < si; ++i) sum += t[i];
return sum / t.size();
}
///
template <typename TimeSeries> typename TimeSeries::value_type empirical_variance(TimeSeries const& t) {
auto si = t.size();
if (si == 0) return typename TimeSeries::value_type{};
auto avg = t[0];
decltype(avg) sum = t[0] * t[0]; // also valid if t[0] is an array e.g., i.e. no trivial contructor...
for (int i = 1; i < si; ++i) {
sum += t[i] * t[i];
avg += t[i];
}
avg /= t.size();
sum /= t.size();
return sum - avg * avg;
}
// ------------- Overload average for observables and expressions of observables --------------------------
template <typename T> T average(observable<T> const& obs) { return empirical_average(obs); }
template <typename ObservableExpr>
std::c14::enable_if_t<clef::is_clef_expression<ObservableExpr>::value, double> average(ObservableExpr const& obs) {
return empirical_average(_immutable_time_series<ObservableExpr>{obs});
}
// ------------- Overload average and error for observables and expressions of observables --------------------------
template <typename T> value_and_error_bar<typename T::value_type> empirical_average_and_error(T const& ts) {
using std::sqrt;
return {empirical_average(ts), sqrt((ts.size() - 1.0) * (empirical_variance(ts)))};
}
template <typename T> value_and_error_bar<T> average_and_error(observable<T> const& obs) {
auto ts = make_jackknife(obs);
return empirical_average_and_error(ts);
}
template <typename T> value_and_error_bar<T> average_and_error(observable<T> const& obs, int bin_size) {
auto ts = make_jackknife(make_binned_series(obs, bin_size));
return empirical_average_and_error(ts);
}
template <typename ObservableExpr>
std::c14::enable_if_t<clef::is_clef_expression<ObservableExpr>::value, value_and_error_bar<double>>
average_and_error(ObservableExpr const& obs) {
auto expr_jack = eval(obs, repl_by_jack{}); // replace every TS leaf by a jacknifed version
return empirical_average_and_error(make_immutable_time_series(expr_jack));
}
template <typename ObservableExpr>
std::c14::enable_if_t<clef::is_clef_expression<ObservableExpr>::value, value_and_error_bar<double>>
average_and_error(ObservableExpr const& obs, int bin_size) {
auto expr_bin_jack = eval(obs, bin_and_repl_by_jack{bin_size});
return empirical_average_and_error(make_immutable_time_series(expr_bin_jack));
}
/* *********************************************************
*
* Auto-correlations
*
* ********************************************************/
// ------ k-> (<f(a(k))f(a(0))> - <f(a)>^2 )/ (<f(a)^2> - <f(a)>^2 ) --------------------
template <typename TimeSeries>
class normalized_autocorrelation : public ts_observer<normalized_autocorrelation<TimeSeries>, TimeSeries> {
using B = ts_observer<normalized_autocorrelation<TimeSeries>, TimeSeries>;
typename B::value_type var, avg2;
public:
template <typename TS> normalized_autocorrelation(TS&& t_) : B(std::forward<TS>(t_)) {
var = empirical_variance(this->ts);
auto avg = empirical_average(this->ts);
avg2 = avg * avg;
}
typename B::value_type operator[](int k) const {
const int N = this->size();
auto r = typename B::value_type{0 * this->ts[0]};
for (int i = 0; i < N - k; i++) r += this->ts[i + k] * this->ts[i];
r = (r / (N - k) - avg2) / var;
return r;
}
};
///
template <typename TimeSeries> normalized_autocorrelation<TimeSeries> make_normalized_autocorrelation(TimeSeries&& t) {
return {std::forward<TimeSeries>(t)};
}
// ------ Auto-correlation time from the computation of the autocorrelation --------------------
template <typename TimeSeries> int autocorrelation_time(TimeSeries const& a) {
auto normalized_autocorr = make_normalized_autocorrelation(make_immutable_time_series(a)); // is a N*dim_f matrix...
double t_int = normalized_autocorr[0]; // in principle, a vector dim_f
double coeff_tau = 6; // if exponential decay -> 0.25 % precision
for (int l_max = 1; l_max < coeff_tau * t_int; l_max++) t_int += normalized_autocorr[l_max];
return int(t_int);
}
// ------ Auto-correlation time from binning --------------------
template <typename TimeSeries>
double autocorrelation_time_from_binning(TimeSeries const& A, double intrinsic_variance, double precision = 0.05) {
auto size = make_immutable_time_series(A).size();
auto t_cor = [&](int b) {
auto A_binned = make_binned_series(make_immutable_time_series(A), b);
double var = empirical_variance(A_binned);
return 0.5 * (b * var / intrinsic_variance - 1.);
};
double coeff = -6 * std::log(precision); // heuristic -> 0.25 % precision
int B = 2;
while (B < coeff * std::abs(t_cor(B))) ++B;
// now, B is large enough: average over a few estimates
int Navg = std::min(100, int(size - B));
double autocorr_time = 0.;
for (int b = 0; b < Navg; b++) autocorr_time += t_cor(B + b);
return autocorr_time / Navg;
}
// ------ Auto-correlation time from binning --------------------
template <typename TimeSeries> double autocorrelation_time_from_binning(TimeSeries const& A) {
auto size = make_immutable_time_series(A).size();
double var1 = empirical_variance(make_immutable_time_series(A));
auto t_cor = [&](int b) {
auto A_binned = make_binned_series(make_immutable_time_series(A), b);
double var = empirical_variance(A_binned);
return .5 * var / var1 * b;
};
int B = 2;
double autocorr_time = t_cor(B);
double slope = 1.;
int small_slope_count = 0;
std::vector<double> t;
while (small_slope_count < 100 && B < size / 10) {
double t_cor_new = t_cor(++B);
slope = (std::abs(t_cor_new - autocorr_time) / autocorr_time);
if (slope < 1e-5) small_slope_count++;
if (small_slope_count > 0) t.push_back(t_cor_new);
autocorr_time = t_cor_new;
}
return empirical_average(t);
}
} // namespace statistics
} // triqs