From 87dc9aeaa519d70213b0729591059960058dce4f Mon Sep 17 00:00:00 2001 From: tayral Date: Tue, 11 Feb 2014 21:06:19 +0100 Subject: [PATCH] Add statistic tools - binning, jackknife, autocorrelation, observable. - DRAFT only : in development, debug. Doc to be written. --- test/triqs/statistics/CMakeLists.txt | 2 + .../triqs/statistics/autocorrelation_time.cpp | 62 +++ .../statistics/autocorrelation_time.output | 14 + test/triqs/statistics/correlated_gaussian.hpp | 32 ++ test/triqs/statistics/observable.cpp | 64 +++ triqs/statistics.hpp | 23 + triqs/statistics/statistics.hpp | 460 ++++++++++++++++++ 7 files changed, 657 insertions(+) create mode 100644 test/triqs/statistics/CMakeLists.txt create mode 100644 test/triqs/statistics/autocorrelation_time.cpp create mode 100644 test/triqs/statistics/autocorrelation_time.output create mode 100644 test/triqs/statistics/correlated_gaussian.hpp create mode 100644 test/triqs/statistics/observable.cpp create mode 100644 triqs/statistics.hpp create mode 100644 triqs/statistics/statistics.hpp diff --git a/test/triqs/statistics/CMakeLists.txt b/test/triqs/statistics/CMakeLists.txt new file mode 100644 index 00000000..917a5baf --- /dev/null +++ b/test/triqs/statistics/CMakeLists.txt @@ -0,0 +1,2 @@ +all_tests() + diff --git a/test/triqs/statistics/autocorrelation_time.cpp b/test/triqs/statistics/autocorrelation_time.cpp new file mode 100644 index 00000000..216740b1 --- /dev/null +++ b/test/triqs/statistics/autocorrelation_time.cpp @@ -0,0 +1,62 @@ +#include +#include +#include "./correlated_gaussian.hpp" +#include +#define TEST(X) std::cout << BOOST_PP_STRINGIZE((X)) << " ---> "<< (X) < X; + X<<1.0; + X<<-1.0; + X<<.5; + X<<.0; + std::cout << average_and_error(X) << std::endl; + std::cout << average_and_error(X*X) << std::endl; + +} + +void test_1(int argc, char ** argv){ + int N=100000, L=100; + if(argc==3){ + N = atoi(argv[1]); //size + L = atoi(argv[2]); //autocorrelation time + } + std::cout << "N = " << N << std::endl; + std::cout << "L = " << L << std::endl; + int seed= 1567; + std::vector A(N); + correlated_gaussian_vector(A, seed, L); + double intrinsic_variance = 1; + + TEST( autocorrelation_time(A)); + TEST( autocorrelation_time_from_binning(A,intrinsic_variance)); + TEST( autocorrelation_time_from_binning(A)); +} + +void test_2(int argc, char ** argv){ + int N=10000, L=40; + if(argc==3){ + N = atoi(argv[1]); //size + L = atoi(argv[2]); //autocorrelation time + } + int seed= 1567; + double avg = 2; + std::vector A(N); + correlated_gaussian_vector(A, seed, L, avg); + + observable V; + for (auto & x:A) V << x; + TEST(autocorrelation_time(V)); + TEST(autocorrelation_time(V*V)); +} + + +int main(int argc, char ** argv){ + try{ test_0(); } catch(std::exception const & e){ std::cerr << e.what() << std::endl;} + try{ test_1(argc,argv); } catch(std::exception const & e){ std::cerr << e.what() << std::endl;} + try{ test_2(argc,argv); } catch(std::exception const & e){ std::cerr << e.what() << std::endl;} +} + diff --git a/test/triqs/statistics/autocorrelation_time.output b/test/triqs/statistics/autocorrelation_time.output new file mode 100644 index 00000000..0cd6af3b --- /dev/null +++ b/test/triqs/statistics/autocorrelation_time.output @@ -0,0 +1,14 @@ +0.125 +/- 0.426956 +0.0763889 +/- 0.174719 +N = 100000 +L = 100 +(autocorrelation_time(A)) ---> 89 + +(autocorrelation_time_from_binning(A,intrinsic_variance)) ---> 80.2806 + +(autocorrelation_time_from_binning(A)) ---> 106.764 + +(autocorrelation_time(V)) ---> 41 + +(autocorrelation_time(V*V)) ---> 40 + diff --git a/test/triqs/statistics/correlated_gaussian.hpp b/test/triqs/statistics/correlated_gaussian.hpp new file mode 100644 index 00000000..7b19bb46 --- /dev/null +++ b/test/triqs/statistics/correlated_gaussian.hpp @@ -0,0 +1,32 @@ +#pragma once +#include +#include +#include +using uint = unsigned int; + +template void boost_independent_gaussian_vector(TimeSeries& t, int seed) { + boost::variate_generator> generator((boost::mt19937(seed)), + (boost::normal_distribution<>())); + for (size_t i = 0; i < t.size(); ++i) t[i] = generator(); +} + +template void correlated_gaussian_vector(TimeSeries& t, int seed, size_t correlation_length) { + boost_independent_gaussian_vector(t, seed); + TimeSeries B(t.size()); + B[0] = t[0]; + double f = exp(-1. / correlation_length); + for (size_t i = 1; i < t.size(); i++) B[i] = f * B[i - 1] + sqrt(1 - f * f) * t[i]; + t = B; +} + + +template void correlated_gaussian_vector(TimeSeries& t, int seed, size_t correlation_length, double avg) { + boost_independent_gaussian_vector(t, seed); + TimeSeries B(t.size()); + B[0] = t[0]; + double f = exp(-1. / correlation_length); + for (size_t i = 1; i < t.size(); i++) B[i] = f * B[i - 1] + sqrt(1 - f * f) * t[i]; + t = B; + for (size_t i = 1; i < t.size(); i++) t[i] = t[i] + avg; +} + diff --git a/test/triqs/statistics/observable.cpp b/test/triqs/statistics/observable.cpp new file mode 100644 index 00000000..5886e4f0 --- /dev/null +++ b/test/triqs/statistics/observable.cpp @@ -0,0 +1,64 @@ +#include +#include +#include + +using triqs::statistics::observable; +using namespace triqs::arrays; + +template +std::ostream &operator<<(std::ostream &out, std::vector const &v) { + for (auto const &x : v) + out << x << " "; + return out; +} + +int main() { + + try { + { + auto A = observable{}; + + for (int i = 0; i < 1000; ++i) + A << 6; + std::cout << average_and_error(A) << std::endl; + } + + { + auto A = observable >{}; + + for (int i = 0; i < 1000; ++i) + A << array{ { i, 2 * i }, { 3 * i, 4 * i } }; + + for (int i = 0; i < 1000; ++i) + A << 2 * array{ { i, 2 * i }, { 3 * i, 4 * i } }; + + std::cout << average(A) << std::endl; + std::cout << average_and_error(A) << std::endl; + } + + { + observable A, B; + + for (int i = 0; i < 1000; ++i) { + A << i; + B << 5; + } + + // operations + auto ab = A / B; + //auto ab_j = make_jackknife(A) / make_jackknife(B); + double r = eval(ab, 1); + std::cout << "eval A/B in 1 " << r << std::endl; + + r = eval(cos(A), 1); + std::cout << r << " == " << std::cos(1) << std::endl; + + std::cout << " = "<< average(A / B) << std::endl; + std::cout << average_and_error(A / B) << std::endl; + std::cout << average_and_error(cos(A)) << std::endl; + std::cout << average_and_error(cos(A) / B) << std::endl; + } + } + TRIQS_CATCH_AND_ABORT; +} + diff --git a/triqs/statistics.hpp b/triqs/statistics.hpp new file mode 100644 index 00000000..27ec0eee --- /dev/null +++ b/triqs/statistics.hpp @@ -0,0 +1,23 @@ +/******************************************************************************* + * + * TRIQS: a Toolbox for Research in Interacting Quantum Systems + * + * Copyright (C) 2011-2013 by O. Parcollet + * + * TRIQS is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * TRIQS is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * TRIQS. If not, see . + * + ******************************************************************************/ +#pragma once +#include "./statistics/statistics.hpp" + diff --git a/triqs/statistics/statistics.hpp b/triqs/statistics/statistics.hpp new file mode 100644 index 00000000..01bf89f7 --- /dev/null +++ b/triqs/statistics/statistics.hpp @@ -0,0 +1,460 @@ +/******************************************************************************* + * + * TRIQS: a Toolbox for Research in Interacting Quantum Systems + * + * Copyright (C) 2014 by T. Ayral, O. Parcollet + * + * TRIQS is free software: you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your option) any later + * version. + * + * TRIQS is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * TRIQS. If not, see . + * + ******************************************************************************/ +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace triqs { +namespace statistics { + + // trait to find out if T models the concept TimeSeries + template struct is_time_series : std::false_type {}; + template struct is_time_series : is_time_series {}; + template struct is_time_series : is_time_series {}; + template struct is_time_series : is_time_series {}; + template struct is_time_series> : std::true_type {}; + + /* ********************************************************* + * + * Binning + * + * ********************************************************/ + + template class binned_series { + int bin_size; + std::vector binned; + + public: + using value_type = ValueType; + + template + binned_series(TimeSeries const& t, int bin_size_) + : bin_size(bin_size_), binned(t.size() / bin_size_, value_type{}) { + if (bin_size_ > t.size()) + TRIQS_RUNTIME_ERROR << "bin size (" << bin_size_ << ") cannot be larger than size (" << t.size() << ") of time series"; + for (int i = 0; i < size(); i++) { + for (int j = 0; j < bin_size; j++) binned[i] += t[i * bin_size + j]; + binned[i] /= bin_size; + } + } + + value_type operator[](int i) const { return binned[i]; } + int size() const { return binned.size(); } + + using const_iterator = typename std::vector::const_iterator; + const_iterator begin() const { return binned.begin(); } + const_iterator end() const { return binned.end(); } + + friend std::ostream& operator<<(std::ostream& out, binned_series const& s_) { + for (auto const& x : s_.binned) out << x << " "; + return out; + } + }; + + template struct is_time_series> : std::true_type {}; + + /// Factory + template + binned_series make_binned_series(TimeSeries const& t, int bin_size) { + return {t, bin_size}; + } + + /* ********************************************************* + * + * TS_observer: an implementation class + * Contains a ref or a value to a TS, and the implementation of the const_iterator + * + * ********************************************************/ + template class ts_observer { // TimeSeries can be a T or a T & + protected: + TimeSeries ts; + + public: + using value_type = typename std::remove_reference::type::value_type; + + template ts_observer(TS&& t_) : ts(std::forward(t_)) {} + + ts_observer(ts_observer const&) = default; + ts_observer(ts_observer&&) = default; + ts_observer& operator=(ts_observer const&) = delete; + ts_observer& operator=(ts_observer&&) = default; + + int size() const { return ts.size(); } + + // const_iterator + class const_iterator : public boost::iterator_facade { + friend class boost::iterator_core_access; + Derived const* t; + int u; + void increment() { ++u; } + value_type dereference() const { return (*t)[u]; } + bool equal(const_iterator const& other) const { return ((t == other.t) && (other.u == u)); } + + public: + const_iterator(Derived const* m, bool at_end) : t(m) { u = (at_end ? m->size() : 0); } + }; + + const_iterator begin() const { + return {static_cast(this), false}; + } + const_iterator end() const { + return {static_cast(this), true}; + } + const_iterator cbegin() const { return begin(); } + const_iterator cend() const { return end(); } + + // printing + friend std::ostream& operator<<(std::ostream& out, ts_observer const& s_) { + for (auto const& x : s_) out << x << " "; + return out; + } + }; + + /* ********************************************************* + * + * Jackknife + * + * ********************************************************/ + template class jackknife : public ts_observer, TimeSeries> { + using B = ts_observer, TimeSeries>; + typename B::value_type sum; + + public: + template jackknife(TS&& t_) : B(std::forward(t_)) { + sum = typename B::value_type{0 * this->ts[0]}; + auto si = this->ts.size(); + for (int i = 0; i < si; i++) sum += this->ts[i]; + } + + typename B::value_type operator[](int i) const { return (sum - this->ts[i]) / (this->size() - 1); } + }; + + /// + template jackknife make_jackknife(TimeSeries&& t) { + return {std::forward(t)}; + } + + template struct is_time_series> : std::true_type {}; + + /* ********************************************************* + * + * Observable + * + * ********************************************************/ + + template class observable { + std::vector _series; + + public: + observable() { _series.reserve(1000); } + + observable& operator<<(T x) { // copy and move : check speed ... or overload const &, && + _series.push_back(std::move(x)); + return *this; + } + template observable& operator<<(A&& a) { + _series.emplace_back(std::forward(a)); + return *this; + } + + // TimeSeries concept + using value_type = T; + int size() const { return _series.size(); } + T operator[](int i) const { return _series[i]; } + }; + template struct is_time_series> : std::true_type {}; + + /* ********************************************************* + * + * Expressions + * + * ********************************************************/ + + // -------------- clef leaf evaluation ---------------------- + + struct repl_by_jack {}; + + struct bin_and_repl_by_jack { + int bin_size; + }; + + template auto eval(observable const& obs, repl_by_jack) DECL_AND_RETURN(make_jackknife(obs)); + + template + auto eval(observable const& obs, bin_and_repl_by_jack info) + DECL_AND_RETURN(make_jackknife(make_binned_series(obs), info.bin_size)); + + template auto eval(TS const& obs, int i) -> std::c14::enable_if_t::value, decltype(obs[i])> { + return obs[i]; + } + +// --------- Operations -------------------------- + +// The principle is : +// All operations between a time_series and anything results in a clef lazy expression +// This implements the case of binary/unary operators when there is no clef expression (which is already handled by clef operators +// hence this avoid ambiguity). +#define TRIQS_TS_OPERATION(TAG, OP) \ + template \ + std::c14::enable_if_t<(is_time_series::value || is_time_series::value) && (!clef::is_any_lazy::value), \ + clef::expr_node_t> operator OP(L&& l, R&& r) { \ + return {clef::tags::TAG(), std::forward(l), std::forward(r)}; \ + } + + TRIQS_TS_OPERATION(plus, +); + TRIQS_TS_OPERATION(minus, -); + TRIQS_TS_OPERATION(multiplies, *); + TRIQS_TS_OPERATION(divides, / ); +#undef TRIQS_TS_OPERATION + +// Any function overloaded for clef should also accept object modelling is_time_series +// Here : define all math function defined for clef ... +#define AUX(r, data, elem) TRIQS_CLEF_EXTEND_FNT_LAZY(elem, is_time_series) + BOOST_PP_SEQ_FOR_EACH(AUX, nil, TRIQS_CLEF_STD_MATH_FNT_TO_MAKE_LAZY); +#undef AUX + +#undef TRIQS_TS_FUNCTION + + // ------------- Dress an expression as a time serie -------------------------- + + template struct _immutable_time_series { + Expr expr; + using value_type = typename std::remove_reference::type; + value_type operator[](int i) const { return eval(expr, i); } + int size() const { return get_size(expr); } + }; + + template struct is_time_series<_immutable_time_series> : std::true_type {}; + + // make_immutable_time_series (x) returns : + // x if it is already a time_series + // _immutable_time_series(x) if it is an expression + template + std::c14::enable_if_t::value, _immutable_time_series> make_immutable_time_series(T&& x) { + return {std::forward(x)}; + } + template std::c14::enable_if_t::value, T> make_immutable_time_series(T&& x) { + return std::forward(x); + } + + // ------------- Computation of the size -------------------------- + + // a function object that when called on x, returns nothing but : + // if x models TimeSeries : check if its sizes is equal to previously encountered and stores it + // otherwise do nothing + struct _get_size_visitor { + int res; + template std::c14::enable_if_t::value> operator()(T const&) {} + template std::c14::enable_if_t::value> operator()(T const& obs) { + int i = obs.size(); + if ((res * i != 0) && (res != i)) TRIQS_RUNTIME_ERROR << "Expression of time series with time mismatch"; + res = i; // keep the result + } + }; + + template int get_size(T const& x) { + auto l = _get_size_visitor{0}; + clef::apply_on_each_leaf(l, x); + return l.res; + } + + /* ********************************************************* + * + * Average and error + * + * ********************************************************/ + + // ------------- A value and its error -------------------------- + + template struct value_and_error_bar { + T value, error_bar; // error is variance : a T???? complex ?? + friend std::ostream& operator<<(std::ostream& out, value_and_error_bar const& ve) { + return out << ve.value << " +/- " << ve.error_bar; + } + }; + + // ------------- empirical average and variance -------------------------- + + template typename TimeSeries::value_type empirical_average(TimeSeries const& t) { + auto si = t.size(); + if (si == 0) return typename TimeSeries::value_type{}; + auto sum = t[0]; + for (int i = 1; i < si; ++i) sum += t[i]; + return sum / t.size(); + } + + /// + template typename TimeSeries::value_type empirical_variance(TimeSeries const& t) { + auto si = t.size(); + if (si == 0) return typename TimeSeries::value_type{}; + auto avg = t[0]; + decltype(avg) sum = t[0] * t[0]; // also valid if t[0] is an array e.g., i.e. no trivial contructor... + for (int i = 1; i < si; ++i) { + sum += t[i] * t[i]; + avg += t[i]; + } + avg /= t.size(); + sum /= t.size(); + return sum - avg * avg; + } + + + // ------------- Overload average for observables and expressions of observables -------------------------- + + template T average(observable const& obs) { return empirical_average(obs); } + + template + std::c14::enable_if_t::value, double> average(ObservableExpr const& obs) { + return empirical_average(_immutable_time_series{obs}); + } + + // ------------- Overload average and error for observables and expressions of observables -------------------------- + + template value_and_error_bar empirical_average_and_error(T const& ts) { + using std::sqrt; + return {empirical_average(ts), sqrt((ts.size() - 1.0) * (empirical_variance(ts)))}; + } + + template value_and_error_bar average_and_error(observable const& obs) { + auto ts = make_jackknife(obs); + return empirical_average_and_error(ts); + } + + template value_and_error_bar average_and_error(observable const& obs, int bin_size) { + auto ts = make_jackknife(make_binned_series(obs, bin_size)); + return empirical_average_and_error(ts); + } + + template + std::c14::enable_if_t::value, value_and_error_bar> + average_and_error(ObservableExpr const& obs) { + auto expr_jack = eval(obs, repl_by_jack{}); // replace every TS leaf by a jacknifed version + return empirical_average_and_error(make_immutable_time_series(expr_jack)); + } + + template + std::c14::enable_if_t::value, value_and_error_bar> + average_and_error(ObservableExpr const& obs, int bin_size) { + auto expr_bin_jack = eval(obs, bin_and_repl_by_jack{bin_size}); + return empirical_average_and_error(make_immutable_time_series(expr_bin_jack)); + } + + /* ********************************************************* + * + * Auto-correlations + * + * ********************************************************/ + + // ------ k-> ( - ^2 )/ ( - ^2 ) -------------------- + template + class normalized_autocorrelation : public ts_observer, TimeSeries> { + using B = ts_observer, TimeSeries>; + typename B::value_type var, avg2; + + public: + template normalized_autocorrelation(TS&& t_) : B(std::forward(t_)) { + var = empirical_variance(this->ts); + auto avg = empirical_average(this->ts); + avg2 = avg * avg; + } + + typename B::value_type operator[](int k) const { + const int N = this->size(); + auto r = typename B::value_type{0 * this->ts[0]}; + for (int i = 0; i < N - k; i++) r += this->ts[i + k] * this->ts[i]; + r = (r / (N - k) - avg2) / var; + return r; + } + }; + + /// + template normalized_autocorrelation make_normalized_autocorrelation(TimeSeries&& t) { + return {std::forward(t)}; + } + + // ------ Auto-correlation time from the computation of the autocorrelation -------------------- + + template int autocorrelation_time(TimeSeries const& a) { + auto normalized_autocorr = make_normalized_autocorrelation(make_immutable_time_series(a)); // is a N*dim_f matrix... + double t_int = normalized_autocorr[0]; // in principle, a vector dim_f + double coeff_tau = 6; // if exponential decay -> 0.25 % precision + for (int l_max = 1; l_max < coeff_tau * t_int; l_max++) t_int += normalized_autocorr[l_max]; + return int(t_int); + } + + // ------ Auto-correlation time from binning -------------------- + + template + double autocorrelation_time_from_binning(TimeSeries const& A, double intrinsic_variance, double precision = 0.05) { + + auto size = make_immutable_time_series(A).size(); + + auto t_cor = [&](int b) { + auto A_binned = make_binned_series(make_immutable_time_series(A), b); + double var = empirical_variance(A_binned); + return 0.5 * (b * var / intrinsic_variance - 1.); + }; + + double coeff = -6 * std::log(precision); // heuristic -> 0.25 % precision + int B = 2; + while (B < coeff * std::abs(t_cor(B))) ++B; + + // now, B is large enough: average over a few estimates + int Navg = std::min(100, int(size - B)); + double autocorr_time = 0.; + for (int b = 0; b < Navg; b++) autocorr_time += t_cor(B + b); + return autocorr_time / Navg; + } + + // ------ Auto-correlation time from binning -------------------- + + template double autocorrelation_time_from_binning(TimeSeries const& A) { + + auto size = make_immutable_time_series(A).size(); + double var1 = empirical_variance(make_immutable_time_series(A)); + + auto t_cor = [&](int b) { + auto A_binned = make_binned_series(make_immutable_time_series(A), b); + double var = empirical_variance(A_binned); + return .5 * var / var1 * b; + }; + + int B = 2; + double autocorr_time = t_cor(B); + double slope = 1.; + int small_slope_count = 0; + std::vector t; + while (small_slope_count < 100 && B < size / 10) { + double t_cor_new = t_cor(++B); + slope = (std::abs(t_cor_new - autocorr_time) / autocorr_time); + if (slope < 1e-5) small_slope_count++; + if (small_slope_count > 0) t.push_back(t_cor_new); + autocorr_time = t_cor_new; + } + return empirical_average(t); + } +} // namespace statistics +} // triqs