From dd5291d90dca05f50976b10d42d9da5657641058 Mon Sep 17 00:00:00 2001
From: Anthony Scemama
Date: Mon, 24 Apr 2023 01:01:31 +0200
Subject: [PATCH] Added exc_energy_error.py

---
 scripts/exc_energy_error.py | 186 ++++++++++++++++++++++++++++++++++++
 1 file changed, 186 insertions(+)
 create mode 100755 scripts/exc_energy_error.py

diff --git a/scripts/exc_energy_error.py b/scripts/exc_energy_error.py
new file mode 100755
index 00000000..ba9d7917
--- /dev/null
+++ b/scripts/exc_energy_error.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+# Computes the error on the excitation energy of a CIPSI run.
+
+def student(p,df):
+    import scipy
+    from scipy.stats import t
+    return t.ppf(p, df)
+
+
+def chi2cdf(x,k):
+    import scipy
+    import scipy.stats
+    return scipy.stats.chi2.cdf(x,k)
+
+
+def jarque_bera(data):
+
+    n = max(len(data), 2)
+    norm = 1./ sum( [ w for (_,w) in data ] )
+
+    mu = sum( [ w* x for (x,w) in data ] ) * norm
+    sigma2 = sum( [ w*(x-mu)**2 for (x,w) in data ] ) * norm
+    if sigma2 > 0.:
+        S = ( sum( [ w*(x-mu)**3 for (x,w) in data ] ) * norm ) / sigma2**(3./2.)
+        K = ( sum( [ w*(x-mu)**4 for (x,w) in data ] ) * norm ) / sigma2**2
+    else:
+        S = 0.
+        K = 0.
+
+    # Value of the Jarque-Bera test
+    JB = n/6. * (S**2 + 1./4. * (K-3.)**2)
+
+    # Probability that the data comes from a Gaussian distribution
+    p = 1. - chi2cdf(JB,2)
+
+    return JB, mu, sqrt(sigma2/(n-1)), p
+
+
+
+to_eV = 27.2107362681
+import sys, os
+import scipy
+import scipy.stats
+from math import sqrt, gamma, exp
+import json
+
+
+def read_data(filename,state):
+    """ Read energies and PT2 from input file """
+    with open(filename,'r') as f:
+        lines = json.load(f)['fci']
+
+    print(f"State: {state}")
+
+    gs = []
+    es = []
+
+    for l in lines:
+        try:
+            pt2_0 = l['states'][0]['pt2']
+            e_0 = l['states'][0]['energy']
+            pt2_1 = l['states'][state]['pt2']
+            e_1 = l['states'][state]['energy']
+            gs.append( (e_0, pt2_0) )
+            es.append( (e_1, pt2_1) )
+        except: pass
+
+    def f(p_1, p0, p1):
+        e, pt2 = p0
+        y0, x0 = p_1
+        y1, x1 = p1
+        try:
+            alpha = (y1-y0)/(x0-x1)
+        except ZeroDivisionError:
+            alpha = 1.
+        return [e, pt2, alpha]
+
+    for l in (gs, es):
+        p_1, p0, p1 = l[0], l[0], l[1]
+        l[0] = f(p_1, p0, p1)
+
+        for i in range(1,len(l)-1):
+            p_1 = (l[i-1][0], l[i-1][1])
+            p0 = l[i]
+            p1 = l[i+1]
+            l[i] = f(p_1, p0, p1)
+
+        i = len(l)-1
+        p_1 = (l[i-1][0], l[i-1][1])
+        p0 = l[i]
+        p1 = l[-1]
+        l[i] = f(p_1, p0, p1)
+
+    return [ x+y for x,y in zip(gs,es) ]
+
+
+def compute(data):
+
+    d = []
+    for e0, p0, a0, e1, p1, a1 in data:
+        x = (e1+p1)-(e0+p0)
+        w = 1./sqrt(p0**2 + p1**2)
+        bias = (a1-1.)*p1 - (a0-1.)*p0
+        d.append( (x,w,bias) )
+
+    x = []
+    target = (scipy.stats.norm.cdf(1.)-0.5)*2 # = 0.6827
+
+    print("| %2s | %8s | %8s | %8s | %8s | %8s |"%( "N", "DE", "+/-", "bias", "P(G)", "J"))
+    print("|----+----------+----------+----------+----------+----------|")
+    xmax = (0.,0.,0.,0.,0.,0,0.)
+    for i in range(len(data)-1):
+        jb, mu, sigma, p = jarque_bera( [ (x,w) for (x,w,bias) in d[i:] ] )
+        bias = sum ( [ w * e for (_,w,e) in d[i:] ] ) / sum ( [ w for (_,w,_) in d[i:] ] )
+        mu = (mu+0.5*bias) * to_eV
+        sigma = sigma * to_eV
+        bias = bias * to_eV
+        n = len(data[i:])
+        beta = student(0.5*(1.+target/p) ,n)
+        err = sigma * beta + 0.5*abs(bias)
+        print("| %2d | %8.3f | %8.3f | %8.3f | %8.3f | %8.3f |"%( n, mu, err, bias, p, jb))
+        if n < 3 :
+            continue
+        y = (err, p, mu, err, jb,n,bias)
+        if p > xmax[1]: xmax = y
+        if p < 0.8:
+            continue
+        x.append(y)
+
+    x = sorted(x)
+
+    print("|----+----------+----------+----------+----------+----------|")
+    if x != []:
+        xmax = x[0]
+    _, p, mu, err, jb, n, bias = xmax
+    beta = student(0.5*(1.+target/p),n)
+    print("| %2d | %8.3f | %8.3f | %8.3f | %8.3f | %8.3f |\n"%(n, mu, err, bias, p, jb))
+
+    return mu, err, bias, p
+
+filename = sys.argv[1]
+print(filename)
+if len(sys.argv) > 2:
+    state = int(sys.argv[2])
+else:
+    state = 1
+data = read_data(filename,state)
+mu, err, bias, _ = compute(data)
+print(" %s: %8.3f +/- %5.3f eV\n"%(filename, mu, err))
+
+import numpy as np
+A = np.array( [ [ data[-1][1], 1. ],
+                [ data[-2][1], 1. ] ] )
+B = np.array( [ [ data[-1][0] ],
+                [ data[-2][0] ] ] )
+E0 = np.linalg.solve(A,B)[1]
+A = np.array( [ [ data[-1][4], 1. ],
+                [ data[-2][4], 1. ] ] )
+B = np.array( [ [ data[-1][3] ],
+                [ data[-2][3] ] ] )
+E1 = np.linalg.solve(A,B)[1]
+average_2 = (E1-E0)*to_eV
+
+A = np.array( [ [ data[-1][1], 1. ],
+                [ data[-2][1], 1. ],
+                [ data[-3][1], 1. ] ] )
+B = np.array( [ [ data[-1][0] ],
+                [ data[-2][0] ],
+                [ data[-3][0] ] ] )
+E0 = np.linalg.lstsq(A,B,rcond=None)[0][1]
+A = np.array( [ [ data[-1][4], 1. ],
+                [ data[-2][4], 1. ],
+                [ data[-3][4], 1. ] ] )
+B = np.array( [ [ data[-1][3] ],
+                [ data[-2][3] ],
+                [ data[-3][3] ] ] )
+E1 = np.linalg.lstsq(A,B,rcond=None)[0][1]
+average_3 = (E1-E0)*to_eV
+
+exc = ((data[-1][3] + data[-1][4]) - (data[-1][0] + data[-1][1])) * to_eV
+error_2 = abs(average_2 - average_3)
+error_3 = abs(average_3 - exc)
+print(" 2-3 points: %.3f +/- %.3f "% (average_3, error_2))
+print(" largest wf: %.3f +/- %.3f "%(average_3, error_3))
+
+
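For context, a minimal sketch of the input the script consumes, assuming only what read_data() parses: a JSON file whose 'fci' entry is a list of CIPSI iterations, each carrying per-state 'energy' and 'pt2' values. The file name and the numbers below are hypothetical, and a real run would contain many more iterations than shown here.

    # Sketch only: fabricate a JSON file with the
    # 'fci' -> 'states' -> 'energy'/'pt2' layout that read_data() expects,
    # then point the script at it, e.g.
    #
    #     ./scripts/exc_energy_error.py h2o.fci.json 1
    #
    # (the optional second argument is the excited-state index, default 1).
    import json

    doc = {"fci": [
        {"states": [{"energy": -76.1234, "pt2": 0.0123},    # state 0 (ground)
                    {"energy": -75.8321, "pt2": 0.0145}]},   # state 1 (excited)
        {"states": [{"energy": -76.1301, "pt2": 0.0062},
                    {"energy": -75.8402, "pt2": 0.0074}]},
        {"states": [{"energy": -76.1340, "pt2": 0.0031},
                    {"energy": -75.8450, "pt2": 0.0037}]},
    ]}

    with open("h2o.fci.json", "w") as f:   # hypothetical file name
        json.dump(doc, f, indent=2)

The two- and three-point blocks at the end of the script fit E against PT2 for each state over the last iterations and keep the intercept (the PT2 -> 0 limit); the difference of the two intercepts gives the extrapolated excitation energy, and the spreads between the 2-point, 3-point and last-iteration (E+PT2) values are what the final two lines print as error estimates.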