QuantumPackage/scripts/exc_energy_error.py

#!/usr/bin/env python
# Computes the error on the excitation energy of a CIPSI run.

def student(p,df):
    import scipy
    from scipy.stats import t
    return t.ppf(p, df)


def chi2cdf(x,k):
    import scipy
    import scipy.stats
    return scipy.stats.chi2.cdf(x,k)


def jarque_bera(data):

    n    = max(len(data), 2)
    norm = 1./ sum( [ w for (_,w) in data ] )

    mu     =   sum( [ w* x        for (x,w) in data ] ) * norm
    sigma2 =   sum( [ w*(x-mu)**2 for (x,w) in data ] ) * norm
    if sigma2 > 0.:
      S      = ( sum( [ w*(x-mu)**3 for (x,w) in data ] ) * norm ) / sigma2**(3./2.)
      K      = ( sum( [ w*(x-mu)**4 for (x,w) in data ] ) * norm ) / sigma2**2
    else:
      S = 0.
      K = 0.

    # Value of the Jarque-Bera test
    JB = n/6. * (S**2 + 1./4. * (K-3.)**2)

    # Probability that the data comes from a Gaussian distribution
    p = 1. - chi2cdf(JB,2)

    return JB, mu, sqrt(sigma2/(n-1)), p


to_eV = 27.2107362681
import sys, os
import scipy
import scipy.stats
from math import sqrt, gamma, exp
import json


def read_data(filename,state):
  """ Read energies and PT2 from input file """
  with open(filename,'r') as f:
      lines = json.load(f)['fci']

  print(f"State: {state}")

  gs = []
  es = []

  for l in lines:
      try:
        pt2_0 = l['states'][0]['pt2']
        e_0   = l['states'][0]['energy']
        pt2_1 = l['states'][state]['pt2']
        e_1   = l['states'][state]['energy']
        gs.append( (e_0, pt2_0) )
        es.append( (e_1, pt2_1) )
      except: pass

  def f(p_1, p0, p1):
      e, pt2 = p0
      y0, x0 = p_1
      y1, x1 = p1
      try:
        alpha = (y1-y0)/(x0-x1)
      except ZeroDivisionError:
        alpha = 1.
      return [e, pt2, alpha]

  for l in (gs, es):
      p_1, p0, p1 = l[0], l[0], l[1]
      l[0] = f(p_1, p0, p1)

      for i in range(1,len(l)-1):
          p_1 = (l[i-1][0], l[i-1][1])
          p0  = l[i]
          p1  = l[i+1]
          l[i] = f(p_1, p0, p1)

      i = len(l)-1
      p_1 = (l[i-1][0], l[i-1][1])
      p0  = l[i]
      p1  = l[-1]
      l[i] = f(p_1, p0, p1)

  return [ x+y for x,y in zip(gs,es) ]


def compute(data):

    d = []
    for e0, p0, a0, e1, p1, a1 in data:
        x   = (e1+p1)-(e0+p0)
        w   = 1./sqrt(p0**2 + p1**2)
        bias = (a1-1.)*p1 - (a0-1.)*p0
        d.append( (x,w,bias) )

    x = []
    target = (scipy.stats.norm.cdf(1.)-0.5)*2   # = 0.6827

    print("| %2s | %8s | %8s | %8s | %8s | %8s |"%( "N", "DE", "+/-", "bias", "P(G)", "J"))
    print("|----+----------+----------+----------+----------+----------|")
    xmax = (0.,0.,0.,0.,0.,0,0.)
    for i in range(len(data)-1):
        jb, mu, sigma, p = jarque_bera( [ (x,w) for (x,w,bias) in d[i:] ] )
        bias = sum ( [ w * e for (_,w,e) in d[i:] ] ) / sum ( [ w for (_,w,_) in d[i:] ] )
        mu = (mu+0.5*bias) * to_eV
        sigma = sigma * to_eV
        bias = bias * to_eV
        n = len(data[i:])
        beta = student(0.5*(1.+target/p) ,n)
        err = sigma * beta + 0.5*abs(bias)
        print("| %2d | %8.3f | %8.3f | %8.3f | %8.3f | %8.3f |"%( n, mu, err, bias, p, jb))
        if n < 3 :
            continue
        y = (err, p, mu, err, jb,n,bias)
        if p > xmax[1]: xmax = y
        if p < 0.8:
            continue
        x.append(y)

    x = sorted(x)

    print("|----+----------+----------+----------+----------+----------|")
    if x != []:
      xmax = x[0]
    _, p, mu, err, jb, n, bias = xmax
    beta = student(0.5*(1.+target/p),n)
    print("| %2d | %8.3f | %8.3f | %8.3f | %8.3f | %8.3f |\n"%(n, mu, err, bias, p, jb))

    return mu, err, bias, p

filename = sys.argv[1]
print(filename)
if len(sys.argv) > 2:
    state = int(sys.argv[2])
else:
    state = 1
data = read_data(filename,state)
mu, err, bias, _ = compute(data)
print(" %s: %8.3f +/- %5.3f eV\n"%(filename, mu, err))

import numpy as np
A = np.array( [ [ data[-1][1], 1. ],
                [ data[-2][1], 1. ] ] )
B = np.array( [ [ data[-1][0] ],
                [ data[-2][0] ] ] )
E0 = np.linalg.solve(A,B)[1]
A = np.array( [ [ data[-1][4], 1. ],
                [ data[-2][4], 1. ] ] )
B = np.array( [ [ data[-1][3] ],
                [ data[-2][3] ] ] )
E1 = np.linalg.solve(A,B)[1]
average_2 = (E1-E0)*to_eV

A = np.array( [ [ data[-1][1], 1. ],
                [ data[-2][1], 1. ],
                [ data[-3][1], 1. ] ] )
B = np.array( [ [ data[-1][0] ],
                [ data[-2][0] ],
                [ data[-3][0] ] ] )
E0 = np.linalg.lstsq(A,B,rcond=None)[0][1]
A = np.array( [ [ data[-1][4], 1. ],
                [ data[-2][4], 1. ],
                [ data[-3][4], 1. ] ] )
B = np.array( [ [ data[-1][3] ],
                [ data[-2][3] ],
                [ data[-3][3] ] ] )
E1 = np.linalg.lstsq(A,B,rcond=None)[0][1]
average_3 = (E1-E0)*to_eV

exc = ((data[-1][3] + data[-1][4]) - (data[-1][0] + data[-1][1])) * to_eV
error_2 = abs(average_2 - average_3)
error_3 = abs(average_3 - exc)
print(" 2-3 points: %.3f +/- %.3f "% (average_3, error_2))
print(" largest wf: %.3f +/- %.3f  "%(average_3, error_3))
Added exc_energy_error.py 2023-04-24 01:01:31 +02:00			`#!/usr/bin/env python`
			`# Computes the error on the excitation energy of a CIPSI run.`

			`def student(p,df):`
			`import scipy`
			`from scipy.stats import t`
			`return t.ppf(p, df)`


			`def chi2cdf(x,k):`
			`import scipy`
			`import scipy.stats`
			`return scipy.stats.chi2.cdf(x,k)`


			`def jarque_bera(data):`

			`n = max(len(data), 2)`
			`norm = 1./ sum( [ w for (_,w) in data ] )`

			`mu = sum( [ w* x for (x,w) in data ] ) * norm`
			`sigma2 = sum( [ w(x-mu)2 for (x,w) in data ] ) norm`
			`if sigma2 > 0.:`
			`S = ( sum( [ w(x-mu)3 for (x,w) in data ] ) norm ) / sigma2**(3./2.)`
			`K = ( sum( [ w(x-mu)4 for (x,w) in data ] ) norm ) / sigma2**2`
			`else:`
			`S = 0.`
			`K = 0.`

			`# Value of the Jarque-Bera test`
			`JB = n/6. * (S*2 + 1./4. (K-3.)**2)`

			`# Probability that the data comes from a Gaussian distribution`
			`p = 1. - chi2cdf(JB,2)`

			`return JB, mu, sqrt(sigma2/(n-1)), p`



			`to_eV = 27.2107362681`
			`import sys, os`
			`import scipy`
			`import scipy.stats`
			`from math import sqrt, gamma, exp`
			`import json`


			`def read_data(filename,state):`
			`""" Read energies and PT2 from input file """`
			`with open(filename,'r') as f:`
			`lines = json.load(f)['fci']`

			`print(f"State: {state}")`

			`gs = []`
			`es = []`

			`for l in lines:`
			`try:`
			`pt2_0 = l['states'][0]['pt2']`
			`e_0 = l['states'][0]['energy']`
			`pt2_1 = l['states'][state]['pt2']`
			`e_1 = l['states'][state]['energy']`
			`gs.append( (e_0, pt2_0) )`
			`es.append( (e_1, pt2_1) )`
			`except: pass`

			`def f(p_1, p0, p1):`
			`e, pt2 = p0`
			`y0, x0 = p_1`
			`y1, x1 = p1`
			`try:`
			`alpha = (y1-y0)/(x0-x1)`
			`except ZeroDivisionError:`
			`alpha = 1.`
			`return [e, pt2, alpha]`

			`for l in (gs, es):`
			`p_1, p0, p1 = l[0], l[0], l[1]`
			`l[0] = f(p_1, p0, p1)`

			`for i in range(1,len(l)-1):`
			`p_1 = (l[i-1][0], l[i-1][1])`
			`p0 = l[i]`
			`p1 = l[i+1]`
			`l[i] = f(p_1, p0, p1)`

			`i = len(l)-1`
			`p_1 = (l[i-1][0], l[i-1][1])`
			`p0 = l[i]`
			`p1 = l[-1]`
			`l[i] = f(p_1, p0, p1)`

			`return [ x+y for x,y in zip(gs,es) ]`


			`def compute(data):`

			`d = []`
			`for e0, p0, a0, e1, p1, a1 in data:`
			`x = (e1+p1)-(e0+p0)`
			`w = 1./sqrt(p02 + p12)`
			`bias = (a1-1.)p1 - (a0-1.)p0`
			`d.append( (x,w,bias) )`

			`x = []`
			`target = (scipy.stats.norm.cdf(1.)-0.5)*2 # = 0.6827`

			`print("\| %2s \| %8s \| %8s \| %8s \| %8s \| %8s \|"%( "N", "DE", "+/-", "bias", "P(G)", "J"))`
			`print("\|----+----------+----------+----------+----------+----------\|")`
			`xmax = (0.,0.,0.,0.,0.,0,0.)`
			`for i in range(len(data)-1):`
			`jb, mu, sigma, p = jarque_bera( [ (x,w) for (x,w,bias) in d[i:] ] )`
			`bias = sum ( [ w * e for (_,w,e) in d[i:] ] ) / sum ( [ w for (_,w,_) in d[i:] ] )`
			`mu = (mu+0.5bias) to_eV`
			`sigma = sigma * to_eV`
			`bias = bias * to_eV`
			`n = len(data[i:])`
			`beta = student(0.5*(1.+target/p) ,n)`
			`err = sigma * beta + 0.5*abs(bias)`
			`print("\| %2d \| %8.3f \| %8.3f \| %8.3f \| %8.3f \| %8.3f \|"%( n, mu, err, bias, p, jb))`
			`if n < 3 :`
			`continue`
			`y = (err, p, mu, err, jb,n,bias)`
			`if p > xmax[1]: xmax = y`
			`if p < 0.8:`
			`continue`
			`x.append(y)`

			`x = sorted(x)`

			`print("\|----+----------+----------+----------+----------+----------\|")`
			`if x != []:`
			`xmax = x[0]`
			`_, p, mu, err, jb, n, bias = xmax`
			`beta = student(0.5*(1.+target/p),n)`
			`print("\| %2d \| %8.3f \| %8.3f \| %8.3f \| %8.3f \| %8.3f \|\n"%(n, mu, err, bias, p, jb))`

			`return mu, err, bias, p`

			`filename = sys.argv[1]`
			`print(filename)`
			`if len(sys.argv) > 2:`
			`state = int(sys.argv[2])`
			`else:`
			`state = 1`
			`data = read_data(filename,state)`
			`mu, err, bias, _ = compute(data)`
			`print(" %s: %8.3f +/- %5.3f eV\n"%(filename, mu, err))`

			`import numpy as np`
			`A = np.array( [ [ data[-1][1], 1. ],`
			`[ data[-2][1], 1. ] ] )`
			`B = np.array( [ [ data[-1][0] ],`
			`[ data[-2][0] ] ] )`
			`E0 = np.linalg.solve(A,B)[1]`
			`A = np.array( [ [ data[-1][4], 1. ],`
			`[ data[-2][4], 1. ] ] )`
			`B = np.array( [ [ data[-1][3] ],`
			`[ data[-2][3] ] ] )`
			`E1 = np.linalg.solve(A,B)[1]`
			`average_2 = (E1-E0)*to_eV`

			`A = np.array( [ [ data[-1][1], 1. ],`
			`[ data[-2][1], 1. ],`
			`[ data[-3][1], 1. ] ] )`
			`B = np.array( [ [ data[-1][0] ],`
			`[ data[-2][0] ],`
			`[ data[-3][0] ] ] )`
			`E0 = np.linalg.lstsq(A,B,rcond=None)[0][1]`
			`A = np.array( [ [ data[-1][4], 1. ],`
			`[ data[-2][4], 1. ],`
			`[ data[-3][4], 1. ] ] )`
			`B = np.array( [ [ data[-1][3] ],`
			`[ data[-2][3] ],`
			`[ data[-3][3] ] ] )`
			`E1 = np.linalg.lstsq(A,B,rcond=None)[0][1]`
			`average_3 = (E1-E0)*to_eV`

			`exc = ((data[-1][3] + data[-1][4]) - (data[-1][0] + data[-1][1])) * to_eV`
			`error_2 = abs(average_2 - average_3)`
			`error_3 = abs(average_3 - exc)`
			`print(" 2-3 points: %.3f +/- %.3f "% (average_3, error_2))`
			`print(" largest wf: %.3f +/- %.3f "%(average_3, error_3))`