#+TITLE: Quantum Monte Carlo
#+AUTHOR: Anthony Scemama, Claudia Filippi
# SETUPFILE: https://fniessen.github.io/org-html-themes/org/theme-readtheorg.setup
# SETUPFILE: https://fniessen.github.io/org-html-themes/org/theme-bigblow.setup
#+STARTUP: latexpreview

#+HTML_HEAD: <link rel="stylesheet" title="Standard" href="worg.css" type="text/css" />


* Introduction

  We propose different exercises to understand quantum Monte Carlo (QMC)
  methods. In the first section, we propose to compute the energy of a
  hydrogen atom using numerical integration. The goal of this section is
  to introduce the /local energy/.
  Then we introduce the variational Monte Carlo (VMC) method which
  computes a statistical estimate of the expectation value of the energy
  associated with a given wave function.
  Finally, we introduce the diffusion Monte Carlo (DMC) method which
  gives the exact energy of the $H_2$ molecule. 

  Code examples will be given in Python and Fortran. Whatever language
  can be chosen.

  We consider the stationary solution of the Schrödinger equation, so
  the wave functions considered here are real: for an $N$ electron
  system where the electrons move in the 3-dimensional space,
  $\Psi : \mathbb{R}^{3N} \rightarrow \mathbb{R}$. In addition, $\Psi$
  is defined everywhere, continuous and infinitely differentiable.
  
  *Note*
  #+begin_important
  In Fortran, when  you use a double precision  constant, don't forget
  to  put  d0 as  a  suffix (for example  2.0d0),  or  it will  be
  interpreted as a single precision value
  #+end_important


* Numerical evaluation of the energy

  In this section we consider the Hydrogen atom with the following
  wave function:

  $$
  \Psi(\mathbf{r}) = \exp(-a |\mathbf{r}|)
  $$

  We will first verify that $\Psi$ is an eigenfunction of the Hamiltonian

  $$
  \hat{H} = \hat{T} + \hat{V} = - \frac{1}{2} \Delta - \frac{1}{|\mathbf{r}|}
  $$

  when $a=1$, by checking that $\hat{H}\Psi(\mathbf{r}) = E\Psi(\mathbf{r})$ for
  all $\mathbf{r}$. We will check that the local energy, defined as

  $$
  E_L(\mathbf{r}) = \frac{\hat{H} \Psi(\mathbf{r})}{\Psi(\mathbf{r})},
  $$

  is constant. We will also see that when $a \ne 1$ the local energy
  is not constant, so $\hat{H} \Psi \ne E \Psi$.


  The probabilistic /expected value/ of an arbitrary function $f(x)$
  with respect to a probability density function $p(x)$ is given by

  $$ \langle f \rangle_p = \int_{-\infty}^\infty p(x)\, f(x)\,dx $$.

  Recall that a probability density function $p(x)$ is non-negative
  and integrates to one:

  $$ \int_{-\infty}^\infty p(x)\,dx = 1 $$.

    
  The electronic energy of a system is the expectation value of the
  local energy $E(\mathbf{r})$ with respect to the 3N-dimensional
  electron density given by the square of the wave function:

  \begin{eqnarray*}
  E & = & \frac{\langle \Psi| \hat{H} | \Psi\rangle}{\langle \Psi |\Psi \rangle} 
      =   \frac{\int \Psi(\mathbf{r})\, \hat{H} \Psi(\mathbf{r})\, d\mathbf{r}}{\int \left[\Psi(\mathbf{r}) \right]^2 d\mathbf{r}} \\
    & = & \frac{\int \left[\Psi(\mathbf{r})\right]^2\, \frac{\hat{H} \Psi(\mathbf{r})}{\Psi(\mathbf{r})}\,d\mathbf{r}}{\int \left[\Psi(\mathbf{r}) \right]^2 d\mathbf{r}} 
      =   \frac{\int \left[\Psi(\mathbf{r})\right]^2\, E_L(\mathbf{r})\,d\mathbf{r}}{\int \left[\Psi(\mathbf{r}) \right]^2 d\mathbf{r}} 
      =   \langle E_L \rangle_{\Psi^2}
  \end{eqnarray*}

** Local energy
   :PROPERTIES:
   :header-args:python: :tangle hydrogen.py
   :header-args:f90: :tangle hydrogen.f90
   :END:

*** Exercise 1

    #+begin_exercise
    Write a function which computes the potential at $\mathbf{r}$.
    The function accepts a 3-dimensional vector =r= as input arguments
    and returns the potential.
    #+end_exercise

    $\mathbf{r}=\left( \begin{array}{c} x \\ y\\ z\end{array} \right)$, so
    $$
    V(\mathbf{r}) = -\frac{1}{\sqrt{x^2 + y^2 + z^2}}
    $$

    *Python*
     #+BEGIN_SRC python :results none
import numpy as np

def potential(r):
    return -1. / np.sqrt(np.dot(r,r))
     #+END_SRC


     *Fortran*
     #+BEGIN_SRC f90 
double precision function potential(r)
  implicit none
  double precision, intent(in) :: r(3)
  potential = -1.d0 / dsqrt( r(1)*r(1) + r(2)*r(2) + r(3)*r(3) )
end function potential
     #+END_SRC

*** Exercise 2 
    #+begin_exercise
    Write a function which computes the wave function at $\mathbf{r}$.
    The function accepts a scalar =a= and a 3-dimensional vector =r= as
    input arguments, and returns a scalar.
    #+end_exercise

    
    *Python*
     #+BEGIN_SRC python :results none
def psi(a, r):
    return np.exp(-a*np.sqrt(np.dot(r,r)))
     #+END_SRC

     *Fortran*
     #+BEGIN_SRC f90 
double precision function psi(a, r)
  implicit none
  double precision, intent(in) :: a, r(3)
  psi = dexp(-a * dsqrt( r(1)*r(1) + r(2)*r(2) + r(3)*r(3) ))
end function psi
     #+END_SRC
     
*** Exercise 3
    #+begin_exercise
    Write a function which computes the local kinetic energy at $\mathbf{r}$.
    The function accepts =a= and =r= as input arguments and returns the
    local kinetic energy.
    #+end_exercise

    The local kinetic energy is defined as $$-\frac{1}{2}\frac{\Delta \Psi}{\Psi}.$$
     
    We differentiate $\Psi$ with respect to $x$:
     
    \[\Psi(\mathbf{r})  =  \exp(-a\,|\mathbf{r}|) \]
    \[\frac{\partial \Psi}{\partial x}
      = \frac{\partial \Psi}{\partial |\mathbf{r}|} \frac{\partial |\mathbf{r}|}{\partial x}   
      =  - \frac{a\,x}{|\mathbf{r}|} \Psi(\mathbf{r}) \]

    and we differentiate a second time:

    $$
    \frac{\partial^2 \Psi}{\partial x^2} =
    \left( \frac{a^2\,x^2}{|\mathbf{r}|^2}  -
    \frac{a(y^2+z^2)}{|\mathbf{r}|^{3}} \right) \Psi(\mathbf{r}).
    $$

    The Laplacian operator $\Delta = \frac{\partial^2}{\partial x^2} +
    \frac{\partial^2}{\partial y^2} + \frac{\partial^2}{\partial z^2}$
    applied to the wave function gives:

    $$
    \Delta \Psi (\mathbf{r}) = \left(a^2 - \frac{2a}{\mathbf{|r|}} \right) \Psi(\mathbf{r})
    $$

    So the local kinetic energy is
    $$
    -\frac{1}{2} \frac{\Delta \Psi}{\Psi} (\mathbf{r}) = -\frac{1}{2}\left(a^2 - \frac{2a}{\mathbf{|r|}} \right) 
    $$
     
    *Python*
     #+BEGIN_SRC python :results none
def kinetic(a,r):
    return -0.5 * (a**2 - (2.*a)/np.sqrt(np.dot(r,r)))
     #+END_SRC

     *Fortran*
     #+BEGIN_SRC f90 
double precision function kinetic(a,r)
  implicit none
  double precision, intent(in) :: a, r(3)
  kinetic = -0.5d0 * (a*a - (2.d0*a) / &
       dsqrt( r(1)*r(1) + r(2)*r(2) + r(3)*r(3) ) ) 
end function kinetic
     #+END_SRC

*** Exercise 4
    #+begin_exercise
    Write a function which computes the local energy at $\mathbf{r}$.
    The function accepts =x,y,z= as input arguments and returns the
    local energy.
    #+end_exercise
   
    $$
    E_L(\mathbf{r}) = -\frac{1}{2} \frac{\Delta \Psi}{\Psi} (\mathbf{r}) + V(\mathbf{r})
    $$

    
    *Python*
     #+BEGIN_SRC python :results none
def e_loc(a,r):
    return kinetic(a,r) + potential(r)
     #+END_SRC

     *Fortran*
     #+BEGIN_SRC f90
double precision function e_loc(a,r)
  implicit none
  double precision, intent(in) :: a, r(3)
  double precision, external   :: kinetic, potential
  e_loc = kinetic(a,r) + potential(r)
end function e_loc
     #+END_SRC
   
** Plot of the local energy along the $x$ axis
   :PROPERTIES:
   :header-args:python: :tangle plot_hydrogen.py
   :header-args:f90: :tangle plot_hydrogen.f90
   :END:

   
*** Exercise
    #+begin_exercise
    For multiple values of $a$ (0.1, 0.2, 0.5, 1., 1.5, 2.), plot the
    local energy along the $x$ axis.
    #+end_exercise

    *Python*
    #+BEGIN_SRC python :results none
import numpy as np
import matplotlib.pyplot as plt

from hydrogen import e_loc

x=np.linspace(-5,5)

def make_array(a):
  y=np.array([ e_loc(a, np.array([t,0.,0.]) ) for t in x])
  return y

plt.figure(figsize=(10,5))
for a in [0.1, 0.2, 0.5, 1., 1.5, 2.]:
  y = make_array(a)
  plt.plot(x,y,label=f"a={a}")

plt.tight_layout()

plt.legend()

plt.savefig("plot_py.png")
    #+end_src

    #+RESULTS:

    [[./plot_py.png]]


    *Fortran*
    #+begin_src f90 
program plot
  implicit none
  double precision, external :: e_loc

  double precision :: x(50), energy, dx, r(3), a(6)
  integer :: i, j

  a = (/ 0.1d0, 0.2d0, 0.5d0, 1.d0, 1.5d0, 2.d0 /)

  dx = 10.d0/(size(x)-1)
  do i=1,size(x)
     x(i) = -5.d0 + (i-1)*dx
  end do

  r(:) = 0.d0

  do j=1,size(a)
     print *, '# a=', a(j)
     do i=1,size(x)
        r(1) = x(i)
        energy = e_loc( a(j), r )
        print *, x(i), energy
     end do
     print *, ''
     print *, ''
  end do

end program plot
    #+end_src

    To compile and run:

    #+begin_src sh :exports both
gfortran hydrogen.f90 plot_hydrogen.f90 -o plot_hydrogen
./plot_hydrogen > data
    #+end_src

    #+RESULTS:

    To plot the data using gnuplot:

    #+begin_src gnuplot :file plot.png :exports both
set grid
set xrange [-5:5]
set yrange [-2:1]
plot './data' index 0 using 1:2 with lines title 'a=0.1', \
     './data' index 1 using 1:2 with lines title 'a=0.2', \
     './data' index 2 using 1:2 with lines title 'a=0.5', \
     './data' index 3 using 1:2 with lines title 'a=1.0', \
     './data' index 4 using 1:2 with lines title 'a=1.5', \
     './data' index 5 using 1:2 with lines title 'a=2.0'
    #+end_src

    #+RESULTS:
    [[file:plot.png]]

** Numerical estimation of the energy
   :PROPERTIES:
   :header-args:python: :tangle energy_hydrogen.py
   :header-args:f90: :tangle energy_hydrogen.f90
   :END:

   If the space is discretized in small volume elements $\mathbf{r}_i$
   of size $\delta \mathbf{r}$, the expression of $\langle E_L \rangle_{\Psi^2}$
   becomes a weighted average of the local energy, where the weights
   are the values of the probability density at $\mathbf{r}_i$
   multiplied by the volume element:
     
   $$
   \langle E \rangle_{\Psi^2} \approx \frac{\sum_i w_i E_L(\mathbf{r}_i)}{\sum_i w_i}, \;\;
   w_i = \left[\Psi(\mathbf{r}_i)\right]^2 \delta \mathbf{r}
   $$
     
   #+begin_note
   The energy is biased because:
   - The volume elements are not infinitely small (discretization error)
   - The energy is evaluated only inside the box (incompleteness of the space)
   #+end_note

   
*** Exercise
     #+begin_exercise
    Compute a numerical estimate of the energy in a grid of
    $50\times50\times50$ points in the range $(-5,-5,-5) \le
    \mathbf{r} \le (5,5,5)$.
     #+end_exercise

    *Python*
      #+BEGIN_SRC python :results none
import numpy as np
from hydrogen import e_loc, psi

interval = np.linspace(-5,5,num=50)
delta = (interval[1]-interval[0])**3

r = np.array([0.,0.,0.])

for a in [0.1, 0.2, 0.5, 0.9, 1., 1.5, 2.]:
    E = 0.
    norm = 0.
      for x in interval:
          r[0] = x
            for y in interval:
                r[1] = y
                  for z in interval:
                      r[2] = z
                      w = psi(a,r)
                      w = w * w * delta
                      E    += w * e_loc(a,r)
                      norm += w 
    E = E / norm
    print(f"a = {a} \t E = {E}")                

      #+end_src

      #+RESULTS:
      : a = 0.1 	 E = -0.24518438948809218
      : a = 0.2 	 E = -0.26966057967803525
      : a = 0.5 	 E = -0.3856357612517407
      : a = 0.9 	 E = -0.49435709786716214
      : a = 1.0 	 E = -0.5
      : a = 1.5 	 E = -0.39242967082602226
      : a = 2.0 	 E = -0.08086980667844901

      *Fortran*
      #+begin_src f90 
program energy_hydrogen
  implicit none
  double precision, external :: e_loc, psi
  double precision :: x(50), w, delta, energy, dx, r(3), a(6), norm
  integer :: i, k, l, j

  a = (/ 0.1d0, 0.2d0, 0.5d0, 1.d0, 1.5d0, 2.d0 /)

  dx = 10.d0/(size(x)-1)
  do i=1,size(x)
     x(i) = -5.d0 + (i-1)*dx
  end do

  delta = dx**3

  r(:) = 0.d0

  do j=1,size(a)
     energy = 0.d0
     norm = 0.d0
     do i=1,size(x)
        r(1) = x(i)
        do k=1,size(x)
           r(2) = x(k)
           do l=1,size(x)
              r(3) = x(l)
              w = psi(a(j),r)
              w = w * w * delta
              energy = energy + w * e_loc(a(j), r)
              norm   = norm   + w 
           end do
        end do
     end do
     energy = energy / norm
     print *, 'a = ', a(j), '    E = ', energy
  end do

end program energy_hydrogen
      #+end_src

      To compile the Fortran and run it:

      #+begin_src sh :results output :exports both
gfortran hydrogen.f90 energy_hydrogen.f90 -o energy_hydrogen
./energy_hydrogen 
      #+end_src

      #+RESULTS:
      :  a =   0.10000000000000001          E =  -0.24518438948809140     
      :  a =   0.20000000000000001          E =  -0.26966057967803236     
      :  a =   0.50000000000000000          E =  -0.38563576125173815     
      :  a =    1.0000000000000000          E =  -0.50000000000000000     
      :  a =    1.5000000000000000          E =  -0.39242967082602065     
      :  a =    2.0000000000000000          E =   -8.0869806678448772E-002

** Variance of the local energy
   :PROPERTIES:
   :header-args:python: :tangle variance_hydrogen.py
   :header-args:f90: :tangle variance_hydrogen.f90
   :END:

   The variance of the local energy is a functional of $\Psi$
   which measures the magnitude of the fluctuations of the local
   energy associated with $\Psi$ around the average:

   $$
   \sigma^2(E_L) = \frac{\int \left[\Psi(\mathbf{r})\right]^2\, \left[
   E_L(\mathbf{r}) - E \right]^2 \, d\mathbf{r}}{\int \left[\Psi(\mathbf{r}) \right]^2 d\mathbf{r}}
   $$

   If the local energy is constant (i.e. $\Psi$ is an eigenfunction of
   $\hat{H}$) the variance is zero, so the variance of the local
   energy can be used as a measure of the quality of a wave function.

*** Exercise
   #+begin_exercise
   Compute a numerical estimate of the variance of the local energy
   in a grid of $50\times50\times50$ points in the range
   $(-5,-5,-5)
   \le \mathbf{r} \le (5,5,5)$ for different values of $a$.
   #+end_exercise
     
   *Python*
   #+begin_src python :results none
import numpy as np
from hydrogen import e_loc, psi

interval = np.linspace(-5,5,num=50)
delta = (interval[1]-interval[0])**3

r = np.array([0.,0.,0.])

for a in [0.1, 0.2, 0.5, 0.9, 1., 1.5, 2.]:
    E = 0.
    norm = 0.
    for x in interval:
        r[0] = x
        for y in interval:
            r[1] = y
            for z in interval:
                r[2] = z
                w = psi(a, r)
                w = w * w * delta
                El = e_loc(a, r)
                E  += w * El
                norm += w 
    E = E / norm
    s2 = 0.
    for x in interval:
        r[0] = x
        for y in interval:
            r[1] = y
            for z in interval:
                r[2] = z
                w = psi(a, r)
                w = w * w * delta
                El = e_loc(a, r)
                s2 += w * (El - E)**2
    s2 = s2 / norm
    print(f"a = {a} \t E = {E:10.8f}  \t  \sigma^2 = {s2:10.8f}")
   #+end_src

   #+RESULTS:
   : a = 0.1 	 E = -0.24518439  	  \sigma^2 = 0.02696522
   : a = 0.2 	 E = -0.26966058  	  \sigma^2 = 0.03719707
   : a = 0.5 	 E = -0.38563576  	  \sigma^2 = 0.05318597
   : a = 0.9 	 E = -0.49435710  	  \sigma^2 = 0.00577812
   : a = 1.0 	 E = -0.50000000  	  \sigma^2 = 0.00000000
   : a = 1.5 	 E = -0.39242967  	  \sigma^2 = 0.31449671
   : a = 2.0 	 E = -0.08086981  	  \sigma^2 = 1.80688143

   *Fortran*
   #+begin_src f90 
program variance_hydrogen
  implicit none
  double precision, external :: e_loc, psi
  double precision :: x(50), w, delta, energy, dx, r(3), a(6), norm, s2
  integer :: i, k, l, j

  a = (/ 0.1d0, 0.2d0, 0.5d0, 1.d0, 1.5d0, 2.d0 /)

  dx = 10.d0/(size(x)-1)
  do i=1,size(x)
     x(i) = -5.d0 + (i-1)*dx
  end do

  delta = dx**3

  r(:) = 0.d0

  do j=1,size(a)
     energy = 0.d0
     norm = 0.d0
     do i=1,size(x)
        r(1) = x(i)
        do k=1,size(x)
           r(2) = x(k)
           do l=1,size(x)
              r(3) = x(l)
              w = psi(a(j),r)
              w = w * w * delta
              energy = energy + w * e_loc(a(j), r)
              norm   = norm   + w 
           end do
        end do
     end do
     energy = energy / norm

     s2 = 0.d0
     norm = 0.d0
     do i=1,size(x)
        r(1) = x(i)
        do k=1,size(x)
           r(2) = x(k)
           do l=1,size(x)
              r(3) = x(l)
              w = psi(a(j),r)
              w = w * w * delta
              s2 = s2 + w * ( e_loc(a(j), r) - energy )**2
              norm   = norm   + w 
           end do
        end do
     end do
     s2 = s2 / norm
     print *, 'a = ', a(j), ' E = ', energy, ' s2 = ', s2
  end do

end program variance_hydrogen
   #+end_src

   To compile and run:

   #+begin_src sh :results output :exports both
gfortran hydrogen.f90 variance_hydrogen.f90 -o variance_hydrogen
./variance_hydrogen 
   #+end_src

   #+RESULTS:
   :  a =   0.10000000000000001       E =  -0.24518438948809140       s2 =    2.6965218719733813E-002
   :  a =   0.20000000000000001       E =  -0.26966057967803236       s2 =    3.7197072370217653E-002
   :  a =   0.50000000000000000       E =  -0.38563576125173815       s2 =    5.3185967578488862E-002
   :  a =    1.0000000000000000       E =  -0.50000000000000000       s2 =    0.0000000000000000     
   :  a =    1.5000000000000000       E =  -0.39242967082602065       s2 =   0.31449670909180444     
   :  a =    2.0000000000000000       E =   -8.0869806678448772E-002  s2 =    1.8068814270851303     


* Variational Monte Carlo

  Numerical integration with deterministic methods is very efficient
  in low dimensions. When the number of dimensions becomes large,
  instead of computing the average energy as a numerical integration
  on a grid, it is usually more efficient to do a Monte Carlo sampling.

  Moreover, a Monte Carlo sampling will alow us to remove the bias due
  to the discretization of space, and compute a statistical confidence
  interval.

** Computation of the statistical error
   :PROPERTIES:
   :header-args:python: :tangle qmc_stats.py
   :header-args:f90: :tangle qmc_stats.f90
   :END:

   To compute the statistical error, you need to perform $M$
   independent Monte Carlo calculations. You will obtain $M$ different
   estimates of the energy, which are expected to have a Gaussian
   distribution by the central limit theorem.

   The estimate of the energy is

   $$
   E = \frac{1}{M} \sum_{i=1}^M E_M
   $$

   The variance of the average energies can be computed as

   $$
   \sigma^2 = \frac{1}{M-1} \sum_{i=1}^{M} (E_M - E)^2
   $$

   And the confidence interval is given by

   $$
   E \pm \delta E, \text{ where } \delta E = \frac{\sigma}{\sqrt{M}}
   $$
   
*** Exercise
   #+begin_exercise
   Write a function returning the average and statistical error of an
   input array.
   #+end_exercise

   *Python*
   #+BEGIN_SRC python :results none
from math import sqrt
def ave_error(arr):
    M = len(arr)
    assert (M>1)
    average = sum(arr)/M
    variance = 1./(M-1) * sum( [ (x - average)**2 for x in arr ] )
    return (average, sqrt(variance/M))
   #+END_SRC

   *Fortran*
   #+BEGIN_SRC f90
subroutine ave_error(x,n,ave,err)
  implicit none
  integer, intent(in)           :: n 
  double precision, intent(in)  :: x(n) 
  double precision, intent(out) :: ave, err
  double precision :: variance
  if (n == 1) then
     ave = x(1)
     err = 0.d0
  else
     ave = sum(x(:)) / dble(n)
     variance = sum( (x(:) - ave)**2 ) / dble(n-1)
     err = dsqrt(variance/dble(n))
  endif
end subroutine ave_error
   #+END_SRC
   
** Uniform sampling in the box
   :PROPERTIES:
   :header-args:python: :tangle qmc_uniform.py
   :header-args:f90: :tangle qmc_uniform.f90
   :END:

   We will now do our first Monte Carlo calculation to compute the
   energy of the hydrogen atom.
   
   At every Monte Carlo step:

   - Draw a random point $\mathbf{r}_i$ in the box $(-5,-5,-5) \le
     (x,y,z) \le (5,5,5)$
   - Compute $[\Psi(\mathbf{r}_i)]^2$ and accumulate the result in a
     variable =normalization=
   - Compute $[\Psi(\mathbf{r}_i)]^2 \times E_L(\mathbf{r}_i)$, and accumulate the
     result in a variable =energy=

   One Monte Carlo run will consist of $N$ Monte Carlo steps. Once all the
   steps have been computed, the run returns the average energy
   $\bar{E}_k$ over the $N$ steps of the run.

   To compute the statistical error, perform $M$ runs. The final
   estimate of the energy will be the average over the $\bar{E}_k$,
   and the variance of the $\bar{E}_k$ will be used to compute the
   statistical error.
   
*** Exercise

    #+begin_exercise
    Parameterize the wave function with $a=0.9$.  Perform 30
    independent Monte Carlo runs, each with 100 000 Monte Carlo
    steps. Store the final energies of each run and use this array to
    compute the average energy and the associated error bar.
    #+end_exercise

    *Python*
    #+BEGIN_SRC python :results output
from hydrogen  import *
from qmc_stats import *

def MonteCarlo(a, nmax):
     E = 0.
     N = 0.
     for istep in range(nmax):
          r = np.random.uniform(-5., 5., (3))
          w = psi(a,r)
          w = w*w
          N += w
          E += w * e_loc(a,r)
   return E/N

a = 0.9
nmax = 100000
X = [MonteCarlo(a,nmax) for i in range(30)]
E, deltaE = ave_error(X)
print(f"E = {E} +/- {deltaE}")
    #+END_SRC

    #+RESULTS:
    : E = -0.4956255109300764 +/- 0.0007082875482711226

    *Fortran*
    #+BEGIN_SRC f90
subroutine uniform_montecarlo(a,nmax,energy)
  implicit none
  double precision, intent(in)  :: a
  integer         , intent(in)  :: nmax 
  double precision, intent(out) :: energy

  integer*8 :: istep

  double precision :: norm, r(3), w

  double precision, external :: e_loc, psi

  energy = 0.d0
  norm   = 0.d0
  do istep = 1,nmax
     call random_number(r)
     r(:) = -5.d0 + 10.d0*r(:)
     w = psi(a,r)
     w = w*w
     norm = norm + w
     energy = energy + w * e_loc(a,r)
  end do
  energy = energy / norm
end subroutine uniform_montecarlo

program qmc
  implicit none
  double precision, parameter :: a = 0.9
  integer         , parameter :: nmax = 100000
  integer         , parameter :: nruns = 30

  integer :: irun
  double precision :: X(nruns)
  double precision :: ave, err

  do irun=1,nruns
     call uniform_montecarlo(a,nmax,X(irun))
  enddo
  call ave_error(X,nruns,ave,err)
  print *, 'E = ', ave, '+/-', err
end program qmc
    #+END_SRC

    #+begin_src sh :results output :exports both
gfortran hydrogen.f90 qmc_stats.f90 qmc_uniform.f90 -o qmc_uniform
./qmc_uniform
    #+end_src

    #+RESULTS:
    :  E =  -0.49588321986667677      +/-   7.1758863546737969E-004

** Gaussian sampling 
   :PROPERTIES:
   :header-args:python: :tangle qmc_gaussian.py
   :header-args:f90: :tangle qmc_gaussian.f90
   :END:

   We will now improve the sampling and allow to sample in the whole
   3D space, correcting the bias related to the sampling in the box.

   Instead of drawing uniform random numbers, we will draw Gaussian
   random numbers centered on 0 and with a variance of 1.

   To obtain Gaussian-distributed random numbers, you can apply the
   [[https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform][Box Muller transform]] to uniform random numbers:

   \begin{eqnarray*}
   z_1 &=& \sqrt{-2 \ln u_1} \cos(2 \pi u_2) \\
   z_2 &=& \sqrt{-2 \ln u_1} \sin(2 \pi u_2) 
   \end{eqnarray*}

   Here is a Fortran implementation returning a Gaussian-distributed
   n-dimensional vector $\mathbf{z}$;

   *Fortran*
   #+BEGIN_SRC f90 :tangle qmc_stats.f90
subroutine random_gauss(z,n)
  implicit none
  integer, intent(in) :: n
  double precision, intent(out) :: z(n)
  double precision :: u(n+1)
  double precision, parameter :: two_pi = 2.d0*dacos(-1.d0)
  integer :: i

  call random_number(u)
  if (iand(n,1) == 0) then
     ! n is even
     do i=1,n,2
        z(i)   = dsqrt(-2.d0*dlog(u(i))) 
        z(i+1) = z(i) + dsin( two_pi*u(i+1) )
        z(i)   = z(i) + dcos( two_pi*u(i+1) )
     end do
  else
     ! n is odd
     do i=1,n-1,2
        z(i)   = dsqrt(-2.d0*dlog(u(i))) 
        z(i+1) = z(i) + dsin( two_pi*u(i+1) )
        z(i)   = z(i) + dcos( two_pi*u(i+1) )
     end do
     z(n)   = dsqrt(-2.d0*dlog(u(n))) 
     z(n)   = z(n) + dcos( two_pi*u(n+1) )
  end if
end subroutine random_gauss
   #+END_SRC


   Now the sampling probability can be inserted into the equation of the energy:
   
   \[
   E = \frac{\int P(\mathbf{r}) \frac{\left[\Psi(\mathbf{r})\right]^2}{P(\mathbf{r})}\, \frac{\hat{H} \Psi(\mathbf{r})}{\Psi(\mathbf{r})}\,d\mathbf{r}}{\int P(\mathbf{r}) \frac{\left[\Psi(\mathbf{r}) \right]^2}{P(\mathbf{r})} d\mathbf{r}}
   \]

   with the Gaussian probability

   \[
   P(\mathbf{r}) = \frac{1}{(2 \pi)^{3/2}}\exp\left( -\frac{\mathbf{r}^2}{2} \right).
   \]

   As the coordinates are drawn with probability $P(\mathbf{r})$, the
   average energy can be computed as

   $$
   E \approx \frac{\sum_i w_i E_L(\mathbf{r}_i)}{\sum_i w_i}, \;\;
   w_i = \frac{\left[\Psi(\mathbf{r}_i)\right]^2}{P(\mathbf{r}_i)} \delta \mathbf{r}
   $$

   
*** Exercise

    #+begin_exercise
    Modify the exercise of the previous section to sample with
    Gaussian-distributed random numbers. Can you see an reduction in
    the statistical error?
    #+end_exercise

    *Python*
    #+BEGIN_SRC python :results output
from hydrogen  import *
from qmc_stats import *

norm_gauss = 1./(2.*np.pi)**(1.5)
def gaussian(r):
    return norm_gauss * np.exp(-np.dot(r,r)*0.5)

def MonteCarlo(a,nmax):
    E = 0.
    N = 0.
    for istep in range(nmax):
        r = np.random.normal(loc=0., scale=1.0, size=(3))
        w = psi(a,r)
        w = w*w / gaussian(r)
        N += w
        E += w * e_loc(a,r)
    return E/N

a = 0.9
nmax = 100000
X = [MonteCarlo(a,nmax) for i in range(30)]
E, deltaE = ave_error(X)
print(f"E = {E} +/- {deltaE}")
    #+END_SRC

    #+RESULTS:
    : E = -0.49507506093129827 +/- 0.00014164037765553668

   
    *Fortran*
    #+BEGIN_SRC f90
double precision function gaussian(r)
  implicit none
  double precision, intent(in) :: r(3)
  double precision, parameter :: norm_gauss = 1.d0/(2.d0*dacos(-1.d0))**(1.5d0)
  gaussian = norm_gauss * dexp( -0.5d0 * dsqrt(r(1)*r(1) + r(2)*r(2) + r(3)*r(3) ))
end function gaussian


subroutine gaussian_montecarlo(a,nmax,energy)
  implicit none
  double precision, intent(in)  :: a
  integer         , intent(in)  :: nmax 
  double precision, intent(out) :: energy

  integer*8 :: istep

  double precision :: norm, r(3), w

  double precision, external :: e_loc, psi, gaussian

  energy = 0.d0
  norm   = 0.d0
  do istep = 1,nmax
     call random_gauss(r,3)
     w = psi(a,r) 
     w = w*w / gaussian(r)
     norm = norm + w
     energy = energy + w * e_loc(a,r)
  end do
  energy = energy / norm
end subroutine gaussian_montecarlo

program qmc
  implicit none
  double precision, parameter :: a = 0.9
  integer         , parameter :: nmax = 100000
  integer         , parameter :: nruns = 30

  integer :: irun
  double precision :: X(nruns)
  double precision :: ave, err

  do irun=1,nruns
     call gaussian_montecarlo(a,nmax,X(irun))
  enddo
  call ave_error(X,nruns,ave,err)
  print *, 'E = ', ave, '+/-', err
end program qmc
    #+END_SRC

    #+begin_src sh :results output :exports both
gfortran hydrogen.f90 qmc_stats.f90 qmc_gaussian.f90 -o qmc_gaussian
./qmc_gaussian
    #+end_src

    #+RESULTS:
    :  E =  -0.49606057056767766      +/-   1.3918807547836872E-004

** Sampling with $\Psi^2$
   :PROPERTIES:
   :header-args:python: :tangle vmc.py
   :header-args:f90: :tangle vmc.f90
   :END:

   We will now use the square of the wave function to make the sampling:

   \[
   P(\mathbf{r}) = \left[\Psi(\mathbf{r})\right]^2
   \]

   The expression for the energy will be simplified to the average of
   the local energies, each with a weight of 1.

   $$
   E \approx \frac{1}{M}\sum_{i=1}^M E_L(\mathbf{r}_i)
   $$
   
   
*** Importance sampling

    To generate the probability density $\Psi^2$, we consider a
    diffusion process characterized by a time-dependent density
    $[\Psi(\mathbf{r},t)]^2$, which obeys the Fokker-Planck equation:

    \[
    \frac{\partial \Psi^2}{\partial t} = \sum_i D
    \frac{\partial}{\partial \mathbf{r}_i} \left(
    \frac{\partial}{\partial \mathbf{r}_i} - F_i(\mathbf{r}) \right)
    [\Psi(\mathbf{r},t)]^2.
    \]
   
    $D$ is the diffusion constant and $F_i$ is the i-th component of a
    drift velocity caused by an external potential. For a stationary
    density, \( \frac{\partial \Psi^2}{\partial t} = 0 \), so

    \begin{eqnarray*}
    0 & = & \sum_i D
    \frac{\partial}{\partial \mathbf{r}_i} \left(
    \frac{\partial}{\partial \mathbf{r}_i} - F_i(\mathbf{r}) \right)
    [\Psi(\mathbf{r})]^2 \\
    0 & = & \sum_i D
    \frac{\partial}{\partial \mathbf{r}_i} \left(
    \frac{\partial [\Psi(\mathbf{r})]^2}{\partial \mathbf{r}_i} -
    F_i(\mathbf{r})\,[\Psi(\mathbf{r})]^2 \right) \\
    0 & = &
    \frac{\partial^2 \Psi^2}{\partial \mathbf{r}_i^2} -
    \frac{\partial   F_i   }{\partial \mathbf{r}_i}[\Psi(\mathbf{r})]^2  - 
    \frac{\partial   \Psi^2}{\partial \mathbf{r}_i} F_i(\mathbf{r})
    \end{eqnarray*}

    we search for a drift function which satisfies 

    \[
    \frac{\partial^2 \Psi^2}{\partial \mathbf{r}_i^2} =
    [\Psi(\mathbf{r})]^2 \frac{\partial   F_i   }{\partial \mathbf{r}_i} + 
    \frac{\partial   \Psi^2}{\partial \mathbf{r}_i} F_i(\mathbf{r})
    \]

    to obtain a second derivative on the left, we need the drift to be
    of the form
    \[
    F_i(\mathbf{r}) = g(\mathbf{r}) \frac{\partial \Psi^2}{\partial \mathbf{r}_i}
    \]

    \[
    \frac{\partial^2 \Psi^2}{\partial \mathbf{r}_i^2} =
    [\Psi(\mathbf{r})]^2 \frac{\partial
    g(\mathbf{r})}{\partial \mathbf{r}_i}\frac{\partial \Psi^2}{\partial \mathbf{r}_i} + 
    [\Psi(\mathbf{r})]^2 g(\mathbf{r}) \frac{\partial^2
    \Psi^2}{\partial \mathbf{r}_i^2} + 
    \frac{\partial   \Psi^2}{\partial \mathbf{r}_i} 
    g(\mathbf{r}) \frac{\partial \Psi^2}{\partial \mathbf{r}_i}
    \]
   
    $g = 1 / \Psi^2$ satisfies this equation, so 

    \[
    F(\mathbf{r}) = \frac{\nabla [\Psi(\mathbf{r})]^2}{[\Psi(\mathbf{r})]^2} = 2 \frac{\nabla
    \Psi(\mathbf{r})}{\Psi(\mathbf{r})} = 2 \nabla \left( \log \Psi(\mathbf{r}) \right)
    \]

    In statistical mechanics, Fokker-Planck trajectories are generated
    by a Langevin equation:

    \[
     \frac{\partial \mathbf{r}(t)}{\partial t} = 2D \frac{\nabla
     \Psi(\mathbf{r}(t))}{\Psi} + \eta
    \]

    where $\eta$ is a normally-distributed fluctuating random force.

    Discretizing this differential equation gives the following drifted
    diffusion scheme:

    \[
    \mathbf{r}_{n+1} = \mathbf{r}_{n} + \tau\, 2D \frac{\nabla
    \Psi(\mathbf{r})}{\Psi(\mathbf{r})} + \chi 
    \]
    where $\chi$ is a Gaussian random variable with zero mean and
    variance $\tau\,2D$.
   
**** Exercise 1

      #+begin_exercise
      Write a function to compute the drift vector $\frac{\nabla \Psi(\mathbf{r})}{\Psi(\mathbf{r})}$.
      #+end_exercise
   
      *Python*
      #+BEGIN_SRC python :tangle hydrogen.py
def drift(a,r):
  ar_inv = -a/np.sqrt(np.dot(r,r))
  return r * ar_inv
      #+END_SRC

      *Fortran*
      #+BEGIN_SRC f90 :tangle hydrogen.f90
subroutine drift(a,r,b)
  implicit none
  double precision, intent(in)  :: a, r(3)
  double precision, intent(out) :: b(3)
  double precision :: ar_inv
  ar_inv = -a / dsqrt(r(1)*r(1) + r(2)*r(2) + r(3)*r(3))
  b(:) = r(:) * ar_inv
end subroutine drift
      #+END_SRC

**** TODO Exercise 2

     #+begin_exercise
     Sample $\Psi^2$ approximately using the drifted diffusion scheme,
     with a diffusion constant $D=1/2$. You can use a time step of
     0.001 a.u.
     #+end_exercise
   
      *Python*
      #+BEGIN_SRC python :results output :tangle vmc.py 
from hydrogen  import *
from qmc_stats import *
def MonteCarlo(a,tau,nmax):
    E = 0.
    N = 0.
    sq_tau = np.sqrt(tau)
    r_old = np.random.normal(loc=0., scale=1.0, size=(3))
    d_old = drift(a,r_old)
    d2_old = np.dot(d_old,d_old)
    psi_old = psi(a,r_old)
    for istep in range(nmax):
        chi = np.random.normal(loc=0., scale=1.0, size=(3))
        r_new = r_old + tau * d_old + sq_tau * chi
        N += 1.
        E += e_loc(a,r_new)
        r_old = r_new
    return E/N


a = 0.9
nmax = 100000
tau = 0.001
X = [MonteCarlo(a,tau,nmax) for i in range(30)]
E, deltaE = ave_error(X)
print(f"E = {E} +/- {deltaE}")
      #+END_SRC

      #+RESULTS:
      : E = -0.4112049153828464 +/- 0.00027934927432953063
   
      *Fortran*
      #+BEGIN_SRC f90
subroutine variational_montecarlo(a,nmax,energy)
  implicit none
  double precision, intent(in)  :: a
  integer         , intent(in)  :: nmax 
  double precision, intent(out) :: energy

  integer*8 :: istep

  double precision :: norm, r(3), w

  double precision, external :: e_loc, psi, gaussian

  energy = 0.d0
  norm   = 0.d0
  do istep = 1,nmax
     call random_gauss(r,3)
     w = psi(a,r) 
     w = w*w / gaussian(r)
     norm = norm + w
     energy = energy + w * e_loc(a,r)
  end do
  energy = energy / norm
end subroutine variational_montecarlo

program qmc
  implicit none
  double precision, parameter :: a = 0.9
  integer         , parameter :: nmax = 100000
  integer         , parameter :: nruns = 30

  integer :: irun
  double precision :: X(nruns)
  double precision :: ave, err

  do irun=1,nruns
     call gaussian_montecarlo(a,nmax,X(irun))
  enddo
  call ave_error(X,nruns,ave,err)
  print *, 'E = ', ave, '+/-', err
end program qmc
      #+END_SRC

      #+begin_src sh :results output :exports both
gfortran hydrogen.f90 qmc_stats.f90 vmc.f90 -o vmc
./vmc
      #+end_src
     
*** Metropolis algorithm

    Discretizing the differential equation to generate the desired
    probability density will suffer from a discretization error
    leading to biases in the averages. The [[https://en.wikipedia.org/wiki/Metropolis%E2%80%93Hastings_algorithm][Metropolis-Hastings
    sampling algorithm]] removes exactly the discretization errors, so
    large time steps can be employed.

    After the new position $\mathbf{r}_{n+1}$ has been computed, an
    additional accept/reject step is performed. The acceptance
    probability $A$ is chosen so that it is consistent with the
    probability of leaving $\mathbf{r}_n$ and the probability of
    entering $\mathbf{r}_{n+1}$:

    \[ A(\mathbf{r}_{n} \rightarrow \mathbf{r}_{n+1}) = \min \left( 1,
    \frac{g(\mathbf{r}_{n+1} \rightarrow \mathbf{r}_{n}) P(\mathbf{r}_{n+1})}
    {g(\mathbf{r}_{n} \rightarrow \mathbf{r}_{n+1}) P(\mathbf{r}_{n})}
    \right)
    \]

    In our Hydrogen atom example, $P$ is $\Psi^2$ and $g$ is a
    solution of the discretized Fokker-Planck equation:
    
    \begin{eqnarray*}
    P(r_{n}) &=& \Psi^2(\mathbf{r}_n) \\
    g(\mathbf{r}_{n} \rightarrow \mathbf{r}_{n+1}) & = &
    \frac{1}{(4\pi\,D\,\tau)^{3/2}} \exp \left[ - \frac{\left(
    \mathbf{r}_{n+1} - \mathbf{r}_{n} - 2D \frac{\nabla
    \Psi(\mathbf{r}_n)}{\Psi(\mathbf{r}_n)} \right)^2}{4D\,\tau} \right]
    \end{eqnarray*}
    
    The accept/reject step is the following:
    - Compute $A(\mathbf{r}_{n} \rightarrow \mathbf{r}_{n+1})$.
    - Draw a uniform random number $u$
    - if $u \le A(\mathbf{r}_{n} \rightarrow \mathbf{r}_{n+1})$, accept
      the move
    - if $u>A(\mathbf{r}_{n} \rightarrow \mathbf{r}_{n+1})$, reject
      the move: set $\mathbf{r}_{n+1} = \mathbf{r}_{n}$, but *don't
      remove the sample from the average!*


**** TODO Exercise 

     #+begin_exercise
     Modify the previous program to introduce the accept/reject step.
     You should recover the unbiased result.
     #+end_exercise
   
      *Python*
      #+BEGIN_SRC python 
def MonteCarlo(a,tau,nmax):
    E = 0.
    N = 0.
    sq_tau = np.sqrt(tau)
    r_old = np.random.normal(loc=0., scale=1.0, size=(3))
    d_old = drift(a,r_old)
    d2_old = np.dot(d_old,d_old)
    psi_old = psi(a,r_old)
    for istep in range(nmax):
        eta = np.random.normal(loc=0., scale=1.0, size=(3))
        r_new = r_old + tau * d_old + sq_tau * eta
        d_new = drift(a,r_new)
        d2_new = np.dot(d_new,d_new)
        psi_new = psi(a,r_new)
        # Metropolis
        prod = np.dot((d_new + d_old), (r_new - r_old))
        argexpo = 0.5 * (d2_new - d2_old)*tau + prod
        q = psi_new / psi_old
        q = np.exp(-argexpo) * q*q
        if np.random.uniform() < q:
            r_old = r_new
            d_old = d_new
            d2_old = d2_new
            psi_old = psi_new
            N += 1.
            E += e_loc(a,r_old)
    return E/N


nmax = 100000
tau = 0.1
X = [MonteCarlo(a,tau,nmax) for i in range(30)]
E, deltaE = ave_error(X)
print(f"E = {E} +/- {deltaE}")
      #+END_SRC

      #+RESULTS:
      : E = -0.4951783346213532 +/- 0.00022067316984271938
   
      *Fortran*
      #+BEGIN_SRC f90
subroutine variational_montecarlo(a,nmax,energy)
  implicit none
  double precision, intent(in)  :: a
  integer         , intent(in)  :: nmax 
  double precision, intent(out) :: energy

  integer*8 :: istep

  double precision :: norm, r(3), w

  double precision, external :: e_loc, psi, gaussian

  energy = 0.d0
  norm   = 0.d0
  do istep = 1,nmax
     call random_gauss(r,3)
     w = psi(a,r) 
     w = w*w / gaussian(r)
     norm = norm + w
     energy = energy + w * e_loc(a,r)
  end do
  energy = energy / norm
end subroutine variational_montecarlo

program qmc
  implicit none
  double precision, parameter :: a = 0.9
  integer         , parameter :: nmax = 100000
  integer         , parameter :: nruns = 30

  integer :: irun
  double precision :: X(nruns)
  double precision :: ave, err

  do irun=1,nruns
     call gaussian_montecarlo(a,nmax,X(irun))
  enddo
  call ave_error(X,nruns,ave,err)
  print *, 'E = ', ave, '+/-', err
end program qmc
      #+END_SRC

      #+begin_src sh :results output :exports both
gfortran hydrogen.f90 qmc_stats.f90 vmc.f90 -o vmc
./vmc
      #+end_src
     
  
* TODO Diffusion Monte Carlo

  We will now consider the H_2 molecule in a minimal basis composed of the
  $1s$ orbitals of the hydrogen atoms:

  $$
  \Psi(\mathbf{r}_1, \mathbf{r}_2) =
  \exp(-(\mathbf{r}_1 - \mathbf{R}_A)) + 
  $$
  where $\mathbf{r}_1$ and $\mathbf{r}_2$ denote the electron
  coordinates and $\mathbf{R}_A$ and $\mathbf{R}_B$ the coordinates of
  the nuclei.