1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-03 10:06:09 +01:00

Merge branch 'master' into jastrow_c

This commit is contained in:
Gianfranco Abrusci 2022-04-04 11:22:17 +02:00
commit 35e15205df
9 changed files with 1135 additions and 484 deletions

View File

@ -49,12 +49,12 @@ AS_IF([test -d ${srcdir}/.git],
AC_ARG_WITH(ifort, [AS_HELP_STRING([--with-ifort],[Use Intel Fortran compiler])], with_ifort=$withval, with_ifort=no)
AS_IF([test "$with_ifort" == "yes"], [
FC=ifort
FCFLAGS="-xHost -ip -Ofast -ftz -finline -g -mkl=sequential" ])
FCFLAGS="-march=native -ip -Ofast -ftz -finline -g -mkl=sequential" ])
AC_ARG_WITH(icc, [AS_HELP_STRING([--with-icc],[Use Intel C compiler])], with_icc=$withval, with_icc=no)
AS_IF([test "$with_icc" == "yes"], [
CC=icc
CFLAGS="-xHost -ip -Ofast -ftz -finline -g -mkl=sequential" ])
CFLAGS="-march=native -ip -Ofast -ftz -finline -g -mkl=sequential" ])
AS_IF([test "$with_icc"."$with_ifort" == "yes.yes"], [
ax_blas_ok="yes"

199
org/examples.org Normal file
View File

@ -0,0 +1,199 @@
#+TITLE: Code examples
#+SETUPFILE: ../tools/theme.setup
#+INCLUDE: ../tools/lib.org
In this section, we present examples of usage of QMCkl.
For simplicity, we assume that the wave function parameters are stores
in a [[https://github.com/TREX-CoE/trexio][TREXIO]] file.
* Checking errors
All QMCkl functions return an error code. A convenient way to handle
errors is to write an error-checking function that displays the
error in text format and exits the program.
#+NAME: qmckl_check_error
#+begin_src f90
subroutine qmckl_check_error(rc, message)
use qmckl
implicit none
integer(qmckl_exit_code), intent(in) :: rc
character(len=*) , intent(in) :: message
character(len=128) :: str_buffer
if (rc /= QMCKL_SUCCESS) then
print *, message
call qmckl_string_of_error(rc, str_buffer)
print *, str_buffer
call exit(rc)
end if
end subroutine qmckl_check_error
#+end_src
* Computing an atomic orbital on a grid
:PROPERTIES:
:header-args: :tangle ao_grid.f90
:END:
The following program, in Fortran, computes the values of an atomic
orbital on a regular 3-dimensional grid. The 100^3 grid points are
automatically defined, such that the molecule fits in a box with 5
atomic units in the borders.
This program uses the ~qmckl_check_error~ function defined above.
To use this program, run
#+begin_src bash :tangle no
$ ao_grid <trexio_file> <AO_id> <point_num>
#+end_src
#+begin_src f90 :noweb yes
<<qmckl_check_error>>
program ao_grid
use qmckl
implicit none
integer(qmckl_context) :: qmckl_ctx ! QMCkl context
integer(qmckl_exit_code) :: rc ! Exit code of QMCkl functions
character(len=128) :: trexio_filename
character(len=128) :: str_buffer
integer :: ao_id
integer :: point_num_x
integer(c_int64_t) :: nucl_num
double precision, allocatable :: nucl_coord(:,:)
integer(c_int64_t) :: point_num
integer(c_int64_t) :: ao_num
integer(c_int64_t) :: ipoint, i, j, k
double precision :: x, y, z, dr(3)
double precision :: rmin(3), rmax(3)
double precision, allocatable :: points(:,:)
double precision, allocatable :: ao_vgl(:,:,:)
#+end_src
Start by fetching the command-line arguments:
#+begin_src f90
if (iargc() /= 3) then
print *, 'Syntax: ao_grid <trexio_file> <AO_id> <point_num>'
call exit(-1)
end if
call getarg(1, trexio_filename)
call getarg(2, str_buffer)
read(str_buffer, *) ao_id
call getarg(3, str_buffer)
read(str_buffer, *) point_num_x
if (point_num_x < 0 .or. point_num_x > 300) then
print *, 'Error: 0 < point_num < 300'
call exit(-1)
end if
#+end_src
Create the QMCkl context and initialize it with the wave function
present in the TREXIO file:
#+begin_src f90
qmckl_ctx = qmckl_context_create()
rc = qmckl_trexio_read(qmckl_ctx, trexio_filename, 1_8*len(trim(trexio_filename)))
call qmckl_check_error(rc, 'Read TREXIO')
#+end_src
We need to check that ~ao_id~ is in the range, so we get the total
number of AOs from QMCkl:
#+begin_src f90
rc = qmckl_get_ao_basis_ao_num(qmckl_ctx, ao_num)
call qmckl_check_error(rc, 'Getting ao_num')
if (ao_id < 0 .or. ao_id > ao_num) then
print *, 'Error: 0 < ao_id < ', ao_num
call exit(-1)
end if
#+end_src
Now we will compute the limits of the box in which the molecule fits.
For that, we first need to ask QMCkl the coordinates of nuclei.
#+begin_src f90
rc = qmckl_get_nucleus_num(qmckl_ctx, nucl_num)
call qmckl_check_error(rc, 'Get nucleus num')
allocate( nucl_coord(3, nucl_num) )
rc = qmckl_get_nucleus_coord(qmckl_ctx, 'N', nucl_coord, 3_8*nucl_num)
call qmckl_check_error(rc, 'Get nucleus coord')
#+end_src
We now compute the coordinates of opposite points of the box, and
the distance between points along the 3 directions:
#+begin_src f90
rmin(1) = minval( nucl_coord(1,:) ) - 5.d0
rmin(2) = minval( nucl_coord(2,:) ) - 5.d0
rmin(3) = minval( nucl_coord(3,:) ) - 5.d0
rmax(1) = maxval( nucl_coord(1,:) ) + 5.d0
rmax(2) = maxval( nucl_coord(2,:) ) + 5.d0
rmax(3) = maxval( nucl_coord(3,:) ) + 5.d0
dr(1:3) = (rmax(1:3) - rmin(1:3)) / dble(point_num_x-1)
#+end_src
We now produce the list of point coordinates where the AO will be
evaluated:
#+begin_src f90
point_num = point_num_x**3
allocate( points(point_num, 3) )
ipoint=0
z = rmin(3)
do k=1,point_num_x
y = rmin(2)
do j=1,point_num_x
x = rmin(1)
do i=1,point_num_x
ipoint = ipoint+1
points(ipoint,1) = x
points(ipoint,2) = y
points(ipoint,3) = z
x = x + dr(1)
end do
y = y + dr(2)
end do
z = z + dr(3)
end do
#+end_src
We give the points to QMCkl:
#+begin_src f90
rc = qmckl_set_point(qmckl_ctx, 'T', points, point_num)
call qmckl_check_error(rc, 'Setting points')
#+end_src
We allocate the space required to retrieve the values, gradients and
Laplacian of all AOs, and ask to retrieve the values of the
AOs computed at the point positions.
#+begin_src f90
allocate( ao_vgl(ao_num, 5, point_num) )
rc = qmckl_get_ao_basis_ao_vgl(qmckl_ctx, ao_vgl, ao_num*5_8*point_num)
call qmckl_check_error(rc, 'Setting points')
#+end_src
We finally print the value of the AO:
#+begin_src f90
do ipoint=1, point_num
print '(3(F16.10,X),E20.10)', points(ipoint, 1:3), ao_vgl(ao_id,1,ipoint)
end do
#+end_src
#+begin_src f90
deallocate( nucl_coord, points, ao_vgl )
end program ao_grid
#+end_src

View File

@ -1,4 +1,3 @@
B
#+TITLE: Atomic Orbitals
#+SETUPFILE: ../tools/theme.setup
#+INCLUDE: ../tools/lib.org
@ -49,6 +48,7 @@ gradients and Laplacian of the atomic basis functions.
#+begin_src c :tangle (eval h_private_func)
#ifndef QMCKL_AO_HPF
#define QMCKL_AO_HPF
#include "qmckl_blas_private_type.h"
#+end_src
@ -57,6 +57,8 @@ gradients and Laplacian of the atomic basis functions.
#define QMCKL_AO_HPT
#include <stdbool.h>
#include <stdio.h>
#include "qmckl_blas_private_type.h"
#+end_src
#+begin_src f90 :tangle (eval f) :noweb yes
@ -105,6 +107,7 @@ int main() {
#include "qmckl.h"
#include "qmckl_context_private_type.h"
#include "qmckl_memory_private_type.h"
#include "qmckl_blas_private_type.h"
#include "qmckl_memory_private_func.h"
#include "qmckl_ao_private_type.h"
#include "qmckl_ao_private_func.h"
@ -136,6 +139,7 @@ int main() {
| ~ao_num~ | ~int64_t~ | Number of AOs |
| ~ao_cartesian~ | ~bool~ | If true, use polynomials. Otherwise, use spherical harmonics |
| ~ao_factor~ | ~double[ao_num]~ | Normalization factor of the AO |
|---------------------+----------------------+----------------------------------------------------------------------|
For H_2 with the following basis set,
@ -274,26 +278,26 @@ typedef struct qmckl_ao_basis_struct {
int64_t shell_num;
int64_t prim_num;
int64_t ao_num;
int64_t * nucleus_index;
int64_t * nucleus_shell_num;
int32_t * shell_ang_mom;
int64_t * shell_prim_num;
int64_t * shell_prim_index;
double * shell_factor;
double * exponent;
double * coefficient;
double * prim_factor;
double * ao_factor;
int64_t * restrict nucleus_index;
int64_t * restrict nucleus_shell_num;
int32_t * restrict shell_ang_mom;
int64_t * restrict shell_prim_num;
int64_t * restrict shell_prim_index;
double * restrict shell_factor;
double * restrict exponent;
double * restrict coefficient;
double * restrict prim_factor;
double * restrict ao_factor;
int64_t * nucleus_prim_index;
double * coefficient_normalized;
int32_t * nucleus_max_ang_mom;
double * nucleus_range;
double * primitive_vgl;
int64_t * restrict nucleus_prim_index;
double * restrict coefficient_normalized;
int32_t * restrict nucleus_max_ang_mom;
double * restrict nucleus_range;
double * restrict primitive_vgl;
uint64_t primitive_vgl_date;
double * shell_vgl;
double * restrict shell_vgl;
uint64_t shell_vgl_date;
double * ao_vgl;
double * restrict ao_vgl;
uint64_t ao_vgl_date;
int32_t uninitialized;
@ -2488,6 +2492,7 @@ free(ao_factor_test);
| ~shell_vgl_date~ | ~uint64_t~ | Last modification date of Value, gradients, Laplacian of the AOs at current positions |
| ~ao_vgl~ | ~double[point_num][5][ao_num]~ | Value, gradients, Laplacian of the primitives at current positions |
| ~ao_vgl_date~ | ~uint64_t~ | Last modification date of Value, gradients, Laplacian of the AOs at current positions |
|----------------------+-----------------------------------+----------------------------------------------------------------------------------------------|
*** After initialization
@ -2638,11 +2643,12 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) {
}
#+end_src
*** HPC-specific data structures
*** TODO HPC-specific data structures
For faster access, we provide extra arrays for the shell information as:
- $C_{psa}$ = =coef_per_nucleus (iprim, ishell, inucl)=
- $\gamma_{pa}$ =expo_per_nucleus (iprim, inucl)=
- $C_{psa}$ = =coef_per_nucleus[inucl][ishell][iprim]=
- $\gamma_{pa}$ =expo_per_nucleus[inucl][iprim]=
such that the computation of the radial parts can be vectorized.
@ -2659,6 +2665,7 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) {
#+NAME:HPC_struct
#+begin_src c :comments org :exports none
/* HPC specific data structures */
int32_t* restrict prim_num_per_nucleus;
qmckl_tensor coef_per_nucleus;
qmckl_matrix expo_per_nucleus;
#+end_src
@ -2670,11 +2677,32 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context);
#+end_src
#+begin_src c :comments org :tangle (eval c) :exports none
/* Data structure for sorting */
struct combined {
double expo;
int64_t index;
};
/* Comparison function */
int compare_basis( const void * l, const void * r )
{
const struct combined * restrict _l= (const struct combined *)l;
const struct combined * restrict _r= (const struct combined *)r;
if( _l->expo > _r->expo ) return 1;
if( _l->expo < _r->expo ) return -1;
return 0;
}
#ifdef HAVE_HPC
qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context)
{
qmckl_context_struct* const ctx = (qmckl_context_struct* const) context;
qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
mem_info.size = ctx->nucleus.num * sizeof(int32_t);
ctx->ao_basis.prim_num_per_nucleus = (int32_t*) qmckl_malloc(context, mem_info);
/* Find max number of primitives per nucleus */
int64_t shell_max = 0;
int64_t prim_max = 0;
int64_t nucl_num = ctx->nucleus.num;
@ -2690,8 +2718,10 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context)
}
prim_max = prim_num > prim_max ?
prim_num : prim_max;
ctx->ao_basis.prim_num_per_nucleus[inucl] = prim_num;
}
int64_t size[3] = { prim_max, shell_max, nucl_num };
ctx->ao_basis.coef_per_nucleus = qmckl_tensor_alloc( context, 3, size );
ctx->ao_basis.coef_per_nucleus = qmckl_tensor_set(ctx->ao_basis.coef_per_nucleus, 0.);
@ -2699,9 +2729,10 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context)
ctx->ao_basis.expo_per_nucleus = qmckl_matrix_alloc( context, prim_max, nucl_num );
ctx->ao_basis.expo_per_nucleus = qmckl_matrix_set(ctx->ao_basis.expo_per_nucleus, 0.);
double expo[prim_max];
struct combined expo[prim_max];
double coef[shell_max][prim_max];
for (int inucl=0 ; inucl < nucl_num ; ++inucl) {
double newcoef[prim_max];
for (int64_t inucl=0 ; inucl < nucl_num ; ++inucl) {
memset(expo, 0, sizeof(expo));
memset(coef, 0, sizeof(expo));
@ -2713,23 +2744,85 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context)
const int64_t iprim_start = ctx->ao_basis.shell_prim_index[ishell];
const int64_t iprim_end = ctx->ao_basis.shell_prim_index[ishell] + ctx->ao_basis.shell_prim_num[ishell];
for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) {
expo[idx] = ctx->ao_basis.coefficient_normalized[iprim];
coef[ishell - ishell_start][idx] = ctx->ao_basis.coefficient_normalized[iprim];
expo[idx].expo = ctx->ao_basis.exponent[iprim];
expo[idx].index = idx;
idx += 1;
}
}
for (int64_t i=0 ; i<prim_max ; ++i) {
qmckl_mat( ctx->ao_basis.expo_per_nucleus, i, inucl ) = expo[i];
/* Sort exponents */
qsort( expo, (size_t) idx, sizeof(struct combined), compare_basis );
idx = 0;
int64_t idx2 = 0;
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
memset(newcoef, 0, sizeof(newcoef));
const int64_t iprim_start = ctx->ao_basis.shell_prim_index[ishell];
const int64_t iprim_end = ctx->ao_basis.shell_prim_index[ishell] + ctx->ao_basis.shell_prim_num[ishell];
for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) {
newcoef[idx] = ctx->ao_basis.coefficient_normalized[iprim];
idx += 1;
}
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
idx2 = expo[i].index;
coef[ishell - ishell_start][i] = newcoef[idx2];
}
}
for (int64_t j=0 ; j<shell_max ; ++j) {
for (int64_t i=0 ; i<prim_max ; ++i) {
/* Apply ordering to coefficients */
/* Remove duplicates */
int64_t newidx[prim_max];
int64_t idxmax = 0;
idx = 0;
newidx[0] = 0;
for (int32_t i=1 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
if (expo[i].expo != expo[i-1].expo) {
idx += 1;
}
newidx[i] = idx;
}
idxmax = idx;
for (int32_t j=0 ; j<ishell_end-ishell_start ; ++j) {
memset(newcoef, 0, sizeof(newcoef));
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
newcoef[newidx[i]] += coef[j][i];
}
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
coef[j][i] = newcoef[i];
}
}
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
expo[newidx[i]].expo = expo[i].expo;
}
ctx->ao_basis.prim_num_per_nucleus[inucl] = idxmax+1;
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
qmckl_mat( ctx->ao_basis.expo_per_nucleus, i, inucl ) = expo[i].expo;
}
for (int32_t j=0 ; j<ishell_end-ishell_start ; ++j) {
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
qmckl_ten3( ctx->ao_basis.coef_per_nucleus, i, j, inucl ) = coef[j][i];
}
}
/*
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
printf("%4ld %4ld %15.5e | ", inucl, i, qmckl_mat( ctx->ao_basis.expo_per_nucleus, i, inucl ));
for (int64_t j=0 ; j<ishell_end-ishell_start ; ++j) {
printf("%8.5f ", qmckl_ten3( ctx->ao_basis.coef_per_nucleus, i, j, inucl ));
}
printf("\n");
}
printf("\n");
*/
}
return QMCKL_SUCCESS;
}
#endif
@ -4700,13 +4793,13 @@ end function qmckl_ao_polynomial_transp_vgl_doc_f
#+begin_src c :tangle (eval c) :comments org
qmckl_exit_code
qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
const double* X,
const double* R,
const double* restrict X,
const double* restrict R,
const int32_t lmax,
int64_t* n,
int32_t* const L,
int64_t* restrict n,
int32_t* restrict const L,
const int64_t ldl,
double* const VGL,
double* restrict const VGL,
const int64_t ldv )
{
const qmckl_context_struct* ctx = (qmckl_context_struct* const) context;
@ -4742,7 +4835,7 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
} else {
int32_t* const l[4] = {L, L+ldl, L+(ldl<<1), L+ldl+(ldl<<1)};
int32_t* restrict const l[4] = {L, L+ldl, L+(ldl<<1), L+ldl+(ldl<<1)};
l[0][0] = 0; l[0][1] = 0; l[0][2] = 0;
l[1][0] = 1; l[1][1] = 0; l[1][2] = 0;
l[2][0] = 0; l[2][1] = 1; l[2][2] = 0;
@ -4763,11 +4856,11 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
} else {
double* const vgl1 = VGL;
double* const vgl2 = VGL + ldv;
double* const vgl3 = VGL + (ldv << 1);
double* const vgl4 = VGL + ldv + (ldv << 1);
double* const vgl5 = VGL + (ldv << 2);
double* restrict const vgl1 = VGL;
double* restrict const vgl2 = VGL + ldv;
double* restrict const vgl3 = VGL + (ldv << 1);
double* restrict const vgl4 = VGL + ldv + (ldv << 1);
double* restrict const vgl5 = VGL + (ldv << 2);
for (int32_t k=0 ; k<4 ; ++k) {
vgl2[k] = 0.0;
@ -4797,7 +4890,7 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
L[i] = ltmp[i];
} else {
int32_t* l[10];
int32_t* restrict l[10];
for (int32_t i=0 ; i<10 ; ++i) {
l[i] = L + i*ldl;
}
@ -4833,11 +4926,11 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
for (int i=0 ; i<50 ; ++i)
VGL[i] = tmp[i];
} else {
double* const vgl1 = VGL;
double* const vgl2 = VGL + ldv;
double* const vgl3 = VGL + (ldv << 1);
double* const vgl4 = VGL + 3*ldv;
double* const vgl5 = VGL + (ldv << 2);
double* restrict const vgl1 = VGL;
double* restrict const vgl2 = VGL + ldv;
double* restrict const vgl3 = VGL + (ldv << 1);
double* restrict const vgl4 = VGL + 3*ldv;
double* restrict const vgl5 = VGL + (ldv << 2);
vgl1[0] = 1.0 ; vgl1[1] = Y[0] ; vgl1[2] = Y[1];
vgl1[3] = Y[2] ; vgl1[4] = Y[0]*Y[0]; vgl1[5] = Y[0]*Y[1];
@ -4872,11 +4965,11 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
const int32_t size_max = (lmax+1)*(lmax+2)*(lmax+3)/6;
if (ldv < size_max) return QMCKL_INVALID_ARG_9;
double* const vgl1 = VGL;
double* const vgl2 = VGL + ldv;
double* const vgl3 = VGL + (ldv<<1);
double* const vgl4 = VGL + ldv + (ldv<<1);
double* const vgl5 = VGL + (ldv<<2);
double* restrict const vgl1 = VGL;
double* restrict const vgl2 = VGL + ldv;
double* restrict const vgl3 = VGL + (ldv<<1);
double* restrict const vgl4 = VGL + ldv + (ldv<<1);
double* restrict const vgl5 = VGL + (ldv<<2);
const double Y[3] = { X[0]-R[0],
X[1]-R[1],
@ -5375,6 +5468,7 @@ end function qmckl_compute_ao_vgl_doc_f
| ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells |
| ~ao_vgl~ | ~double[point_num][5][ao_num]~ | out | Value, gradients and Laplacian of the AOs |
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
#ifdef HAVE_HPC
qmckl_exit_code
@ -5382,22 +5476,20 @@ qmckl_compute_ao_vgl_hpc_gaussian (
const qmckl_context context,
const int64_t ao_num,
const int64_t shell_num,
const int64_t prim_num,
const int32_t* restrict prim_num_per_nucleus,
const int64_t point_num,
const int64_t nucl_num,
const double* coord,
const double* nucl_coord,
const int64_t* nucleus_index,
const int64_t* nucleus_shell_num,
const double* restrict coord,
const double* restrict nucl_coord,
const int64_t* restrict nucleus_index,
const int64_t* restrict nucleus_shell_num,
const double* nucleus_range,
const int32_t* nucleus_max_ang_mom,
const int32_t* shell_ang_mom,
const int64_t* shell_prim_index,
const int64_t* shell_prim_num,
const double* ao_factor,
const double* expo,
const double* coef_normalized,
double* const ao_vgl )
const int32_t* restrict nucleus_max_ang_mom,
const int32_t* restrict shell_ang_mom,
const double* restrict ao_factor,
const qmckl_matrix expo_per_nucleus,
const qmckl_tensor coef_per_nucleus,
double* restrict const ao_vgl )
{
int32_t lstart[32];
for (int32_t l=0 ; l<32 ; ++l) {
@ -5406,9 +5498,15 @@ qmckl_compute_ao_vgl_hpc_gaussian (
int64_t ao_index[shell_num+1];
int64_t size_max = 0;
int64_t prim_max = 0;
int64_t shell_max = 0;
{
int64_t k=0;
for (int inucl=0 ; inucl < nucl_num ; ++inucl) {
prim_max = prim_num_per_nucleus[inucl] > prim_max ?
prim_num_per_nucleus[inucl] : prim_max;
shell_max = nucleus_shell_num[inucl] > shell_max ?
nucleus_shell_num[inucl] : shell_max;
const int64_t ishell_start = nucleus_index[inucl];
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
@ -5425,14 +5523,12 @@ qmckl_compute_ao_vgl_hpc_gaussian (
double cutoff = -log(1.e-12);
#ifdef HAVE_OPENMP
#pragma omp parallel
#pragma omp parallel
#endif
{
qmckl_exit_code rc;
double c_[prim_num];
double expo_[prim_num];
double ar2[prim_num];
int32_t powers[prim_num];
double ar2[prim_max];
int32_t powers[prim_max];
double poly_vgl_l1[4][4] = {{1.0, 0.0, 0.0, 0.0},
{0.0, 1.0, 0.0, 0.0},
{0.0, 0.0, 1.0, 0.0},
@ -5444,8 +5540,20 @@ qmckl_compute_ao_vgl_hpc_gaussian (
{0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}};
double poly_vgl[5][size_max];
double exp_mat[prim_max][8];
double ce_mat[shell_max][8];
double coef_mat[nucl_num][shell_max][prim_max];
for (int i=0 ; i<nucl_num ; ++i) {
for (int j=0 ; j<shell_max; ++j) {
for (int k=0 ; k<prim_max; ++k) {
coef_mat[i][j][k] = qmckl_ten3(coef_per_nucleus,k, j, i);
}
}
}
#ifdef HAVE_OPENMP
#pragma omp for
#pragma omp for
#endif
for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) {
const double e_coord[3] = { coord[ipoint],
@ -5510,58 +5618,76 @@ qmckl_compute_ao_vgl_hpc_gaussian (
break;
}
const int64_t ishell_start = nucleus_index[inucl];
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
/* Compute all exponents */
int64_t nidx = 0;
const int64_t iprim_start = shell_prim_index[ishell];
const int64_t iprim_end = shell_prim_index[ishell] + shell_prim_num[ishell];
for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) {
const double v = expo[iprim]*r2;
for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) {
const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2;
if (v <= cutoff) {
ar2[nidx] = v;
c_[nidx] = coef_normalized[iprim];
expo_[nidx] = expo[iprim];
ar2[iprim] = v;
++nidx;
} else {
break;
}
}
double s1_ = 0.;
double s5_ = 0.;
double s6_ = 0.;
for (int idx=0 ; idx<nidx ; ++idx) {
const double v = c_[idx] * exp(-ar2[idx]);
const double t = v*expo_[idx];
s1_ += v;
s6_ -= t;
s5_ += t*(4.0*ar2[idx]-6.0);
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
exp_mat[iprim][0] = exp(-ar2[iprim]);
}
s6_ += s6_;
const double s1 = s1_;
const double s2 = s6_*x;
const double s3 = s6_*y;
const double s4 = s6_*z;
const double s5 = s5_;
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
double f = qmckl_mat(expo_per_nucleus, iprim, inucl) * exp_mat[iprim][0];
f = -f-f;
exp_mat[iprim][1] = f * x;
exp_mat[iprim][2] = f * y;
exp_mat[iprim][3] = f * z;
exp_mat[iprim][4] = f * (3.0 - 2.0 * ar2[iprim]);
}
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
for (int j=0 ; j<5 ; ++j) {
ce_mat[i][j] = 0.;
}
}
for (int k=0 ; k<nidx; ++k) {
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
if (coef_mat[inucl][i][k] != 0.) {
for (int j=0 ; j<5 ; ++j) {
ce_mat[i][j] += coef_mat[inucl][i][k] * exp_mat[k][j];
}
}
}
}
const int64_t ishell_start = nucleus_index[inucl];
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
const double s1 = ce_mat[ishell-ishell_start][0];
if (s1 == 0.0) continue;
const double s2 = ce_mat[ishell-ishell_start][1];
const double s3 = ce_mat[ishell-ishell_start][2];
const double s4 = ce_mat[ishell-ishell_start][3];
const double s5 = ce_mat[ishell-ishell_start][4];
const int64_t k = ao_index[ishell];
double* __restrict__ const ao_vgl_1 = ao_vgl + ipoint*5*ao_num + k;
double* restrict const ao_vgl_1 = ao_vgl + ipoint*5*ao_num + k;
const int32_t l = shell_ang_mom[ishell];
const int32_t n = lstart[l+1]-lstart[l];
double* __restrict__ const ao_vgl_2 = ao_vgl_1 + ao_num;
double* __restrict__ const ao_vgl_3 = ao_vgl_1 + (ao_num<<1);
double* __restrict__ const ao_vgl_4 = ao_vgl_1 + (ao_num<<1) + ao_num;
double* __restrict__ const ao_vgl_5 = ao_vgl_1 + (ao_num<<2);
double* restrict const ao_vgl_2 = ao_vgl_1 + ao_num;
double* restrict const ao_vgl_3 = ao_vgl_1 + (ao_num<<1);
double* restrict const ao_vgl_4 = ao_vgl_1 + (ao_num<<1) + ao_num;
double* restrict const ao_vgl_5 = ao_vgl_1 + (ao_num<<2);
double* __restrict__ poly_vgl_1;
double* __restrict__ poly_vgl_2;
double* __restrict__ poly_vgl_3;
double* __restrict__ poly_vgl_4;
double* __restrict__ poly_vgl_5;
double* restrict poly_vgl_1 = NULL;
double* restrict poly_vgl_2 = NULL;
double* restrict poly_vgl_3 = NULL;
double* restrict poly_vgl_4 = NULL;
double* restrict poly_vgl_5 = NULL;
if (nidx > 0) {
const double* __restrict__ f = ao_factor + k;
const double* restrict f = ao_factor + k;
const int64_t idx = lstart[l];
switch (nucleus_max_ang_mom[inucl]) {
@ -5578,6 +5704,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
poly_vgl_2 = &(poly_vgl_l2[1][idx]);
poly_vgl_3 = &(poly_vgl_l2[2][idx]);
poly_vgl_4 = &(poly_vgl_l2[3][idx]);
poly_vgl_5 = &(poly_vgl_l2[4][idx]);
break;
default:
poly_vgl_1 = &(poly_vgl[0][idx]);
@ -5596,7 +5723,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
break;
case (3):
#ifdef HAVE_OPENMP
#pragma omp simd
#pragma omp simd
#endif
for (int il=0 ; il<3 ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
@ -5609,16 +5736,16 @@ qmckl_compute_ao_vgl_hpc_gaussian (
poly_vgl_4[il] * s4 )) * f[il];
}
break;
case(5):
case(6):
#ifdef HAVE_OPENMP
#pragma omp simd
#pragma omp simd
#endif
for (int il=0 ; il<5 ; ++il) {
for (int il=0 ; il<6 ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 +
2.0*(poly_vgl_2[il] * s2 +
poly_vgl_3[il] * s3 +
poly_vgl_4[il] * s4 )) * f[il];
@ -5626,7 +5753,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
break;
default:
#ifdef HAVE_OPENMP
#pragma omp simd simdlen(8)
#pragma omp simd simdlen(8)
#endif
for (int il=0 ; il<n ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
@ -5650,10 +5777,10 @@ qmckl_compute_ao_vgl_hpc_gaussian (
}
}
}
}
}
}
}
}
}
return QMCKL_SUCCESS;
}
#endif
@ -5688,7 +5815,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
const qmckl_context context,
const int64_t ao_num,
const int64_t shell_num,
const int64_t prim_num,
const int32_t* prim_num_per_nucleus,
const int64_t point_num,
const int64_t nucl_num,
const double* coord,
@ -5698,11 +5825,9 @@ qmckl_compute_ao_vgl_hpc_gaussian (
const double* nucleus_range,
const int32_t* nucleus_max_ang_mom,
const int32_t* shell_ang_mom,
const int64_t* shell_prim_index,
const int64_t* shell_prim_num,
const double* ao_factor,
const double* expo,
const double* coef_normalized,
const qmckl_matrix expo_per_nucleus,
const qmckl_tensor coef_per_nucleus,
double* const ao_vgl );
#endif
#+end_src
@ -5830,7 +5955,7 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context)
rc = qmckl_compute_ao_vgl_hpc_gaussian(context,
ctx->ao_basis.ao_num,
ctx->ao_basis.shell_num,
ctx->ao_basis.prim_num,
ctx->ao_basis.prim_num_per_nucleus,
ctx->point.num,
ctx->nucleus.num,
ctx->point.coord.data,
@ -5840,11 +5965,9 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context)
ctx->ao_basis.nucleus_range,
ctx->ao_basis.nucleus_max_ang_mom,
ctx->ao_basis.shell_ang_mom,
ctx->ao_basis.shell_prim_index,
ctx->ao_basis.shell_prim_num,
ctx->ao_basis.ao_factor,
ctx->ao_basis.exponent,
ctx->ao_basis.coefficient_normalized,
ctx->ao_basis.expo_per_nucleus,
ctx->ao_basis.coef_per_nucleus,
ctx->ao_basis.ao_vgl);
/*
} else if (ctx->ao_basis.type == 'S') {
@ -5919,77 +6042,74 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context)
import numpy as np
from math import sqrt
h0 = 1.e-4
def f(a,x,y):
return np.sum( [c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] )
def fx(a,x,y):
return f(a,x,y) * (x[0] - y[0])**2
def df(a,x,y,n):
h0 = 1.e-6
if n == 1: h = np.array([h0,0.,0.])
elif n == 2: h = np.array([0.,h0,0.])
elif n == 3: h = np.array([0.,0.,h0])
return ( f(a,x+h,y) - f(a,x-h,y) ) / (2.*h0)
return ( fx(a,x+h,y) - fx(a,x-h,y) ) / (2.*h0)
# return np.sum( [-2.*b * c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] ) * (x-y)[n-1]
def d2f(a,x,y,n):
h0 = 1.e-6
if n == 1: h = np.array([h0,0.,0.])
elif n == 2: h = np.array([0.,h0,0.])
elif n == 3: h = np.array([0.,0.,h0])
return ( f(a,x+h,y) - 2.*f(a,x,y) + f(a,x-h,y) ) / h0**2
return ( fx(a,x+h,y) - 2.*fx(a,x,y) + fx(a,x-h,y) ) / h0**2
# return np.sum( [( (2.*b*(x-y)[n-1])**2 -2.*b ) * c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] )
def lf(a,x,y):
# return np.sum( [( (2.*b*np.linalg.norm(x-y))**2 -6.*b ) * c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] )
return d2f(a,x,y,1) + d2f(a,x,y,2) + d2f(a,x,y,3)
elec_26_w1 = np.array( [ 1.49050402641, 2.90106987953, -1.05920815468 ] )
elec_15_w2 = np.array( [ -2.20180344582,-1.9113150239, 2.2193744778600002 ] )
nucl_1 = np.array( [ -2.302574592081335e+00, -3.542027060505035e-01, -5.334129934317614e-02] )
#double ao_vgl[prim_num][5][elec_num];
x = elec_26_w1 ; y = nucl_1
a = [( 403.830000, 0.001473 * 5.9876577632594533e+04),
( 121.170000, 0.012672 * 7.2836806319891484e+03),
( 46.345000, 0.058045 * 1.3549226646722386e+03),
( 19.721000, 0.170510 * 3.0376315094739988e+02),
( 8.862400, 0.318596 * 7.4924579607137730e+01),
( 3.996200, 0.384502 * 1.8590543353806009e+01),
( 1.763600, 0.273774 * 4.4423176930919421e+00),
( 0.706190, 0.074397 * 8.9541051939952665e-01)]
a = [( 4.0382999999999998e+02, 1.4732000000000000e-03 * 5.9876577632594533e+04),
( 1.2117000000000000e+02, 1.2672500000000000e-02 * 7.2836806319891484e+03),
( 4.6344999999999999e+01, 5.8045100000000002e-02 * 1.3549226646722386e+03),
( 1.9721000000000000e+01, 1.7051030000000000e-01 * 3.0376315094739988e+02),
( 8.8623999999999992e+00, 3.1859579999999998e-01 * 7.4924579607137730e+01),
( 3.9962000000000000e+00, 3.8450230000000002e-01 * 1.8590543353806009e+01),
( 1.7636000000000001e+00, 2.7377370000000001e-01 * 4.4423176930919421e+00),
( 7.0618999999999998e-01, 7.4396699999999996e-02 * 8.9541051939952665e-01)]
norm = sqrt(3.)
print ( "[0][26][219] : %25.15e"%(f(a,x,y) * (x[0] - y[0])**2) )
print ( "[1][26][219] : %25.15e"%(df(a,x,y,1)* (x[0] - y[0]) * (x[1] - y[1]) + 2.*f(a,x,y) * (x[0] - y[0])) )
# x^2 * g(r)
print ( "[26][0][219] : %25.15e"%(fx(a,x,y)) )
print ( "[26][1][219] : %25.15e"%(df(a,x,y,1)) )
print ( "[26][2][219] : %25.15e"%(df(a,x,y,2)) )
print ( "[26][3][219] : %25.15e"%(df(a,x,y,3)) )
print ( "[26][4][219] : %25.15e"%(lf(a,x,y)) )
print ( "[0][26][220] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[1] - y[1]) ))
print ( "[1][26][220] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[1] - y[1]) + norm*f(a,x,y) * (x[1] - y[1])) )
print ( "[26][0][220] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[1] - y[1]) ))
print ( "[26][1][220] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[1] - y[1]) + norm*f(a,x,y) * (x[1] - y[1])) )
print ( "[0][26][221] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[2] - y[2])) )
print ( "[1][26][221] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[2] - y[2]) + norm*f(a,x,y) * (x[2] - y[2])) )
print ( "[26][0][221] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[2] - y[2])) )
print ( "[26][1][221] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[2] - y[2]) + norm*f(a,x,y) * (x[2] - y[2])) )
print ( "[0][26][222] : %25.15e"%(f(a,x,y) * (x[1] - y[1]) * (x[1] - y[1])) )
print ( "[1][26][222] : %25.15e"%(df(a,x,y,1)* (x[1] - y[1]) * (x[1] - y[1])) )
print ( "[26][0][222] : %25.15e"%(f(a,x,y) * (x[1] - y[1]) * (x[1] - y[1])) )
print ( "[26][1][222] : %25.15e"%(df(a,x,y,1)* (x[1] - y[1]) * (x[1] - y[1])) )
print ( "[0][26][223] : %25.15e"%(norm*f(a,x,y) * (x[1] - y[1]) * (x[2] - y[2])) )
print ( "[1][26][223] : %25.15e"%(norm*df(a,x,y,1)* (x[1] - y[1]) * (x[2] - y[2])) )
print ( "[26][0][223] : %25.15e"%(norm*f(a,x,y) * (x[1] - y[1]) * (x[2] - y[2])) )
print ( "[26][1][223] : %25.15e"%(norm*df(a,x,y,1)* (x[1] - y[1]) * (x[2] - y[2])) )
print ( "[0][26][224] : %25.15e"%(f(a,x,y) * (x[2] - y[2]) * (x[2] - y[2])) )
print ( "[1][26][224] : %25.15e"%(df(a,x,y,1)* (x[2] - y[2]) * (x[2] - y[2])) )
print ( "[26][0][224] : %25.15e"%(f(a,x,y) * (x[2] - y[2]) * (x[2] - y[2])) )
print ( "[26][1][224] : %25.15e"%(df(a,x,y,1)* (x[2] - y[2]) * (x[2] - y[2])) )
#+end_src
#+RESULTS:
#+begin_example
[0][26][219] : 1.020302912653649e-08
[1][26][219] : -4.153046808203204e-08
[0][26][220] : 1.516649653540510e-08
[1][26][220] : -7.725252615816528e-08
[0][26][221] : -4.686389780112468e-09
[1][26][221] : 2.387073693851122e-08
[0][26][222] : 7.514847283937212e-09
[1][26][222] : -4.025905373647693e-08
[0][26][223] : -4.021924592380977e-09
[1][26][223] : 2.154652944642284e-08
[0][26][224] : 7.175074806631758e-10
[1][26][224] : -3.843880138733679e-09
#+end_example
#+begin_src c :tangle (eval c_test) :exports none
{
@ -6021,32 +6141,69 @@ rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]),
assert (rc == QMCKL_SUCCESS);
printf("\n");
printf(" ao_vgl ao_vgl[0][26][219] %25.15e\n", ao_vgl[26][0][219]);
printf(" ao_vgl ao_vgl[1][26][219] %25.15e\n", ao_vgl[26][1][219]);
printf(" ao_vgl ao_vgl[0][26][220] %25.15e\n", ao_vgl[26][0][220]);
printf(" ao_vgl ao_vgl[1][26][220] %25.15e\n", ao_vgl[26][1][220]);
printf(" ao_vgl ao_vgl[0][26][221] %25.15e\n", ao_vgl[26][0][221]);
printf(" ao_vgl ao_vgl[1][26][221] %25.15e\n", ao_vgl[26][1][221]);
printf(" ao_vgl ao_vgl[0][26][222] %25.15e\n", ao_vgl[26][0][222]);
printf(" ao_vgl ao_vgl[1][26][222] %25.15e\n", ao_vgl[26][1][222]);
printf(" ao_vgl ao_vgl[0][26][223] %25.15e\n", ao_vgl[26][0][223]);
printf(" ao_vgl ao_vgl[1][26][223] %25.15e\n", ao_vgl[26][1][223]);
printf(" ao_vgl ao_vgl[0][26][224] %25.15e\n", ao_vgl[26][0][224]);
printf(" ao_vgl ao_vgl[1][26][224] %25.15e\n", ao_vgl[26][1][224]);
printf(" ao_vgl ao_vgl[26][0][219] %25.15e\n", ao_vgl[26][0][219]);
printf(" ao_vgl ao_vgl[26][1][219] %25.15e\n", ao_vgl[26][1][219]);
printf(" ao_vgl ao_vgl[26][2][219] %25.15e\n", ao_vgl[26][2][219]);
printf(" ao_vgl ao_vgl[26][3][219] %25.15e\n", ao_vgl[26][3][219]);
printf(" ao_vgl ao_vgl[26][4][219] %25.15e\n", ao_vgl[26][4][219]);
printf(" ao_vgl ao_vgl[26][0][220] %25.15e\n", ao_vgl[26][0][220]);
printf(" ao_vgl ao_vgl[26][1][220] %25.15e\n", ao_vgl[26][1][220]);
printf(" ao_vgl ao_vgl[26][2][220] %25.15e\n", ao_vgl[26][2][220]);
printf(" ao_vgl ao_vgl[26][3][220] %25.15e\n", ao_vgl[26][3][220]);
printf(" ao_vgl ao_vgl[26][4][220] %25.15e\n", ao_vgl[26][4][220]);
printf(" ao_vgl ao_vgl[26][0][221] %25.15e\n", ao_vgl[26][0][221]);
printf(" ao_vgl ao_vgl[26][1][221] %25.15e\n", ao_vgl[26][1][221]);
printf(" ao_vgl ao_vgl[26][2][221] %25.15e\n", ao_vgl[26][2][221]);
printf(" ao_vgl ao_vgl[26][3][221] %25.15e\n", ao_vgl[26][3][221]);
printf(" ao_vgl ao_vgl[26][4][221] %25.15e\n", ao_vgl[26][4][221]);
printf(" ao_vgl ao_vgl[26][0][222] %25.15e\n", ao_vgl[26][0][222]);
printf(" ao_vgl ao_vgl[26][1][222] %25.15e\n", ao_vgl[26][1][222]);
printf(" ao_vgl ao_vgl[26][2][222] %25.15e\n", ao_vgl[26][2][222]);
printf(" ao_vgl ao_vgl[26][3][222] %25.15e\n", ao_vgl[26][3][222]);
printf(" ao_vgl ao_vgl[26][4][222] %25.15e\n", ao_vgl[26][4][222]);
printf(" ao_vgl ao_vgl[26][0][223] %25.15e\n", ao_vgl[26][0][223]);
printf(" ao_vgl ao_vgl[26][1][223] %25.15e\n", ao_vgl[26][1][223]);
printf(" ao_vgl ao_vgl[26][2][223] %25.15e\n", ao_vgl[26][2][223]);
printf(" ao_vgl ao_vgl[26][3][223] %25.15e\n", ao_vgl[26][3][223]);
printf(" ao_vgl ao_vgl[26][4][223] %25.15e\n", ao_vgl[26][4][223]);
printf(" ao_vgl ao_vgl[26][0][224] %25.15e\n", ao_vgl[26][0][224]);
printf(" ao_vgl ao_vgl[26][1][224] %25.15e\n", ao_vgl[26][1][224]);
printf(" ao_vgl ao_vgl[26][2][224] %25.15e\n", ao_vgl[26][2][224]);
printf(" ao_vgl ao_vgl[26][3][224] %25.15e\n", ao_vgl[26][3][224]);
printf(" ao_vgl ao_vgl[26][4][224] %25.15e\n", ao_vgl[26][4][224]);
printf("\n");
assert( fabs(ao_vgl[26][0][219] - ( 1.020298798341620e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][219] - (-4.928035238010602e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][219] - ( -4.928035238010602e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][2][219] - ( -4.691009312035986e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][3][219] - ( 1.449504046436699e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][4][219] - ( 4.296442111843973e-07)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][220] - ( 1.516643537739178e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][220] - (-7.725221462603871e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][221] - (-4.686370882518819e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][220] - ( -7.725221462603871e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][2][220] - ( -6.507140835104833e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][3][220] - ( 2.154644255710413e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][4][220] - ( 6.365449359656352e-07)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][221] - ( -4.686370882518819e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][221] - ( 2.387064067626827e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][2][221] - ( 2.154644255710412e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][3][221] - ( -1.998731863512374e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][4][221] - ( -1.966899656441993e-07)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][222] - ( 7.514816980753531e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][222] - (-4.025889138635182e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][223] - (-4.021908374204471e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][222] - ( -4.025889138635182e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][2][222] - ( -2.993372555126361e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][3][222] - ( 1.067604670272904e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][4][222] - ( 3.168199650002648e-07)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][223] - ( -4.021908374204471e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][223] - ( 2.154644255710413e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][2][223] - ( 1.725594944732276e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][3][223] - ( -1.715339357718333e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][4][223] - ( -1.688020516893476e-07)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][224] - ( 7.175045873560788e-10)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][224] - (-3.843864637762753e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][224] - ( -3.843864637762753e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][2][224] - ( -3.298857850451910e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][3][224] - ( -4.073047518790881e-10)) < 1.e-14 );
assert( fabs(ao_vgl[26][4][224] - ( 3.153244195820293e-08)) < 1.e-14 );
}
#+end_src
@ -6100,6 +6257,5 @@ assert( fabs(ao_vgl[26][1][224] - (-3.843864637762753e-09)) < 1.e-14 );
# -*- mode: org -*-
# vim: syntax=c
* TODO [0/1] Missing features :noexport:
- [ ] Error messages to tell what is missing when initializing

View File

@ -85,7 +85,7 @@ are not intended to be passed to external codes.
#+begin_src c :comments org :tangle (eval h_private_type) :exports none
typedef struct qmckl_vector {
int64_t size;
double* data;
double* restrict data;
} qmckl_vector;
#+end_src
@ -161,7 +161,7 @@ qmckl_vector_free( qmckl_context context,
#+begin_src c :comments org :tangle (eval h_private_type) :exports none
typedef struct qmckl_matrix {
int64_t size[2];
double* data;
double* restrict data;
} qmckl_matrix;
#+end_src
@ -247,7 +247,7 @@ qmckl_matrix_free( qmckl_context context,
typedef struct qmckl_tensor {
int64_t order;
int64_t size[QMCKL_TENSOR_ORDER_MAX];
double* data;
double* restrict data;
} qmckl_tensor;
#+end_src

View File

@ -1239,9 +1239,9 @@ assert(rc == QMCKL_SUCCESS);
assert(qmckl_ao_basis_provided(context));
double ao_vgl[5][walk_num][elec_num][chbrclf_ao_num];
double ao_vgl[walk_num*elec_num][5][chbrclf_ao_num];
rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0][0]), (int64_t) 5*walk_num*elec_num*chbrclf_ao_num);
rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]), (int64_t) 5*walk_num*elec_num*chbrclf_ao_num);
assert (rc == QMCKL_SUCCESS);
/* Set up MO data */
@ -1256,8 +1256,8 @@ assert (rc == QMCKL_SUCCESS);
assert(qmckl_mo_basis_provided(context));
double mo_vgl[5][elec_num][chbrclf_mo_num];
rc = qmckl_get_mo_basis_vgl(context, &(mo_vgl[0][0][0]));
double mo_vgl[walk_num*elec_num][5][chbrclf_mo_num];
rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), 5*walk_num*elec_num*chbrclf_mo_num);
assert (rc == QMCKL_SUCCESS);
/* Set up determinant data */

View File

@ -655,9 +655,9 @@ assert(rc == QMCKL_SUCCESS);
assert(qmckl_ao_basis_provided(context));
double ao_vgl[5][walk_num][elec_num][chbrclf_ao_num];
double ao_vgl[walk_num*elec_num][5][chbrclf_ao_num];
rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0][0]),
rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]),
(int64_t) 5*walk_num*elec_num*chbrclf_ao_num);
assert (rc == QMCKL_SUCCESS);
@ -673,8 +673,8 @@ assert (rc == QMCKL_SUCCESS);
assert(qmckl_mo_basis_provided(context));
double mo_vgl[5][elec_num][chbrclf_mo_num];
rc = qmckl_get_mo_basis_vgl(context, &(mo_vgl[0][0][0]));
double mo_vgl[walk_num*elec_num][5][chbrclf_mo_num];
rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), 5*walk_num*elec_num*chbrclf_mo_num);
assert (rc == QMCKL_SUCCESS);
/* Set up determinant data */

View File

@ -84,13 +84,14 @@ int main() {
The following arrays are stored in the context:
|---------------+--------------------+----------------------|
|-----------------+--------------------+----------------------------------------|
| ~mo_num~ | | Number of MOs |
| ~coefficient~ | ~[mo_num][ao_num]~ | Orbital coefficients |
| ~coefficient_t~ | ~[ao_num][mo_num]~ | Transposed of the Orbital coefficients |
|-----------------+--------------------+----------------------------------------|
Computed data:
|---------------+--------------------------+-------------------------------------------------------------------------------------|
|---------------+--------------------------+-------------------------------------------------------------------------------------|
| ~mo_vgl~ | ~[point_num][5][mo_num]~ | Value, gradients, Laplacian of the MOs at point positions |
| ~mo_vgl_date~ | ~uint64_t~ | Late modification date of Value, gradients, Laplacian of the MOs at point positions |
@ -101,9 +102,10 @@ int main() {
#+begin_src c :comments org :tangle (eval h_private_type)
typedef struct qmckl_mo_basis_struct {
int64_t mo_num;
double * coefficient;
double * restrict coefficient;
double * restrict coefficient_t;
double * mo_vgl;
double * restrict mo_vgl;
uint64_t mo_vgl_date;
int32_t uninitialized;
@ -355,6 +357,34 @@ qmckl_exit_code qmckl_finalize_mo_basis(qmckl_context context) {
qmckl_context_struct* const ctx = (qmckl_context_struct* const) context;
assert (ctx != NULL);
qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
mem_info.size = ctx->ao_basis.ao_num * ctx->mo_basis.mo_num * sizeof(double);
double* new_array = (double*) qmckl_malloc(context, mem_info);
if (new_array == NULL) {
return qmckl_failwith( context,
QMCKL_ALLOCATION_FAILED,
"qmckl_finalize_mo_basis",
NULL);
}
assert (ctx->mo_basis.coefficient != NULL);
if (ctx->mo_basis.coefficient_t != NULL) {
qmckl_exit_code rc = qmckl_free(context, ctx->mo_basis.coefficient);
if (rc != QMCKL_SUCCESS) {
return qmckl_failwith( context, rc,
"qmckl_finalize_mo_basis",
NULL);
}
}
for (int64_t i=0 ; i<ctx->ao_basis.ao_num ; ++i) {
for (int64_t j=0 ; j<ctx->mo_basis.mo_num ; ++j) {
new_array[i*ctx->mo_basis.mo_num + j] = ctx->mo_basis.coefficient[j*ctx->ao_basis.ao_num + i];
}
}
ctx->mo_basis.coefficient_t = new_array;
qmckl_exit_code rc = QMCKL_SUCCESS;
return rc;
}
@ -367,11 +397,18 @@ qmckl_exit_code qmckl_finalize_mo_basis(qmckl_context context) {
*** Get
#+begin_src c :comments org :tangle (eval h_func) :noweb yes
qmckl_exit_code qmckl_get_mo_basis_vgl(qmckl_context context, double* const mo_vgl);
qmckl_exit_code
qmckl_get_mo_basis_mo_vgl(qmckl_context context,
double* const mo_vgl,
const int64_t size_max);
#+end_src
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
qmckl_exit_code qmckl_get_mo_basis_vgl(qmckl_context context, double* const mo_vgl) {
qmckl_exit_code
qmckl_get_mo_basis_mo_vgl(qmckl_context context,
double* const mo_vgl,
const int64_t size_max)
{
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return QMCKL_NULL_CONTEXT;
@ -388,7 +425,13 @@ qmckl_exit_code qmckl_get_mo_basis_vgl(qmckl_context context, double* const mo_v
qmckl_context_struct* const ctx = (qmckl_context_struct* const) context;
assert (ctx != NULL);
size_t sze = 5 * ctx->point.num * ctx->mo_basis.mo_num;
const int64_t sze = ctx->point.num * 5 * ctx->mo_basis.mo_num;
if (size_max < sze) {
return qmckl_failwith( context,
QMCKL_INVALID_ARG_3,
"qmckl_get_mo_basis_mo_vgl",
"input array too small");
}
memcpy(mo_vgl, ctx->mo_basis.mo_vgl, sze * sizeof(double));
return QMCKL_SUCCESS;
@ -397,15 +440,82 @@ qmckl_exit_code qmckl_get_mo_basis_vgl(qmckl_context context, double* const mo_v
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
interface
integer(c_int32_t) function qmckl_get_mo_basis_vgl (context, mo_vgl) &
bind(C)
integer(c_int32_t) function qmckl_get_mo_basis_mo_vgl (context, &
mo_vgl, size_max) bind(C)
use, intrinsic :: iso_c_binding
import
implicit none
integer (c_int64_t) , intent(in) , value :: context
double precision, intent(out) :: mo_vgl(*)
end function
integer (c_int64_t) , intent(in) , value :: size_max
end function qmckl_get_mo_basis_mo_vgl
end interface
#+end_src
Uses the given array to compute the VGL.
#+begin_src c :comments org :tangle (eval h_func) :noweb yes
qmckl_exit_code
qmckl_get_mo_basis_mo_vgl_inplace (qmckl_context context,
double* const mo_vgl,
const int64_t size_max);
#+end_src
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
qmckl_exit_code
qmckl_get_mo_basis_mo_vgl_inplace (qmckl_context context,
double* const mo_vgl,
const int64_t size_max)
{
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith( context,
QMCKL_INVALID_CONTEXT,
"qmckl_get_mo_basis_mo_vgl",
NULL);
}
qmckl_exit_code rc;
qmckl_context_struct* const ctx = (qmckl_context_struct* const) context;
assert (ctx != NULL);
const int64_t sze = ctx->mo_basis.mo_num * 5 * ctx->point.num;
if (size_max < sze) {
return qmckl_failwith( context,
QMCKL_INVALID_ARG_3,
"qmckl_get_mo_basis_mo_vgl",
"input array too small");
}
rc = qmckl_context_touch(context);
if (rc != QMCKL_SUCCESS) return rc;
double* old_array = ctx->mo_basis.mo_vgl;
ctx->mo_basis.mo_vgl = mo_vgl;
rc = qmckl_provide_mo_vgl(context);
if (rc != QMCKL_SUCCESS) return rc;
ctx->mo_basis.mo_vgl = old_array;
return QMCKL_SUCCESS;
}
#+end_src
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
interface
integer(c_int32_t) function qmckl_get_mo_basis_mo_vgl_inplace (context, &
mo_vgl, size_max) bind(C)
use, intrinsic :: iso_c_binding
import
implicit none
integer (c_int64_t) , intent(in) , value :: context
double precision, intent(out) :: mo_vgl(*)
integer (c_int64_t) , intent(in) , value :: size_max
end function qmckl_get_mo_basis_mo_vgl_inplace
end interface
#+end_src
@ -462,17 +572,17 @@ qmckl_exit_code qmckl_provide_mo_vgl(qmckl_context context)
if (mo_vgl == NULL) {
return qmckl_failwith( context,
QMCKL_ALLOCATION_FAILED,
"qmckl_mo_basis_vgl",
"qmckl_mo_basis_mo_vgl",
NULL);
}
ctx->mo_basis.mo_vgl = mo_vgl;
}
rc = qmckl_compute_mo_basis_vgl(context,
rc = qmckl_compute_mo_basis_mo_vgl(context,
ctx->ao_basis.ao_num,
ctx->mo_basis.mo_num,
ctx->point.num,
ctx->mo_basis.coefficient,
ctx->mo_basis.coefficient_t,
ctx->ao_basis.ao_vgl,
ctx->mo_basis.mo_vgl);
if (rc != QMCKL_SUCCESS) {
@ -488,25 +598,33 @@ qmckl_exit_code qmckl_provide_mo_vgl(qmckl_context context)
*** Compute
:PROPERTIES:
:Name: qmckl_compute_mo_basis_vgl
:Name: qmckl_compute_mo_basis_mo_vgl
:CRetType: qmckl_exit_code
:FRetType: qmckl_exit_code
:END:
#+NAME: qmckl_mo_basis_vgl_args
| ~qmckl_context~ | ~context~ | in | Global state |
| ~int64_t~ | ~ao_num~ | in | Number of AOs |
| ~int64_t~ | ~mo_num~ | in | Number of MOs |
| ~int64_t~ | ~point_num~ | in | Number of points |
| ~double~ | ~coef_normalized[mo_num][ao_num]~ | in | AO to MO transformation matrix |
| ~double~ | ~ao_vgl[point_num][5][ao_num]~ | in | Value, gradients and Laplacian of the AOs |
| ~double~ | ~mo_vgl[point_num][5][mo_num]~ | out | Value, gradients and Laplacian of the MOs |
#+NAME: qmckl_mo_basis_mo_vgl_args
| Variable | Type | In/Out | Description |
|---------------------+--------------------------------+--------+-------------------------------------------------|
| ~context~ | ~qmckl_context~ | in | Global state |
| ~ao_num~ | ~int64_t~ | in | Number of AOs |
| ~mo_num~ | ~int64_t~ | in | Number of MOs |
| ~point_num~ | ~int64_t~ | in | Number of points |
| ~coef_normalized_t~ | ~double[mo_num][ao_num]~ | in | Transpose of the AO to MO transformation matrix |
| ~ao_vgl~ | ~double[point_num][5][ao_num]~ | in | Value, gradients and Laplacian of the AOs |
| ~mo_vgl~ | ~double[point_num][5][mo_num]~ | out | Value, gradients and Laplacian of the MOs |
The matrix of AO values is very sparse, so we use a sparse-dense
matrix multiplication instead of a dgemm, as exposed in
https://dx.doi.org/10.1007/978-3-642-38718-0_14.
#+begin_src f90 :comments org :tangle (eval f) :noweb yes
integer function qmckl_compute_mo_basis_vgl_f(context, &
integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, &
ao_num, mo_num, point_num, &
coef_normalized, ao_vgl, mo_vgl) &
coef_normalized_t, ao_vgl, mo_vgl) &
result(info)
use qmckl
implicit none
@ -514,54 +632,68 @@ integer function qmckl_compute_mo_basis_vgl_f(context, &
integer*8 , intent(in) :: ao_num, mo_num
integer*8 , intent(in) :: point_num
double precision , intent(in) :: ao_vgl(ao_num,5,point_num)
double precision , intent(in) :: coef_normalized(ao_num,mo_num)
double precision , intent(in) :: coef_normalized_t(mo_num,ao_num)
double precision , intent(out) :: mo_vgl(mo_num,5,point_num)
character :: TransA, TransB
double precision :: alpha, beta
integer*8 :: M, N, K, LDA, LDB, LDC, i,j
integer*8 :: i,j,k
double precision :: c1, c2, c3, c4, c5
TransA = 'T'
TransB = 'N'
M = mo_num
N = point_num*5_8
K = int(ao_num,8)
alpha = 1.d0
beta = 0.d0
LDA = size(coef_normalized,1)
LDB = size(ao_vgl,1)
LDC = size(mo_vgl,1)
do j=1,point_num
mo_vgl(:,:,j) = 0.d0
do k=1,ao_num
if (ao_vgl(k,1,j) /= 0.d0) then
c1 = ao_vgl(k,1,j)
c2 = ao_vgl(k,2,j)
c3 = ao_vgl(k,3,j)
c4 = ao_vgl(k,4,j)
c5 = ao_vgl(k,5,j)
do i=1,mo_num
mo_vgl(i,1,j) = mo_vgl(i,1,j) + coef_normalized_t(i,k) * c1
mo_vgl(i,2,j) = mo_vgl(i,2,j) + coef_normalized_t(i,k) * c2
mo_vgl(i,3,j) = mo_vgl(i,3,j) + coef_normalized_t(i,k) * c3
mo_vgl(i,4,j) = mo_vgl(i,4,j) + coef_normalized_t(i,k) * c4
mo_vgl(i,5,j) = mo_vgl(i,5,j) + coef_normalized_t(i,k) * c5
end do
end if
end do
end do
info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, &
coef_normalized, int(size(coef_normalized,1),8), &
ao_vgl, int(size(ao_vgl,1),8), beta, &
mo_vgl,LDC)
info = QMCKL_SUCCESS
end function qmckl_compute_mo_basis_vgl_f
end function qmckl_compute_mo_basis_mo_vgl_doc_f
#+end_src
#+CALL: generate_c_header(table=qmckl_mo_basis_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_vgl"))
#+CALL: generate_c_header(table=qmckl_mo_basis_mo_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl"))
#+RESULTS:
#+begin_src c :tangle (eval h_func) :comments org
qmckl_exit_code qmckl_compute_mo_basis_vgl (
qmckl_exit_code qmckl_compute_mo_basis_mo_vgl (
const qmckl_context context,
const int64_t ao_num,
const int64_t mo_num,
const int64_t point_num,
const double* coef_normalized,
const double* coef_normalized_t,
const double* ao_vgl,
double* const mo_vgl );
#+end_src
#+CALL: generate_c_header(table=qmckl_mo_basis_mo_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl_doc"))
#+CALL: generate_c_interface(table=qmckl_mo_basis_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_vgl"))
#+RESULTS:
#+begin_src c :tangle (eval h_func) :comments org
qmckl_exit_code qmckl_compute_mo_basis_mo_vgl_doc (
const qmckl_context context,
const int64_t ao_num,
const int64_t mo_num,
const int64_t point_num,
const double* coef_normalized_t,
const double* ao_vgl,
double* const mo_vgl );
#+end_src
#+CALL: generate_c_interface(table=qmckl_mo_basis_mo_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl_doc"))
#+RESULTS:
#+begin_src f90 :tangle (eval f) :comments org :exports none
integer(c_int32_t) function qmckl_compute_mo_basis_vgl &
(context, ao_num, mo_num, point_num, coef_normalized, ao_vgl, mo_vgl) &
integer(c_int32_t) function qmckl_compute_mo_basis_mo_vgl_doc &
(context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl) &
bind(C) result(info)
use, intrinsic :: iso_c_binding
@ -571,15 +703,176 @@ end function qmckl_compute_mo_basis_vgl_f
integer (c_int64_t) , intent(in) , value :: ao_num
integer (c_int64_t) , intent(in) , value :: mo_num
integer (c_int64_t) , intent(in) , value :: point_num
real (c_double ) , intent(in) :: coef_normalized(ao_num,mo_num)
real (c_double ) , intent(in) :: coef_normalized_t(ao_num,mo_num)
real (c_double ) , intent(in) :: ao_vgl(ao_num,5,point_num)
real (c_double ) , intent(out) :: mo_vgl(mo_num,5,point_num)
integer(c_int32_t), external :: qmckl_compute_mo_basis_vgl_f
info = qmckl_compute_mo_basis_vgl_f &
(context, ao_num, mo_num, point_num, coef_normalized, ao_vgl, mo_vgl)
integer(c_int32_t), external :: qmckl_compute_mo_basis_mo_vgl_doc_f
info = qmckl_compute_mo_basis_mo_vgl_doc_f &
(context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl)
end function qmckl_compute_mo_basis_vgl
end function qmckl_compute_mo_basis_mo_vgl_doc
#+end_src
#+begin_src c :tangle (eval c) :comments org
qmckl_exit_code
qmckl_compute_mo_basis_mo_vgl (const qmckl_context context,
const int64_t ao_num,
const int64_t mo_num,
const int64_t point_num,
const double* coef_normalized_t,
const double* ao_vgl,
double* const mo_vgl )
{
#ifdef HAVE_HPC
return qmckl_compute_mo_basis_mo_vgl_hpc (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl);
#else
return qmckl_compute_mo_basis_mo_vgl_doc (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl);
#endif
}
#+end_src
*** HPC version
#+begin_src c :tangle (eval h_func) :comments org
#ifdef HAVE_HPC
qmckl_exit_code
qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
const int64_t ao_num,
const int64_t mo_num,
const int64_t point_num,
const double* coef_normalized_t,
const double* ao_vgl,
double* const mo_vgl );
#endif
#+end_src
#+begin_src c :tangle (eval c) :comments org
#ifdef HAVE_HPC
qmckl_exit_code
qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
const int64_t ao_num,
const int64_t mo_num,
const int64_t point_num,
const double* restrict coef_normalized_t,
const double* restrict ao_vgl,
double* restrict const mo_vgl )
{
#ifdef HAVE_OPENMP
#pragma omp parallel for
#endif
for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) {
double* restrict const vgl1 = &(mo_vgl[ipoint*5*mo_num]);
const double* restrict avgl1 = &(ao_vgl[ipoint*5*ao_num]);
double* restrict const vgl2 = vgl1 + mo_num;
double* restrict const vgl3 = vgl1 + (mo_num << 1);
double* restrict const vgl4 = vgl1 + (mo_num << 1) + mo_num;
double* restrict const vgl5 = vgl1 + (mo_num << 2);
const double* restrict avgl2 = avgl1 + ao_num;
const double* restrict avgl3 = avgl1 + (ao_num << 1);
const double* restrict avgl4 = avgl1 + (ao_num << 1) + ao_num;
const double* restrict avgl5 = avgl1 + (ao_num << 2);
for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = 0.;
vgl2[i] = 0.;
vgl3[i] = 0.;
vgl4[i] = 0.;
vgl5[i] = 0.;
}
int64_t nidx=0;
int64_t idx[ao_num];
double av1[ao_num];
double av2[ao_num];
double av3[ao_num];
double av4[ao_num];
double av5[ao_num];
for (int64_t k=0 ; k<ao_num ; ++k) {
const double* restrict ck1 = coef_normalized_t + k*mo_num;
if (avgl1[k] != 0.) {
idx[nidx] = k;
av1[nidx] = avgl1[k];
av2[nidx] = avgl2[k];
av3[nidx] = avgl3[k];
av4[nidx] = avgl4[k];
av5[nidx] = avgl5[k];
++nidx;
}
}
int64_t n;
for (n=0 ; n < nidx-4 ; n+=4) {
int64_t k = idx[n];
const double* restrict ck1 = coef_normalized_t + idx[n ]*mo_num;
const double* restrict ck2 = coef_normalized_t + idx[n+1]*mo_num;
const double* restrict ck3 = coef_normalized_t + idx[n+2]*mo_num;
const double* restrict ck4 = coef_normalized_t + idx[n+3]*mo_num;
const double a11 = av1[n ];
const double a21 = av1[n+1];
const double a31 = av1[n+2];
const double a41 = av1[n+3];
const double a12 = av2[n ];
const double a22 = av2[n+1];
const double a32 = av2[n+2];
const double a42 = av2[n+3];
const double a13 = av3[n ];
const double a23 = av3[n+1];
const double a33 = av3[n+2];
const double a43 = av3[n+3];
const double a14 = av4[n ];
const double a24 = av4[n+1];
const double a34 = av4[n+2];
const double a44 = av4[n+3];
const double a15 = av5[n ];
const double a25 = av5[n+1];
const double a35 = av5[n+2];
const double a45 = av5[n+3];
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = vgl1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
vgl2[i] = vgl2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
vgl3[i] = vgl3[i] + ck1[i] * a13 + ck2[i] * a23 + ck3[i] * a33 + ck4[i] * a43;
vgl4[i] = vgl4[i] + ck1[i] * a14 + ck2[i] * a24 + ck3[i] * a34 + ck4[i] * a44;
vgl5[i] = vgl5[i] + ck1[i] * a15 + ck2[i] * a25 + ck3[i] * a35 + ck4[i] * a45;
}
}
int64_t n0 = nidx-4;
n0 = n0 < 0 ? 0 : n0;
for (int64_t n=n0 ; n < nidx ; n+=1) {
const double* restrict ck = coef_normalized_t + idx[n]*mo_num;
const double a1 = av1[n];
const double a2 = av2[n];
const double a3 = av3[n];
const double a4 = av4[n];
const double a5 = av5[n];
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] += ck[i] * a1;
vgl2[i] += ck[i] * a2;
vgl3[i] += ck[i] * a3;
vgl4[i] += ck[i] * a4;
vgl5[i] += ck[i] * a5;
}
}
}
return QMCKL_SUCCESS;
}
#endif
#+end_src
*** Test
@ -772,7 +1065,7 @@ assert (rc == QMCKL_SUCCESS);
assert(qmckl_mo_basis_provided(context));
double mo_vgl[walk_num*elec_num][5][chbrclf_mo_num];
rc = qmckl_get_mo_basis_vgl(context, &(mo_vgl[0][0][0]));
rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), walk_num*elec_num*5*chbrclf_mo_num);
assert (rc == QMCKL_SUCCESS);
// Test overlap of MO
@ -807,7 +1100,7 @@ assert (rc == QMCKL_SUCCESS);
//
// // Calculate value of MO (1st electron)
// double mo_vgl[5][walk_num][elec_num][chbrclf_mo_num];
// rc = qmckl_get_mo_basis_vgl(context, &(mo_vgl[0][0][0][0]));
// rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0][0]));
// assert (rc == QMCKL_SUCCESS);
// ovlmo1 += mo_vgl[0][0][0][0]*mo_vgl[0][0][0][0]*dr3;
// }

View File

@ -257,7 +257,8 @@ end interface
enough, we overwrite the data contained in them.
To set the data relative to the points in the context, one of the
following functions need to be called.
following functions need to be called. Here, ~num~ is the number of
points to set.
#+begin_src c :comments org :tangle (eval h_func)
qmckl_exit_code qmckl_set_point (qmckl_context context,
@ -348,7 +349,7 @@ qmckl_set_point (qmckl_context context,
#+begin_src f90 :comments org :tangle (eval fh_func) :noweb yes
interface
integer(c_int32_t) function qmckl_set_point(context, &
transp, coord, size_max) bind(C)
transp, coord, num) bind(C)
use, intrinsic :: iso_c_binding
import
implicit none
@ -356,7 +357,7 @@ interface
integer (c_int64_t) , intent(in) , value :: context
character(c_char) , intent(in) , value :: transp
real (c_double ) , intent(in) :: coord(*)
integer (c_int64_t) , intent(in) , value :: size_max
integer (c_int64_t) , intent(in) , value :: num
end function
end interface
#+end_src

View File

@ -6,7 +6,9 @@
#+INFOJS_OPT: toc:t mouse:underline path:org-info.js
#+HTML_HEAD: <link rel="stylesheet" title="Standard" href="qmckl.css" type="text/css" />
#+STARTUP: align fold nodlcheck hidestars oddeven lognotestate latexpreview
# STARTUP: align fold nodlcheck hidestars oddeven lognotestate latexpreview
#+STARTUP: showeverything
#+AUTHOR: TREX CoE
#+LANGUAGE: en