1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-11-03 20:54:09 +01:00

Faster AOs

This commit is contained in:
Anthony Scemama 2022-03-21 18:32:39 +01:00
parent 9124c9209a
commit 5ecb1d6326
3 changed files with 463 additions and 411 deletions

View File

@ -48,6 +48,7 @@ gradients and Laplacian of the atomic basis functions.
#+begin_src c :tangle (eval h_private_func)
#ifndef QMCKL_AO_HPF
#define QMCKL_AO_HPF
#include "qmckl_blas_private_type.h"
#+end_src
@ -57,6 +58,7 @@ gradients and Laplacian of the atomic basis functions.
#include <stdbool.h>
#include <stdio.h>
#include "qmckl_blas_private_type.h"
#+end_src
#+begin_src f90 :tangle (eval f) :noweb yes
@ -105,6 +107,7 @@ int main() {
#include "qmckl.h"
#include "qmckl_context_private_type.h"
#include "qmckl_memory_private_type.h"
#include "qmckl_blas_private_type.h"
#include "qmckl_memory_private_func.h"
#include "qmckl_ao_private_type.h"
#include "qmckl_ao_private_func.h"
@ -274,26 +277,26 @@ typedef struct qmckl_ao_basis_struct {
int64_t shell_num;
int64_t prim_num;
int64_t ao_num;
int64_t * nucleus_index;
int64_t * nucleus_shell_num;
int32_t * shell_ang_mom;
int64_t * shell_prim_num;
int64_t * shell_prim_index;
double * shell_factor;
double * exponent;
double * coefficient;
double * prim_factor;
double * ao_factor;
int64_t * restrict nucleus_index;
int64_t * restrict nucleus_shell_num;
int32_t * restrict shell_ang_mom;
int64_t * restrict shell_prim_num;
int64_t * restrict shell_prim_index;
double * restrict shell_factor;
double * restrict exponent;
double * restrict coefficient;
double * restrict prim_factor;
double * restrict ao_factor;
int64_t * nucleus_prim_index;
double * coefficient_normalized;
int32_t * nucleus_max_ang_mom;
double * nucleus_range;
double * primitive_vgl;
int64_t * restrict nucleus_prim_index;
double * restrict coefficient_normalized;
int32_t * restrict nucleus_max_ang_mom;
double * restrict nucleus_range;
double * restrict primitive_vgl;
uint64_t primitive_vgl_date;
double * shell_vgl;
double * restrict shell_vgl;
uint64_t shell_vgl_date;
double * ao_vgl;
double * restrict ao_vgl;
uint64_t ao_vgl_date;
int32_t uninitialized;
@ -2642,8 +2645,8 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) {
For faster access, we provide extra arrays for the shell information as:
- $C_{psa}$ = =coef_per_nucleus (iprim, ishell, inucl)=
- $\gamma_{pa}$ =expo_per_nucleus (iprim, inucl)=
- $C_{psa}$ = =coef_per_nucleus[inucl][ishell][iprim]=
- $\gamma_{pa}$ =expo_per_nucleus[inucl][iprim]=
such that the computation of the radial parts can be vectorized.
@ -2660,7 +2663,7 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) {
#+NAME:HPC_struct
#+begin_src c :comments org :exports none
/* HPC specific data structures */
int32_t* prim_num_per_nucleus;
int32_t* restrict prim_num_per_nucleus;
qmckl_tensor coef_per_nucleus;
qmckl_matrix expo_per_nucleus;
#+end_src
@ -2681,8 +2684,8 @@ struct combined {
/* Comparison function */
int compare_basis( const void * l, const void * r )
{
const struct combined * _l= (const struct combined *)l;
const struct combined * _r= (const struct combined *)r;
const struct combined * restrict _l= (const struct combined *)l;
const struct combined * restrict _r= (const struct combined *)r;
if( _l->expo > _r->expo ) return 1;
if( _l->expo < _r->expo ) return -1;
return 0;
@ -2756,11 +2759,10 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context)
const int64_t iprim_start = ctx->ao_basis.shell_prim_index[ishell];
const int64_t iprim_end = ctx->ao_basis.shell_prim_index[ishell] + ctx->ao_basis.shell_prim_num[ishell];
for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) {
// newcoef[idx] = ctx->ao_basis.coefficient_normalized[iprim];
newcoef[idx] = ctx->ao_basis.coefficient[iprim];
newcoef[idx] = ctx->ao_basis.coefficient_normalized[iprim];
idx += 1;
}
for (int64_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
idx2 = expo[i].index;
coef[ishell - ishell_start][i] = newcoef[idx2];
}
@ -2773,7 +2775,7 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context)
int64_t idxmax = 0;
idx = 0;
newidx[0] = 0;
for (int64_t i=1 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
for (int32_t i=1 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
if (expo[i].expo != expo[i-1].expo) {
idx += 1;
}
@ -2781,33 +2783,33 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context)
}
idxmax = idx;
for (int64_t j=0 ; j<ishell_end-ishell_start ; ++j) {
for (int32_t j=0 ; j<ishell_end-ishell_start ; ++j) {
memset(newcoef, 0, sizeof(newcoef));
for (int64_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
newcoef[newidx[i]] += coef[j][i];
}
for (int64_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
coef[j][i] = newcoef[i];
}
}
for (int64_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
expo[newidx[i]].expo = expo[i].expo;
}
ctx->ao_basis.prim_num_per_nucleus[inucl] = idxmax+1;
for (int64_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
qmckl_mat( ctx->ao_basis.expo_per_nucleus, i, inucl ) = expo[i].expo;
}
for (int64_t j=0 ; j<ishell_end-ishell_start ; ++j) {
for (int64_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
for (int32_t j=0 ; j<ishell_end-ishell_start ; ++j) {
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
qmckl_ten3( ctx->ao_basis.coef_per_nucleus, i, j, inucl ) = coef[j][i];
}
}
/*
for (int64_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus[inucl] ; ++i) {
printf("%4ld %4ld %15.5e | ", inucl, i, qmckl_mat( ctx->ao_basis.expo_per_nucleus, i, inucl ));
for (int64_t j=0 ; j<ishell_end-ishell_start ; ++j) {
printf("%8.5f ", qmckl_ten3( ctx->ao_basis.coef_per_nucleus, i, j, inucl ));
@ -4789,13 +4791,13 @@ end function qmckl_ao_polynomial_transp_vgl_doc_f
#+begin_src c :tangle (eval c) :comments org
qmckl_exit_code
qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
const double* X,
const double* R,
const double* restrict X,
const double* restrict R,
const int32_t lmax,
int64_t* n,
int32_t* const L,
int64_t* restrict n,
int32_t* restrict const L,
const int64_t ldl,
double* const VGL,
double* restrict const VGL,
const int64_t ldv )
{
const qmckl_context_struct* ctx = (qmckl_context_struct* const) context;
@ -4831,7 +4833,7 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
} else {
int32_t* const l[4] = {L, L+ldl, L+(ldl<<1), L+ldl+(ldl<<1)};
int32_t* restrict const l[4] = {L, L+ldl, L+(ldl<<1), L+ldl+(ldl<<1)};
l[0][0] = 0; l[0][1] = 0; l[0][2] = 0;
l[1][0] = 1; l[1][1] = 0; l[1][2] = 0;
l[2][0] = 0; l[2][1] = 1; l[2][2] = 0;
@ -4852,11 +4854,11 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
} else {
double* const vgl1 = VGL;
double* const vgl2 = VGL + ldv;
double* const vgl3 = VGL + (ldv << 1);
double* const vgl4 = VGL + ldv + (ldv << 1);
double* const vgl5 = VGL + (ldv << 2);
double* restrict const vgl1 = VGL;
double* restrict const vgl2 = VGL + ldv;
double* restrict const vgl3 = VGL + (ldv << 1);
double* restrict const vgl4 = VGL + ldv + (ldv << 1);
double* restrict const vgl5 = VGL + (ldv << 2);
for (int32_t k=0 ; k<4 ; ++k) {
vgl2[k] = 0.0;
@ -4886,7 +4888,7 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
L[i] = ltmp[i];
} else {
int32_t* l[10];
int32_t* restrict l[10];
for (int32_t i=0 ; i<10 ; ++i) {
l[i] = L + i*ldl;
}
@ -4922,11 +4924,11 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
for (int i=0 ; i<50 ; ++i)
VGL[i] = tmp[i];
} else {
double* const vgl1 = VGL;
double* const vgl2 = VGL + ldv;
double* const vgl3 = VGL + (ldv << 1);
double* const vgl4 = VGL + 3*ldv;
double* const vgl5 = VGL + (ldv << 2);
double* restrict const vgl1 = VGL;
double* restrict const vgl2 = VGL + ldv;
double* restrict const vgl3 = VGL + (ldv << 1);
double* restrict const vgl4 = VGL + 3*ldv;
double* restrict const vgl5 = VGL + (ldv << 2);
vgl1[0] = 1.0 ; vgl1[1] = Y[0] ; vgl1[2] = Y[1];
vgl1[3] = Y[2] ; vgl1[4] = Y[0]*Y[0]; vgl1[5] = Y[0]*Y[1];
@ -4961,11 +4963,11 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
const int32_t size_max = (lmax+1)*(lmax+2)*(lmax+3)/6;
if (ldv < size_max) return QMCKL_INVALID_ARG_9;
double* const vgl1 = VGL;
double* const vgl2 = VGL + ldv;
double* const vgl3 = VGL + (ldv<<1);
double* const vgl4 = VGL + ldv + (ldv<<1);
double* const vgl5 = VGL + (ldv<<2);
double* restrict const vgl1 = VGL;
double* restrict const vgl2 = VGL + ldv;
double* restrict const vgl3 = VGL + (ldv<<1);
double* restrict const vgl4 = VGL + ldv + (ldv<<1);
double* restrict const vgl5 = VGL + (ldv<<2);
const double Y[3] = { X[0]-R[0],
X[1]-R[1],
@ -5464,6 +5466,7 @@ end function qmckl_compute_ao_vgl_doc_f
| ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells |
| ~ao_vgl~ | ~double[point_num][5][ao_num]~ | out | Value, gradients and Laplacian of the AOs |
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
#ifdef HAVE_HPC
qmckl_exit_code
@ -5471,22 +5474,20 @@ qmckl_compute_ao_vgl_hpc_gaussian (
const qmckl_context context,
const int64_t ao_num,
const int64_t shell_num,
const int64_t prim_num,
const int32_t* restrict prim_num_per_nucleus,
const int64_t point_num,
const int64_t nucl_num,
const double* coord,
const double* nucl_coord,
const int64_t* nucleus_index,
const int64_t* nucleus_shell_num,
const double* restrict coord,
const double* restrict nucl_coord,
const int64_t* restrict nucleus_index,
const int64_t* restrict nucleus_shell_num,
const double* nucleus_range,
const int32_t* nucleus_max_ang_mom,
const int32_t* shell_ang_mom,
const int64_t* shell_prim_index,
const int64_t* shell_prim_num,
const double* ao_factor,
const double* expo,
const double* coef_normalized,
double* const ao_vgl )
const int32_t* restrict nucleus_max_ang_mom,
const int32_t* restrict shell_ang_mom,
const double* restrict ao_factor,
const qmckl_matrix expo_per_nucleus,
const qmckl_tensor coef_per_nucleus,
double* restrict const ao_vgl )
{
int32_t lstart[32];
for (int32_t l=0 ; l<32 ; ++l) {
@ -5495,9 +5496,15 @@ qmckl_compute_ao_vgl_hpc_gaussian (
int64_t ao_index[shell_num+1];
int64_t size_max = 0;
int64_t prim_max = 0;
int64_t shell_max = 0;
{
int64_t k=0;
for (int inucl=0 ; inucl < nucl_num ; ++inucl) {
prim_max = prim_num_per_nucleus[inucl] > prim_max ?
prim_num_per_nucleus[inucl] : prim_max;
shell_max = nucleus_shell_num[inucl] > shell_max ?
nucleus_shell_num[inucl] : shell_max;
const int64_t ishell_start = nucleus_index[inucl];
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
@ -5514,14 +5521,12 @@ qmckl_compute_ao_vgl_hpc_gaussian (
double cutoff = -log(1.e-12);
#ifdef HAVE_OPENMP
#pragma omp parallel
#pragma omp parallel
#endif
{
qmckl_exit_code rc;
double c_[prim_num];
double expo_[prim_num];
double ar2[prim_num];
int32_t powers[prim_num];
double ar2[prim_max];
int32_t powers[prim_max];
double poly_vgl_l1[4][4] = {{1.0, 0.0, 0.0, 0.0},
{0.0, 1.0, 0.0, 0.0},
{0.0, 0.0, 1.0, 0.0},
@ -5533,8 +5538,20 @@ qmckl_compute_ao_vgl_hpc_gaussian (
{0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}};
double poly_vgl[5][size_max];
double exp_mat[prim_max][8];
double ce_mat[shell_max][8];
double coef_mat[nucl_num][shell_max][prim_max];
for (int i=0 ; i<nucl_num ; ++i) {
for (int j=0 ; j<shell_max; ++j) {
for (int k=0 ; k<prim_max; ++k) {
coef_mat[i][j][k] = qmckl_ten3(coef_per_nucleus,k, j, i);
}
}
}
#ifdef HAVE_OPENMP
#pragma omp for
#pragma omp for
#endif
for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) {
const double e_coord[3] = { coord[ipoint],
@ -5600,72 +5617,75 @@ qmckl_compute_ao_vgl_hpc_gaussian (
}
/* Compute all exponents */
/*
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus ; ++i) {
const double v = expo[iprim]*r2;
if (v > cutoff) {
nidx = i;
int64_t nidx = 0;
for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) {
const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2;
if (v <= cutoff) {
ar2[iprim] = v;
++nidx;
} else {
break;
}
ar2[i] = -v;
}
for (int32_t i=0 ; i<ctx->ao_basis.prim_num_per_nucleus ; ++i) {
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
exp_mat[iprim][0] = exp(-ar2[iprim]);
}
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
double f = qmckl_mat(expo_per_nucleus, iprim, inucl) * exp_mat[iprim][0];
f = -f-f;
exp_mat[iprim][1] = f * x;
exp_mat[iprim][2] = f * y;
exp_mat[iprim][3] = f * z;
exp_mat[iprim][4] = f * (3.0 - 2.0 * ar2[iprim]);
}
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
for (int j=0 ; j<5 ; ++j) {
ce_mat[i][j] = 0.;
}
}
for (int k=0 ; k<nidx; ++k) {
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
if (coef_mat[inucl][i][k] != 0.) {
for (int j=0 ; j<5 ; ++j) {
ce_mat[i][j] += coef_mat[inucl][i][k] * exp_mat[k][j];
}
}
}
}
*/
const int64_t ishell_start = nucleus_index[inucl];
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
int64_t nidx = 0;
const int64_t iprim_start = shell_prim_index[ishell];
const int64_t iprim_end = shell_prim_index[ishell] + shell_prim_num[ishell];
for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) {
const double v = expo[iprim]*r2;
if (v <= cutoff) {
ar2[nidx] = v;
c_[nidx] = coef_normalized[iprim];
expo_[nidx] = expo[iprim];
++nidx;
}
}
double s1_ = 0.;
double s5_ = 0.;
double s6_ = 0.;
for (int idx=0 ; idx<nidx ; ++idx) {
const double v = c_[idx] * exp(-ar2[idx]);
const double t = v*expo_[idx];
s1_ += v;
s6_ -= t;
s5_ += t*(4.0*ar2[idx]-6.0);
}
s6_ += s6_;
const double s1 = s1_;
const double s2 = s6_*x;
const double s3 = s6_*y;
const double s4 = s6_*z;
const double s5 = s5_;
const double s1 = ce_mat[ishell-ishell_start][0];
if (s1 == 0.0) continue;
const double s2 = ce_mat[ishell-ishell_start][1];
const double s3 = ce_mat[ishell-ishell_start][2];
const double s4 = ce_mat[ishell-ishell_start][3];
const double s5 = ce_mat[ishell-ishell_start][4];
const int64_t k = ao_index[ishell];
double* __restrict__ const ao_vgl_1 = ao_vgl + ipoint*5*ao_num + k;
double* restrict const ao_vgl_1 = ao_vgl + ipoint*5*ao_num + k;
const int32_t l = shell_ang_mom[ishell];
const int32_t n = lstart[l+1]-lstart[l];
double* __restrict__ const ao_vgl_2 = ao_vgl_1 + ao_num;
double* __restrict__ const ao_vgl_3 = ao_vgl_1 + (ao_num<<1);
double* __restrict__ const ao_vgl_4 = ao_vgl_1 + (ao_num<<1) + ao_num;
double* __restrict__ const ao_vgl_5 = ao_vgl_1 + (ao_num<<2);
double* restrict const ao_vgl_2 = ao_vgl_1 + ao_num;
double* restrict const ao_vgl_3 = ao_vgl_1 + (ao_num<<1);
double* restrict const ao_vgl_4 = ao_vgl_1 + (ao_num<<1) + ao_num;
double* restrict const ao_vgl_5 = ao_vgl_1 + (ao_num<<2);
double* __restrict__ poly_vgl_1;
double* __restrict__ poly_vgl_2;
double* __restrict__ poly_vgl_3;
double* __restrict__ poly_vgl_4;
double* __restrict__ poly_vgl_5;
double* restrict poly_vgl_1 = NULL;
double* restrict poly_vgl_2 = NULL;
double* restrict poly_vgl_3 = NULL;
double* restrict poly_vgl_4 = NULL;
double* restrict poly_vgl_5 = NULL;
if (nidx > 0) {
const double* __restrict__ f = ao_factor + k;
const double* restrict f = ao_factor + k;
const int64_t idx = lstart[l];
switch (nucleus_max_ang_mom[inucl]) {
@ -5682,6 +5702,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
poly_vgl_2 = &(poly_vgl_l2[1][idx]);
poly_vgl_3 = &(poly_vgl_l2[2][idx]);
poly_vgl_4 = &(poly_vgl_l2[3][idx]);
poly_vgl_5 = &(poly_vgl_l2[4][idx]);
break;
default:
poly_vgl_1 = &(poly_vgl[0][idx]);
@ -5700,7 +5721,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
break;
case (3):
#ifdef HAVE_OPENMP
#pragma omp simd
#pragma omp simd
#endif
for (int il=0 ; il<3 ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
@ -5713,16 +5734,16 @@ qmckl_compute_ao_vgl_hpc_gaussian (
poly_vgl_4[il] * s4 )) * f[il];
}
break;
case(5):
case(6):
#ifdef HAVE_OPENMP
#pragma omp simd
#pragma omp simd
#endif
for (int il=0 ; il<5 ; ++il) {
for (int il=0 ; il<6 ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 +
2.0*(poly_vgl_2[il] * s2 +
poly_vgl_3[il] * s3 +
poly_vgl_4[il] * s4 )) * f[il];
@ -5730,7 +5751,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
break;
default:
#ifdef HAVE_OPENMP
#pragma omp simd simdlen(8)
#pragma omp simd simdlen(8)
#endif
for (int il=0 ; il<n ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
@ -5754,10 +5775,10 @@ qmckl_compute_ao_vgl_hpc_gaussian (
}
}
}
}
}
}
}
}
}
return QMCKL_SUCCESS;
}
#endif
@ -5792,7 +5813,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
const qmckl_context context,
const int64_t ao_num,
const int64_t shell_num,
const int64_t prim_num,
const int32_t* prim_num_per_nucleus,
const int64_t point_num,
const int64_t nucl_num,
const double* coord,
@ -5802,11 +5823,9 @@ qmckl_compute_ao_vgl_hpc_gaussian (
const double* nucleus_range,
const int32_t* nucleus_max_ang_mom,
const int32_t* shell_ang_mom,
const int64_t* shell_prim_index,
const int64_t* shell_prim_num,
const double* ao_factor,
const double* expo,
const double* coef_normalized,
const qmckl_matrix expo_per_nucleus,
const qmckl_tensor coef_per_nucleus,
double* const ao_vgl );
#endif
#+end_src
@ -5934,7 +5953,7 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context)
rc = qmckl_compute_ao_vgl_hpc_gaussian(context,
ctx->ao_basis.ao_num,
ctx->ao_basis.shell_num,
ctx->ao_basis.prim_num,
ctx->ao_basis.prim_num_per_nucleus,
ctx->point.num,
ctx->nucleus.num,
ctx->point.coord.data,
@ -5944,11 +5963,9 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context)
ctx->ao_basis.nucleus_range,
ctx->ao_basis.nucleus_max_ang_mom,
ctx->ao_basis.shell_ang_mom,
ctx->ao_basis.shell_prim_index,
ctx->ao_basis.shell_prim_num,
ctx->ao_basis.ao_factor,
ctx->ao_basis.exponent,
ctx->ao_basis.coefficient_normalized,
ctx->ao_basis.expo_per_nucleus,
ctx->ao_basis.coef_per_nucleus,
ctx->ao_basis.ao_vgl);
/*
} else if (ctx->ao_basis.type == 'S') {
@ -6023,77 +6040,74 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context)
import numpy as np
from math import sqrt
h0 = 1.e-4
def f(a,x,y):
return np.sum( [c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] )
def fx(a,x,y):
return f(a,x,y) * (x[0] - y[0])**2
def df(a,x,y,n):
h0 = 1.e-6
if n == 1: h = np.array([h0,0.,0.])
elif n == 2: h = np.array([0.,h0,0.])
elif n == 3: h = np.array([0.,0.,h0])
return ( f(a,x+h,y) - f(a,x-h,y) ) / (2.*h0)
return ( fx(a,x+h,y) - fx(a,x-h,y) ) / (2.*h0)
# return np.sum( [-2.*b * c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] ) * (x-y)[n-1]
def d2f(a,x,y,n):
h0 = 1.e-6
if n == 1: h = np.array([h0,0.,0.])
elif n == 2: h = np.array([0.,h0,0.])
elif n == 3: h = np.array([0.,0.,h0])
return ( f(a,x+h,y) - 2.*f(a,x,y) + f(a,x-h,y) ) / h0**2
return ( fx(a,x+h,y) - 2.*fx(a,x,y) + fx(a,x-h,y) ) / h0**2
# return np.sum( [( (2.*b*(x-y)[n-1])**2 -2.*b ) * c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] )
def lf(a,x,y):
# return np.sum( [( (2.*b*np.linalg.norm(x-y))**2 -6.*b ) * c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] )
return d2f(a,x,y,1) + d2f(a,x,y,2) + d2f(a,x,y,3)
elec_26_w1 = np.array( [ 1.49050402641, 2.90106987953, -1.05920815468 ] )
elec_15_w2 = np.array( [ -2.20180344582,-1.9113150239, 2.2193744778600002 ] )
nucl_1 = np.array( [ -2.302574592081335e+00, -3.542027060505035e-01, -5.334129934317614e-02] )
#double ao_vgl[prim_num][5][elec_num];
x = elec_26_w1 ; y = nucl_1
a = [( 403.830000, 0.001473 * 5.9876577632594533e+04),
( 121.170000, 0.012672 * 7.2836806319891484e+03),
( 46.345000, 0.058045 * 1.3549226646722386e+03),
( 19.721000, 0.170510 * 3.0376315094739988e+02),
( 8.862400, 0.318596 * 7.4924579607137730e+01),
( 3.996200, 0.384502 * 1.8590543353806009e+01),
( 1.763600, 0.273774 * 4.4423176930919421e+00),
( 0.706190, 0.074397 * 8.9541051939952665e-01)]
a = [( 4.0382999999999998e+02, 1.4732000000000000e-03 * 5.9876577632594533e+04),
( 1.2117000000000000e+02, 1.2672500000000000e-02 * 7.2836806319891484e+03),
( 4.6344999999999999e+01, 5.8045100000000002e-02 * 1.3549226646722386e+03),
( 1.9721000000000000e+01, 1.7051030000000000e-01 * 3.0376315094739988e+02),
( 8.8623999999999992e+00, 3.1859579999999998e-01 * 7.4924579607137730e+01),
( 3.9962000000000000e+00, 3.8450230000000002e-01 * 1.8590543353806009e+01),
( 1.7636000000000001e+00, 2.7377370000000001e-01 * 4.4423176930919421e+00),
( 7.0618999999999998e-01, 7.4396699999999996e-02 * 8.9541051939952665e-01)]
norm = sqrt(3.)
print ( "[0][26][219] : %25.15e"%(f(a,x,y) * (x[0] - y[0])**2) )
print ( "[1][26][219] : %25.15e"%(df(a,x,y,1)* (x[0] - y[0]) * (x[1] - y[1]) + 2.*f(a,x,y) * (x[0] - y[0])) )
# x^2 * g(r)
print ( "[26][0][219] : %25.15e"%(fx(a,x,y)) )
print ( "[26][1][219] : %25.15e"%(df(a,x,y,1)) )
print ( "[26][2][219] : %25.15e"%(df(a,x,y,2)) )
print ( "[26][3][219] : %25.15e"%(df(a,x,y,3)) )
print ( "[26][4][219] : %25.15e"%(lf(a,x,y)) )
print ( "[0][26][220] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[1] - y[1]) ))
print ( "[1][26][220] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[1] - y[1]) + norm*f(a,x,y) * (x[1] - y[1])) )
print ( "[26][0][220] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[1] - y[1]) ))
print ( "[26][1][220] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[1] - y[1]) + norm*f(a,x,y) * (x[1] - y[1])) )
print ( "[0][26][221] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[2] - y[2])) )
print ( "[1][26][221] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[2] - y[2]) + norm*f(a,x,y) * (x[2] - y[2])) )
print ( "[26][0][221] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[2] - y[2])) )
print ( "[26][1][221] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[2] - y[2]) + norm*f(a,x,y) * (x[2] - y[2])) )
print ( "[0][26][222] : %25.15e"%(f(a,x,y) * (x[1] - y[1]) * (x[1] - y[1])) )
print ( "[1][26][222] : %25.15e"%(df(a,x,y,1)* (x[1] - y[1]) * (x[1] - y[1])) )
print ( "[26][0][222] : %25.15e"%(f(a,x,y) * (x[1] - y[1]) * (x[1] - y[1])) )
print ( "[26][1][222] : %25.15e"%(df(a,x,y,1)* (x[1] - y[1]) * (x[1] - y[1])) )
print ( "[0][26][223] : %25.15e"%(norm*f(a,x,y) * (x[1] - y[1]) * (x[2] - y[2])) )
print ( "[1][26][223] : %25.15e"%(norm*df(a,x,y,1)* (x[1] - y[1]) * (x[2] - y[2])) )
print ( "[26][0][223] : %25.15e"%(norm*f(a,x,y) * (x[1] - y[1]) * (x[2] - y[2])) )
print ( "[26][1][223] : %25.15e"%(norm*df(a,x,y,1)* (x[1] - y[1]) * (x[2] - y[2])) )
print ( "[0][26][224] : %25.15e"%(f(a,x,y) * (x[2] - y[2]) * (x[2] - y[2])) )
print ( "[1][26][224] : %25.15e"%(df(a,x,y,1)* (x[2] - y[2]) * (x[2] - y[2])) )
print ( "[26][0][224] : %25.15e"%(f(a,x,y) * (x[2] - y[2]) * (x[2] - y[2])) )
print ( "[26][1][224] : %25.15e"%(df(a,x,y,1)* (x[2] - y[2]) * (x[2] - y[2])) )
#+end_src
#+RESULTS:
#+begin_example
[0][26][219] : 1.020302912653649e-08
[1][26][219] : -4.153046808203204e-08
[0][26][220] : 1.516649653540510e-08
[1][26][220] : -7.725252615816528e-08
[0][26][221] : -4.686389780112468e-09
[1][26][221] : 2.387073693851122e-08
[0][26][222] : 7.514847283937212e-09
[1][26][222] : -4.025905373647693e-08
[0][26][223] : -4.021924592380977e-09
[1][26][223] : 2.154652944642284e-08
[0][26][224] : 7.175074806631758e-10
[1][26][224] : -3.843880138733679e-09
#+end_example
#+begin_src c :tangle (eval c_test) :exports none
{
@ -6125,32 +6139,69 @@ rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]),
assert (rc == QMCKL_SUCCESS);
printf("\n");
printf(" ao_vgl ao_vgl[0][26][219] %25.15e\n", ao_vgl[26][0][219]);
printf(" ao_vgl ao_vgl[1][26][219] %25.15e\n", ao_vgl[26][1][219]);
printf(" ao_vgl ao_vgl[0][26][220] %25.15e\n", ao_vgl[26][0][220]);
printf(" ao_vgl ao_vgl[1][26][220] %25.15e\n", ao_vgl[26][1][220]);
printf(" ao_vgl ao_vgl[0][26][221] %25.15e\n", ao_vgl[26][0][221]);
printf(" ao_vgl ao_vgl[1][26][221] %25.15e\n", ao_vgl[26][1][221]);
printf(" ao_vgl ao_vgl[0][26][222] %25.15e\n", ao_vgl[26][0][222]);
printf(" ao_vgl ao_vgl[1][26][222] %25.15e\n", ao_vgl[26][1][222]);
printf(" ao_vgl ao_vgl[0][26][223] %25.15e\n", ao_vgl[26][0][223]);
printf(" ao_vgl ao_vgl[1][26][223] %25.15e\n", ao_vgl[26][1][223]);
printf(" ao_vgl ao_vgl[0][26][224] %25.15e\n", ao_vgl[26][0][224]);
printf(" ao_vgl ao_vgl[1][26][224] %25.15e\n", ao_vgl[26][1][224]);
printf(" ao_vgl ao_vgl[26][0][219] %25.15e\n", ao_vgl[26][0][219]);
printf(" ao_vgl ao_vgl[26][1][219] %25.15e\n", ao_vgl[26][1][219]);
printf(" ao_vgl ao_vgl[26][2][219] %25.15e\n", ao_vgl[26][2][219]);
printf(" ao_vgl ao_vgl[26][3][219] %25.15e\n", ao_vgl[26][3][219]);
printf(" ao_vgl ao_vgl[26][4][219] %25.15e\n", ao_vgl[26][4][219]);
printf(" ao_vgl ao_vgl[26][0][220] %25.15e\n", ao_vgl[26][0][220]);
printf(" ao_vgl ao_vgl[26][1][220] %25.15e\n", ao_vgl[26][1][220]);
printf(" ao_vgl ao_vgl[26][2][220] %25.15e\n", ao_vgl[26][2][220]);
printf(" ao_vgl ao_vgl[26][3][220] %25.15e\n", ao_vgl[26][3][220]);
printf(" ao_vgl ao_vgl[26][4][220] %25.15e\n", ao_vgl[26][4][220]);
printf(" ao_vgl ao_vgl[26][0][221] %25.15e\n", ao_vgl[26][0][221]);
printf(" ao_vgl ao_vgl[26][1][221] %25.15e\n", ao_vgl[26][1][221]);
printf(" ao_vgl ao_vgl[26][2][221] %25.15e\n", ao_vgl[26][2][221]);
printf(" ao_vgl ao_vgl[26][3][221] %25.15e\n", ao_vgl[26][3][221]);
printf(" ao_vgl ao_vgl[26][4][221] %25.15e\n", ao_vgl[26][4][221]);
printf(" ao_vgl ao_vgl[26][0][222] %25.15e\n", ao_vgl[26][0][222]);
printf(" ao_vgl ao_vgl[26][1][222] %25.15e\n", ao_vgl[26][1][222]);
printf(" ao_vgl ao_vgl[26][2][222] %25.15e\n", ao_vgl[26][2][222]);
printf(" ao_vgl ao_vgl[26][3][222] %25.15e\n", ao_vgl[26][3][222]);
printf(" ao_vgl ao_vgl[26][4][222] %25.15e\n", ao_vgl[26][4][222]);
printf(" ao_vgl ao_vgl[26][0][223] %25.15e\n", ao_vgl[26][0][223]);
printf(" ao_vgl ao_vgl[26][1][223] %25.15e\n", ao_vgl[26][1][223]);
printf(" ao_vgl ao_vgl[26][2][223] %25.15e\n", ao_vgl[26][2][223]);
printf(" ao_vgl ao_vgl[26][3][223] %25.15e\n", ao_vgl[26][3][223]);
printf(" ao_vgl ao_vgl[26][4][223] %25.15e\n", ao_vgl[26][4][223]);
printf(" ao_vgl ao_vgl[26][0][224] %25.15e\n", ao_vgl[26][0][224]);
printf(" ao_vgl ao_vgl[26][1][224] %25.15e\n", ao_vgl[26][1][224]);
printf(" ao_vgl ao_vgl[26][2][224] %25.15e\n", ao_vgl[26][2][224]);
printf(" ao_vgl ao_vgl[26][3][224] %25.15e\n", ao_vgl[26][3][224]);
printf(" ao_vgl ao_vgl[26][4][224] %25.15e\n", ao_vgl[26][4][224]);
printf("\n");
assert( fabs(ao_vgl[26][0][219] - ( 1.020298798341620e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][219] - (-4.928035238010602e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][219] - ( -4.928035238010602e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][2][219] - ( -4.691009312035986e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][3][219] - ( 1.449504046436699e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][4][219] - ( 4.296442111843973e-07)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][220] - ( 1.516643537739178e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][220] - (-7.725221462603871e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][221] - (-4.686370882518819e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][220] - ( -7.725221462603871e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][2][220] - ( -6.507140835104833e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][3][220] - ( 2.154644255710413e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][4][220] - ( 6.365449359656352e-07)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][221] - ( -4.686370882518819e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][221] - ( 2.387064067626827e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][2][221] - ( 2.154644255710412e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][3][221] - ( -1.998731863512374e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][4][221] - ( -1.966899656441993e-07)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][222] - ( 7.514816980753531e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][222] - (-4.025889138635182e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][223] - (-4.021908374204471e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][222] - ( -4.025889138635182e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][2][222] - ( -2.993372555126361e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][3][222] - ( 1.067604670272904e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][4][222] - ( 3.168199650002648e-07)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][223] - ( -4.021908374204471e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][223] - ( 2.154644255710413e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][2][223] - ( 1.725594944732276e-08)) < 1.e-14 );
assert( fabs(ao_vgl[26][3][223] - ( -1.715339357718333e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][4][223] - ( -1.688020516893476e-07)) < 1.e-14 );
assert( fabs(ao_vgl[26][0][224] - ( 7.175045873560788e-10)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][224] - (-3.843864637762753e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][1][224] - ( -3.843864637762753e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][2][224] - ( -3.298857850451910e-09)) < 1.e-14 );
assert( fabs(ao_vgl[26][3][224] - ( -4.073047518790881e-10)) < 1.e-14 );
assert( fabs(ao_vgl[26][4][224] - ( 3.153244195820293e-08)) < 1.e-14 );
}
#+end_src
@ -6204,6 +6255,5 @@ assert( fabs(ao_vgl[26][1][224] - (-3.843864637762753e-09)) < 1.e-14 );
# -*- mode: org -*-
# vim: syntax=c
* TODO [0/1] Missing features :noexport:
- [ ] Error messages to tell what is missing when initializing

View File

@ -85,7 +85,7 @@ are not intended to be passed to external codes.
#+begin_src c :comments org :tangle (eval h_private_type) :exports none
typedef struct qmckl_vector {
int64_t size;
double* data;
double* restrict data;
} qmckl_vector;
#+end_src
@ -161,7 +161,7 @@ qmckl_vector_free( qmckl_context context,
#+begin_src c :comments org :tangle (eval h_private_type) :exports none
typedef struct qmckl_matrix {
int64_t size[2];
double* data;
double* restrict data;
} qmckl_matrix;
#+end_src
@ -247,7 +247,7 @@ qmckl_matrix_free( qmckl_context context,
typedef struct qmckl_tensor {
int64_t order;
int64_t size[QMCKL_TENSOR_ORDER_MAX];
double* data;
double* restrict data;
} qmckl_tensor;
#+end_src

View File

@ -6,7 +6,9 @@
#+INFOJS_OPT: toc:t mouse:underline path:org-info.js
#+HTML_HEAD: <link rel="stylesheet" title="Standard" href="qmckl.css" type="text/css" />
#+STARTUP: align fold nodlcheck hidestars oddeven lognotestate latexpreview
# STARTUP: align fold nodlcheck hidestars oddeven lognotestate latexpreview
#+STARTUP: showeverything
#+AUTHOR: TREX CoE
#+LANGUAGE: en