From 5ecb1d63267724ff06342e8863cea35609e79261 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 21 Mar 2022 18:32:39 +0100 Subject: [PATCH] Faster AOs --- org/qmckl_ao.org | 864 ++++++++++++++++++++++++--------------------- org/qmckl_blas.org | 6 +- tools/theme.setup | 4 +- 3 files changed, 463 insertions(+), 411 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index d3d5012..5bc9235 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -48,6 +48,7 @@ gradients and Laplacian of the atomic basis functions. #+begin_src c :tangle (eval h_private_func) #ifndef QMCKL_AO_HPF #define QMCKL_AO_HPF +#include "qmckl_blas_private_type.h" #+end_src @@ -57,6 +58,7 @@ gradients and Laplacian of the atomic basis functions. #include #include +#include "qmckl_blas_private_type.h" #+end_src #+begin_src f90 :tangle (eval f) :noweb yes @@ -105,6 +107,7 @@ int main() { #include "qmckl.h" #include "qmckl_context_private_type.h" #include "qmckl_memory_private_type.h" +#include "qmckl_blas_private_type.h" #include "qmckl_memory_private_func.h" #include "qmckl_ao_private_type.h" #include "qmckl_ao_private_func.h" @@ -271,35 +274,35 @@ end interface #+begin_src c :comments org :tangle (eval h_private_type) :noweb yes typedef struct qmckl_ao_basis_struct { - int64_t shell_num; - int64_t prim_num; - int64_t ao_num; - int64_t * nucleus_index; - int64_t * nucleus_shell_num; - int32_t * shell_ang_mom; - int64_t * shell_prim_num; - int64_t * shell_prim_index; - double * shell_factor; - double * exponent; - double * coefficient; - double * prim_factor; - double * ao_factor; + int64_t shell_num; + int64_t prim_num; + int64_t ao_num; + int64_t * restrict nucleus_index; + int64_t * restrict nucleus_shell_num; + int32_t * restrict shell_ang_mom; + int64_t * restrict shell_prim_num; + int64_t * restrict shell_prim_index; + double * restrict shell_factor; + double * restrict exponent; + double * restrict coefficient; + double * restrict prim_factor; + double * restrict ao_factor; - int64_t * nucleus_prim_index; - double * coefficient_normalized; - int32_t * nucleus_max_ang_mom; - double * nucleus_range; - double * primitive_vgl; - uint64_t primitive_vgl_date; - double * shell_vgl; - uint64_t shell_vgl_date; - double * ao_vgl; - uint64_t ao_vgl_date; + int64_t * restrict nucleus_prim_index; + double * restrict coefficient_normalized; + int32_t * restrict nucleus_max_ang_mom; + double * restrict nucleus_range; + double * restrict primitive_vgl; + uint64_t primitive_vgl_date; + double * restrict shell_vgl; + uint64_t shell_vgl_date; + double * restrict ao_vgl; + uint64_t ao_vgl_date; - int32_t uninitialized; - bool provided; - bool ao_cartesian; - char type; + int32_t uninitialized; + bool provided; + bool ao_cartesian; + char type; #ifdef HAVE_HPC <> #endif @@ -774,14 +777,14 @@ qmckl_set_ao_basis_shell_prim_num (qmckl_context context, } #+end_src - + #+begin_src c :comments org :tangle (eval h_func) qmckl_exit_code qmckl_set_ao_basis_shell_prim_index (qmckl_context context, const int64_t* shell_prim_index, const int64_t size_max); #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none qmckl_exit_code qmckl_set_ao_basis_shell_prim_index (qmckl_context context, @@ -835,15 +838,15 @@ qmckl_set_ao_basis_shell_prim_index (qmckl_context context, <> } #+end_src - - + + #+begin_src c :comments org :tangle (eval h_func) qmckl_exit_code qmckl_set_ao_basis_shell_factor (qmckl_context context, const double* shell_factor, const int64_t size_max); #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none qmckl_exit_code qmckl_set_ao_basis_shell_factor (qmckl_context context, @@ -897,15 +900,15 @@ qmckl_set_ao_basis_shell_factor (qmckl_context context, <> } #+end_src - - + + #+begin_src c :comments org :tangle (eval h_func) qmckl_exit_code qmckl_set_ao_basis_exponent (qmckl_context context, const double* exponent, const int64_t size_max); #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none qmckl_exit_code qmckl_set_ao_basis_exponent (qmckl_context context, @@ -2642,11 +2645,11 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { For faster access, we provide extra arrays for the shell information as: - - $C_{psa}$ = =coef_per_nucleus (iprim, ishell, inucl)= - - $\gamma_{pa}$ =expo_per_nucleus (iprim, inucl)= + - $C_{psa}$ = =coef_per_nucleus[inucl][ishell][iprim]= + - $\gamma_{pa}$ =expo_per_nucleus[inucl][iprim]= such that the computation of the radial parts can be vectorized. - + Exponents are sorted in increasing order to exit loops quickly, and only unique exponents are kept. This also allows to pack the exponents to enable vectorization of exponentials. @@ -2656,13 +2659,13 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { R_{sa} = \sum_p C_{psa} \gamma_{pa} \] which is a matrix-vector product. - - #+NAME:HPC_struct + + #+NAME:HPC_struct #+begin_src c :comments org :exports none /* HPC specific data structures */ - int32_t* prim_num_per_nucleus; - qmckl_tensor coef_per_nucleus; - qmckl_matrix expo_per_nucleus; + int32_t* restrict prim_num_per_nucleus; + qmckl_tensor coef_per_nucleus; + qmckl_matrix expo_per_nucleus; #+end_src #+begin_src c :comments org :tangle (eval h_private_func) :exports none @@ -2670,7 +2673,7 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context); #endif #+end_src - + #+begin_src c :comments org :tangle (eval c) :exports none /* Data structure for sorting */ struct combined { @@ -2681,9 +2684,9 @@ struct combined { /* Comparison function */ int compare_basis( const void * l, const void * r ) { - const struct combined * _l= (const struct combined *)l; - const struct combined * _r= (const struct combined *)r; - if( _l->expo > _r->expo ) return 1; + const struct combined * restrict _l= (const struct combined *)l; + const struct combined * restrict _r= (const struct combined *)r; + if( _l->expo > _r->expo ) return 1; if( _l->expo < _r->expo ) return -1; return 0; } @@ -2735,7 +2738,7 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) const int64_t ishell_start = ctx->ao_basis.nucleus_index[inucl]; const int64_t ishell_end = ctx->ao_basis.nucleus_index[inucl] + ctx->ao_basis.nucleus_shell_num[inucl]; for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { - + const int64_t iprim_start = ctx->ao_basis.shell_prim_index[ishell]; const int64_t iprim_end = ctx->ao_basis.shell_prim_index[ishell] + ctx->ao_basis.shell_prim_num[ishell]; for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) { @@ -2751,16 +2754,15 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) idx = 0; int64_t idx2 = 0; for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { - + memset(newcoef, 0, sizeof(newcoef)); const int64_t iprim_start = ctx->ao_basis.shell_prim_index[ishell]; const int64_t iprim_end = ctx->ao_basis.shell_prim_index[ishell] + ctx->ao_basis.shell_prim_num[ishell]; for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) { -// newcoef[idx] = ctx->ao_basis.coefficient_normalized[iprim]; - newcoef[idx] = ctx->ao_basis.coefficient[iprim]; + newcoef[idx] = ctx->ao_basis.coefficient_normalized[iprim]; idx += 1; } - for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { idx2 = expo[i].index; coef[ishell - ishell_start][i] = newcoef[idx2]; } @@ -2773,7 +2775,7 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) int64_t idxmax = 0; idx = 0; newidx[0] = 0; - for (int64_t i=1 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t i=1 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { if (expo[i].expo != expo[i-1].expo) { idx += 1; } @@ -2781,33 +2783,33 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) } idxmax = idx; - for (int64_t j=0 ; jao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { newcoef[newidx[i]] += coef[j][i]; } - for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { coef[j][i] = newcoef[i]; } } - for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { expo[newidx[i]].expo = expo[i].expo; } ctx->ao_basis.prim_num_per_nucleus[inucl] = idxmax+1; - for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { qmckl_mat( ctx->ao_basis.expo_per_nucleus, i, inucl ) = expo[i].expo; } - for (int64_t j=0 ; jao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t j=0 ; jao_basis.prim_num_per_nucleus[inucl] ; ++i) { qmckl_ten3( ctx->ao_basis.coef_per_nucleus, i, j, inucl ) = coef[j][i]; } } -/* - for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { +/* + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { printf("%4ld %4ld %15.5e | ", inucl, i, qmckl_mat( ctx->ao_basis.expo_per_nucleus, i, inucl )); for (int64_t j=0 ; jao_basis.coef_per_nucleus, i, j, inucl )); @@ -2818,11 +2820,11 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) */ } - + return QMCKL_SUCCESS; } #endif - + #+end_src *** Access functions @@ -4789,13 +4791,13 @@ end function qmckl_ao_polynomial_transp_vgl_doc_f #+begin_src c :tangle (eval c) :comments org qmckl_exit_code qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, - const double* X, - const double* R, + const double* restrict X, + const double* restrict R, const int32_t lmax, - int64_t* n, - int32_t* const L, + int64_t* restrict n, + int32_t* restrict const L, const int64_t ldl, - double* const VGL, + double* restrict const VGL, const int64_t ldv ) { const qmckl_context_struct* ctx = (qmckl_context_struct* const) context; @@ -4831,7 +4833,7 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, } else { - int32_t* const l[4] = {L, L+ldl, L+(ldl<<1), L+ldl+(ldl<<1)}; + int32_t* restrict const l[4] = {L, L+ldl, L+(ldl<<1), L+ldl+(ldl<<1)}; l[0][0] = 0; l[0][1] = 0; l[0][2] = 0; l[1][0] = 1; l[1][1] = 0; l[1][2] = 0; l[2][0] = 0; l[2][1] = 1; l[2][2] = 0; @@ -4852,11 +4854,11 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, } else { - double* const vgl1 = VGL; - double* const vgl2 = VGL + ldv; - double* const vgl3 = VGL + (ldv << 1); - double* const vgl4 = VGL + ldv + (ldv << 1); - double* const vgl5 = VGL + (ldv << 2); + double* restrict const vgl1 = VGL; + double* restrict const vgl2 = VGL + ldv; + double* restrict const vgl3 = VGL + (ldv << 1); + double* restrict const vgl4 = VGL + ldv + (ldv << 1); + double* restrict const vgl5 = VGL + (ldv << 2); for (int32_t k=0 ; k<4 ; ++k) { vgl2[k] = 0.0; @@ -4886,7 +4888,7 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, L[i] = ltmp[i]; } else { - int32_t* l[10]; + int32_t* restrict l[10]; for (int32_t i=0 ; i<10 ; ++i) { l[i] = L + i*ldl; } @@ -4922,11 +4924,11 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, for (int i=0 ; i<50 ; ++i) VGL[i] = tmp[i]; } else { - double* const vgl1 = VGL; - double* const vgl2 = VGL + ldv; - double* const vgl3 = VGL + (ldv << 1); - double* const vgl4 = VGL + 3*ldv; - double* const vgl5 = VGL + (ldv << 2); + double* restrict const vgl1 = VGL; + double* restrict const vgl2 = VGL + ldv; + double* restrict const vgl3 = VGL + (ldv << 1); + double* restrict const vgl4 = VGL + 3*ldv; + double* restrict const vgl5 = VGL + (ldv << 2); vgl1[0] = 1.0 ; vgl1[1] = Y[0] ; vgl1[2] = Y[1]; vgl1[3] = Y[2] ; vgl1[4] = Y[0]*Y[0]; vgl1[5] = Y[0]*Y[1]; @@ -4961,11 +4963,11 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, const int32_t size_max = (lmax+1)*(lmax+2)*(lmax+3)/6; if (ldv < size_max) return QMCKL_INVALID_ARG_9; - double* const vgl1 = VGL; - double* const vgl2 = VGL + ldv; - double* const vgl3 = VGL + (ldv<<1); - double* const vgl4 = VGL + ldv + (ldv<<1); - double* const vgl5 = VGL + (ldv<<2); + double* restrict const vgl1 = VGL; + double* restrict const vgl2 = VGL + ldv; + double* restrict const vgl3 = VGL + (ldv<<1); + double* restrict const vgl4 = VGL + ldv + (ldv<<1); + double* restrict const vgl5 = VGL + (ldv<<2); const double Y[3] = { X[0]-R[0], X[1]-R[1], @@ -5464,29 +5466,28 @@ end function qmckl_compute_ao_vgl_doc_f | ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | out | Value, gradients and Laplacian of the AOs | + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none #ifdef HAVE_HPC qmckl_exit_code qmckl_compute_ao_vgl_hpc_gaussian ( - const qmckl_context context, - const int64_t ao_num, - const int64_t shell_num, - const int64_t prim_num, - const int64_t point_num, - const int64_t nucl_num, - const double* coord, - const double* nucl_coord, - const int64_t* nucleus_index, - const int64_t* nucleus_shell_num, - const double* nucleus_range, - const int32_t* nucleus_max_ang_mom, - const int32_t* shell_ang_mom, - const int64_t* shell_prim_index, - const int64_t* shell_prim_num, - const double* ao_factor, - const double* expo, - const double* coef_normalized, - double* const ao_vgl ) + const qmckl_context context, + const int64_t ao_num, + const int64_t shell_num, + const int32_t* restrict prim_num_per_nucleus, + const int64_t point_num, + const int64_t nucl_num, + const double* restrict coord, + const double* restrict nucl_coord, + const int64_t* restrict nucleus_index, + const int64_t* restrict nucleus_shell_num, + const double* nucleus_range, + const int32_t* restrict nucleus_max_ang_mom, + const int32_t* restrict shell_ang_mom, + const double* restrict ao_factor, + const qmckl_matrix expo_per_nucleus, + const qmckl_tensor coef_per_nucleus, + double* restrict const ao_vgl ) { int32_t lstart[32]; for (int32_t l=0 ; l<32 ; ++l) { @@ -5495,37 +5496,41 @@ qmckl_compute_ao_vgl_hpc_gaussian ( int64_t ao_index[shell_num+1]; int64_t size_max = 0; + int64_t prim_max = 0; + int64_t shell_max = 0; { - int64_t k=0; - for (int inucl=0 ; inucl < nucl_num ; ++inucl) { - const int64_t ishell_start = nucleus_index[inucl]; - const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; - for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { - const int l = shell_ang_mom[ishell]; - ao_index[ishell] = k; - k += lstart[l+1] - lstart[l]; - size_max = size_max < lstart[l+1] ? lstart[l+1] : size_max; - } - } - ao_index[shell_num] = ao_num+1; + int64_t k=0; + for (int inucl=0 ; inucl < nucl_num ; ++inucl) { + prim_max = prim_num_per_nucleus[inucl] > prim_max ? + prim_num_per_nucleus[inucl] : prim_max; + shell_max = nucleus_shell_num[inucl] > shell_max ? + nucleus_shell_num[inucl] : shell_max; + const int64_t ishell_start = nucleus_index[inucl]; + const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; + for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { + const int l = shell_ang_mom[ishell]; + ao_index[ishell] = k; + k += lstart[l+1] - lstart[l]; + size_max = size_max < lstart[l+1] ? lstart[l+1] : size_max; + } + } + ao_index[shell_num] = ao_num+1; } /* Don't compute polynomials when the radial part is zero. */ double cutoff = -log(1.e-12); #ifdef HAVE_OPENMP - #pragma omp parallel +#pragma omp parallel #endif { qmckl_exit_code rc; - double c_[prim_num]; - double expo_[prim_num]; - double ar2[prim_num]; - int32_t powers[prim_num]; + double ar2[prim_max]; + int32_t powers[prim_max]; double poly_vgl_l1[4][4] = {{1.0, 0.0, 0.0, 0.0}, - {0.0, 1.0, 0.0, 0.0}, - {0.0, 0.0, 1.0, 0.0}, - {0.0, 0.0, 0.0, 1.0}}; + {0.0, 1.0, 0.0, 0.0}, + {0.0, 0.0, 1.0, 0.0}, + {0.0, 0.0, 0.0, 1.0}}; double poly_vgl_l2[5][10] = {{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, {0., 0., 1., 0., 0., 0., 0., 0., 0., 0.}, @@ -5533,231 +5538,247 @@ qmckl_compute_ao_vgl_hpc_gaussian ( {0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}}; double poly_vgl[5][size_max]; + double exp_mat[prim_max][8]; + double ce_mat[shell_max][8]; + + double coef_mat[nucl_num][shell_max][prim_max]; + for (int i=0 ; i cutoff * nucleus_range[inucl]) { - continue; + if (r2 > cutoff * nucleus_range[inucl]) { + continue; + } + + int64_t n_poly; + switch (nucleus_max_ang_mom[inucl]) { + case 0: + break; + + case 1: + poly_vgl_l1[0][1] = x; + poly_vgl_l1[0][2] = y; + poly_vgl_l1[0][3] = z; + break; + + case 2: + poly_vgl_l2[0][1] = x; + poly_vgl_l2[0][2] = y; + poly_vgl_l2[0][3] = z; + poly_vgl_l2[0][4] = x*x; + poly_vgl_l2[0][5] = x*y; + poly_vgl_l2[0][6] = x*z; + poly_vgl_l2[0][7] = y*y; + poly_vgl_l2[0][8] = y*z; + poly_vgl_l2[0][9] = z*z; + poly_vgl_l2[1][4] = x+x; + poly_vgl_l2[1][5] = y; + poly_vgl_l2[1][6] = z; + poly_vgl_l2[2][5] = x; + poly_vgl_l2[2][7] = y+y; + poly_vgl_l2[2][8] = z; + poly_vgl_l2[3][6] = x; + poly_vgl_l2[3][8] = y; + poly_vgl_l2[3][9] = z+z; + break; + + default: + rc = qmckl_ao_polynomial_transp_vgl_hpc(context, e_coord, n_coord, + nucleus_max_ang_mom[inucl], + &n_poly, powers, (int64_t) 3, + &(poly_vgl[0][0]), size_max); + + assert (rc == QMCKL_SUCCESS); + break; + } + + /* Compute all exponents */ + + int64_t nidx = 0; + for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) { + const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2; + if (v <= cutoff) { + ar2[iprim] = v; + ++nidx; + } else { + break; } + } - int64_t n_poly; - switch (nucleus_max_ang_mom[inucl]) { - case 0: - break; + for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { + exp_mat[iprim][0] = exp(-ar2[iprim]); + } + for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { + double f = qmckl_mat(expo_per_nucleus, iprim, inucl) * exp_mat[iprim][0]; + f = -f-f; + exp_mat[iprim][1] = f * x; + exp_mat[iprim][2] = f * y; + exp_mat[iprim][3] = f * z; + exp_mat[iprim][4] = f * (3.0 - 2.0 * ar2[iprim]); + } - case 1: - poly_vgl_l1[0][1] = x; - poly_vgl_l1[0][2] = y; - poly_vgl_l1[0][3] = z; - break; + for (int i=0 ; i 0) { + const double* restrict f = ao_factor + k; + const int64_t idx = lstart[l]; + + switch (nucleus_max_ang_mom[inucl]) { + case 0: break; - } - - /* Compute all exponents */ - /* - for (int32_t i=0 ; iao_basis.prim_num_per_nucleus ; ++i) { - const double v = expo[iprim]*r2; - if (v > cutoff) { - nidx = i; + case 1: + poly_vgl_1 = &(poly_vgl_l1[0][idx]); + poly_vgl_2 = &(poly_vgl_l1[1][idx]); + poly_vgl_3 = &(poly_vgl_l1[2][idx]); + poly_vgl_4 = &(poly_vgl_l1[3][idx]); + break; + case 2: + poly_vgl_1 = &(poly_vgl_l2[0][idx]); + poly_vgl_2 = &(poly_vgl_l2[1][idx]); + poly_vgl_3 = &(poly_vgl_l2[2][idx]); + poly_vgl_4 = &(poly_vgl_l2[3][idx]); + poly_vgl_5 = &(poly_vgl_l2[4][idx]); + break; + default: + poly_vgl_1 = &(poly_vgl[0][idx]); + poly_vgl_2 = &(poly_vgl[1][idx]); + poly_vgl_3 = &(poly_vgl[2][idx]); + poly_vgl_4 = &(poly_vgl[3][idx]); + poly_vgl_5 = &(poly_vgl[4][idx]); + } + switch (n) { + case(1): + ao_vgl_1[0] = s1 * f[0]; + ao_vgl_2[0] = s2 * f[0]; + ao_vgl_3[0] = s3 * f[0]; + ao_vgl_4[0] = s4 * f[0]; + ao_vgl_5[0] = s5; + break; + case (3): +#ifdef HAVE_OPENMP +#pragma omp simd +#endif + for (int il=0 ; il<3 ; ++il) { + ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; + ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; + ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il]; + ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il]; + ao_vgl_5[il] = (poly_vgl_1[il] * s5 + + 2.0*(poly_vgl_2[il] * s2 + + poly_vgl_3[il] * s3 + + poly_vgl_4[il] * s4 )) * f[il]; + } + break; + case(6): +#ifdef HAVE_OPENMP +#pragma omp simd +#endif + for (int il=0 ; il<6 ; ++il) { + ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; + ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; + ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il]; + ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il]; + ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 + + 2.0*(poly_vgl_2[il] * s2 + + poly_vgl_3[il] * s3 + + poly_vgl_4[il] * s4 )) * f[il]; + } + break; + default: +#ifdef HAVE_OPENMP +#pragma omp simd simdlen(8) +#endif + for (int il=0 ; ilao_basis.prim_num_per_nucleus ; ++i) { - } - */ - - const int64_t ishell_start = nucleus_index[inucl]; - const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; - for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { - - int64_t nidx = 0; - const int64_t iprim_start = shell_prim_index[ishell]; - const int64_t iprim_end = shell_prim_index[ishell] + shell_prim_num[ishell]; - for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) { - const double v = expo[iprim]*r2; - if (v <= cutoff) { - ar2[nidx] = v; - c_[nidx] = coef_normalized[iprim]; - expo_[nidx] = expo[iprim]; - ++nidx; - } - } - - double s1_ = 0.; - double s5_ = 0.; - double s6_ = 0.; - for (int idx=0 ; idx 0) { - const double* __restrict__ f = ao_factor + k; - const int64_t idx = lstart[l]; - - switch (nucleus_max_ang_mom[inucl]) { - case 0: - break; - case 1: - poly_vgl_1 = &(poly_vgl_l1[0][idx]); - poly_vgl_2 = &(poly_vgl_l1[1][idx]); - poly_vgl_3 = &(poly_vgl_l1[2][idx]); - poly_vgl_4 = &(poly_vgl_l1[3][idx]); - break; - case 2: - poly_vgl_1 = &(poly_vgl_l2[0][idx]); - poly_vgl_2 = &(poly_vgl_l2[1][idx]); - poly_vgl_3 = &(poly_vgl_l2[2][idx]); - poly_vgl_4 = &(poly_vgl_l2[3][idx]); - break; - default: - poly_vgl_1 = &(poly_vgl[0][idx]); - poly_vgl_2 = &(poly_vgl[1][idx]); - poly_vgl_3 = &(poly_vgl[2][idx]); - poly_vgl_4 = &(poly_vgl[3][idx]); - poly_vgl_5 = &(poly_vgl[4][idx]); - } - switch (n) { - case(1): - ao_vgl_1[0] = s1 * f[0]; - ao_vgl_2[0] = s2 * f[0]; - ao_vgl_3[0] = s3 * f[0]; - ao_vgl_4[0] = s4 * f[0]; - ao_vgl_5[0] = s5; - break; - case (3): -#ifdef HAVE_OPENMP - #pragma omp simd -#endif - for (int il=0 ; il<3 ; ++il) { - ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; - ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; - ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il]; - ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il]; - ao_vgl_5[il] = (poly_vgl_1[il] * s5 + - 2.0*(poly_vgl_2[il] * s2 + - poly_vgl_3[il] * s3 + - poly_vgl_4[il] * s4 )) * f[il]; - } - break; - case(5): -#ifdef HAVE_OPENMP - #pragma omp simd -#endif - for (int il=0 ; il<5 ; ++il) { - ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; - ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; - ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il]; - ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il]; - ao_vgl_5[il] = (poly_vgl_1[il] * s5 + - 2.0*(poly_vgl_2[il] * s2 + - poly_vgl_3[il] * s3 + - poly_vgl_4[il] * s4 )) * f[il]; - } - break; - default: -#ifdef HAVE_OPENMP - #pragma omp simd simdlen(8) -#endif - for (int il=0 ; ilao_basis.ao_num, ctx->ao_basis.shell_num, - ctx->ao_basis.prim_num, + ctx->ao_basis.prim_num_per_nucleus, ctx->point.num, ctx->nucleus.num, ctx->point.coord.data, @@ -5944,11 +5963,9 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context) ctx->ao_basis.nucleus_range, ctx->ao_basis.nucleus_max_ang_mom, ctx->ao_basis.shell_ang_mom, - ctx->ao_basis.shell_prim_index, - ctx->ao_basis.shell_prim_num, ctx->ao_basis.ao_factor, - ctx->ao_basis.exponent, - ctx->ao_basis.coefficient_normalized, + ctx->ao_basis.expo_per_nucleus, + ctx->ao_basis.coef_per_nucleus, ctx->ao_basis.ao_vgl); /* } else if (ctx->ao_basis.type == 'S') { @@ -6023,77 +6040,74 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context) import numpy as np from math import sqrt +h0 = 1.e-4 def f(a,x,y): return np.sum( [c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] ) +def fx(a,x,y): + return f(a,x,y) * (x[0] - y[0])**2 + + def df(a,x,y,n): - h0 = 1.e-6 - if n == 1: h = np.array([h0,0.,0.]) - elif n == 2: h = np.array([0.,h0,0.]) - elif n == 3: h = np.array([0.,0.,h0]) - return ( f(a,x+h,y) - f(a,x-h,y) ) / (2.*h0) + if n == 1: h = np.array([h0,0.,0.]) + elif n == 2: h = np.array([0.,h0,0.]) + elif n == 3: h = np.array([0.,0.,h0]) + return ( fx(a,x+h,y) - fx(a,x-h,y) ) / (2.*h0) +# return np.sum( [-2.*b * c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] ) * (x-y)[n-1] def d2f(a,x,y,n): - h0 = 1.e-6 if n == 1: h = np.array([h0,0.,0.]) elif n == 2: h = np.array([0.,h0,0.]) elif n == 3: h = np.array([0.,0.,h0]) - return ( f(a,x+h,y) - 2.*f(a,x,y) + f(a,x-h,y) ) / h0**2 + return ( fx(a,x+h,y) - 2.*fx(a,x,y) + fx(a,x-h,y) ) / h0**2 +# return np.sum( [( (2.*b*(x-y)[n-1])**2 -2.*b ) * c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] ) def lf(a,x,y): +# return np.sum( [( (2.*b*np.linalg.norm(x-y))**2 -6.*b ) * c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] ) return d2f(a,x,y,1) + d2f(a,x,y,2) + d2f(a,x,y,3) + elec_26_w1 = np.array( [ 1.49050402641, 2.90106987953, -1.05920815468 ] ) elec_15_w2 = np.array( [ -2.20180344582,-1.9113150239, 2.2193744778600002 ] ) nucl_1 = np.array( [ -2.302574592081335e+00, -3.542027060505035e-01, -5.334129934317614e-02] ) #double ao_vgl[prim_num][5][elec_num]; x = elec_26_w1 ; y = nucl_1 -a = [( 403.830000, 0.001473 * 5.9876577632594533e+04), - ( 121.170000, 0.012672 * 7.2836806319891484e+03), - ( 46.345000, 0.058045 * 1.3549226646722386e+03), - ( 19.721000, 0.170510 * 3.0376315094739988e+02), - ( 8.862400, 0.318596 * 7.4924579607137730e+01), - ( 3.996200, 0.384502 * 1.8590543353806009e+01), - ( 1.763600, 0.273774 * 4.4423176930919421e+00), - ( 0.706190, 0.074397 * 8.9541051939952665e-01)] +a = [( 4.0382999999999998e+02, 1.4732000000000000e-03 * 5.9876577632594533e+04), + ( 1.2117000000000000e+02, 1.2672500000000000e-02 * 7.2836806319891484e+03), + ( 4.6344999999999999e+01, 5.8045100000000002e-02 * 1.3549226646722386e+03), + ( 1.9721000000000000e+01, 1.7051030000000000e-01 * 3.0376315094739988e+02), + ( 8.8623999999999992e+00, 3.1859579999999998e-01 * 7.4924579607137730e+01), + ( 3.9962000000000000e+00, 3.8450230000000002e-01 * 1.8590543353806009e+01), + ( 1.7636000000000001e+00, 2.7377370000000001e-01 * 4.4423176930919421e+00), + ( 7.0618999999999998e-01, 7.4396699999999996e-02 * 8.9541051939952665e-01)] norm = sqrt(3.) -print ( "[0][26][219] : %25.15e"%(f(a,x,y) * (x[0] - y[0])**2) ) -print ( "[1][26][219] : %25.15e"%(df(a,x,y,1)* (x[0] - y[0]) * (x[1] - y[1]) + 2.*f(a,x,y) * (x[0] - y[0])) ) +# x^2 * g(r) +print ( "[26][0][219] : %25.15e"%(fx(a,x,y)) ) +print ( "[26][1][219] : %25.15e"%(df(a,x,y,1)) ) +print ( "[26][2][219] : %25.15e"%(df(a,x,y,2)) ) +print ( "[26][3][219] : %25.15e"%(df(a,x,y,3)) ) +print ( "[26][4][219] : %25.15e"%(lf(a,x,y)) ) -print ( "[0][26][220] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[1] - y[1]) )) -print ( "[1][26][220] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[1] - y[1]) + norm*f(a,x,y) * (x[1] - y[1])) ) +print ( "[26][0][220] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[1] - y[1]) )) +print ( "[26][1][220] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[1] - y[1]) + norm*f(a,x,y) * (x[1] - y[1])) ) -print ( "[0][26][221] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[2] - y[2])) ) -print ( "[1][26][221] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[2] - y[2]) + norm*f(a,x,y) * (x[2] - y[2])) ) +print ( "[26][0][221] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[2] - y[2])) ) +print ( "[26][1][221] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[2] - y[2]) + norm*f(a,x,y) * (x[2] - y[2])) ) -print ( "[0][26][222] : %25.15e"%(f(a,x,y) * (x[1] - y[1]) * (x[1] - y[1])) ) -print ( "[1][26][222] : %25.15e"%(df(a,x,y,1)* (x[1] - y[1]) * (x[1] - y[1])) ) +print ( "[26][0][222] : %25.15e"%(f(a,x,y) * (x[1] - y[1]) * (x[1] - y[1])) ) +print ( "[26][1][222] : %25.15e"%(df(a,x,y,1)* (x[1] - y[1]) * (x[1] - y[1])) ) -print ( "[0][26][223] : %25.15e"%(norm*f(a,x,y) * (x[1] - y[1]) * (x[2] - y[2])) ) -print ( "[1][26][223] : %25.15e"%(norm*df(a,x,y,1)* (x[1] - y[1]) * (x[2] - y[2])) ) +print ( "[26][0][223] : %25.15e"%(norm*f(a,x,y) * (x[1] - y[1]) * (x[2] - y[2])) ) +print ( "[26][1][223] : %25.15e"%(norm*df(a,x,y,1)* (x[1] - y[1]) * (x[2] - y[2])) ) -print ( "[0][26][224] : %25.15e"%(f(a,x,y) * (x[2] - y[2]) * (x[2] - y[2])) ) -print ( "[1][26][224] : %25.15e"%(df(a,x,y,1)* (x[2] - y[2]) * (x[2] - y[2])) ) +print ( "[26][0][224] : %25.15e"%(f(a,x,y) * (x[2] - y[2]) * (x[2] - y[2])) ) +print ( "[26][1][224] : %25.15e"%(df(a,x,y,1)* (x[2] - y[2]) * (x[2] - y[2])) ) #+end_src #+RESULTS: - #+begin_example - [0][26][219] : 1.020302912653649e-08 - [1][26][219] : -4.153046808203204e-08 - [0][26][220] : 1.516649653540510e-08 - [1][26][220] : -7.725252615816528e-08 - [0][26][221] : -4.686389780112468e-09 - [1][26][221] : 2.387073693851122e-08 - [0][26][222] : 7.514847283937212e-09 - [1][26][222] : -4.025905373647693e-08 - [0][26][223] : -4.021924592380977e-09 - [1][26][223] : 2.154652944642284e-08 - [0][26][224] : 7.175074806631758e-10 - [1][26][224] : -3.843880138733679e-09 - #+end_example #+begin_src c :tangle (eval c_test) :exports none { @@ -6125,32 +6139,69 @@ rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]), assert (rc == QMCKL_SUCCESS); printf("\n"); -printf(" ao_vgl ao_vgl[0][26][219] %25.15e\n", ao_vgl[26][0][219]); -printf(" ao_vgl ao_vgl[1][26][219] %25.15e\n", ao_vgl[26][1][219]); -printf(" ao_vgl ao_vgl[0][26][220] %25.15e\n", ao_vgl[26][0][220]); -printf(" ao_vgl ao_vgl[1][26][220] %25.15e\n", ao_vgl[26][1][220]); -printf(" ao_vgl ao_vgl[0][26][221] %25.15e\n", ao_vgl[26][0][221]); -printf(" ao_vgl ao_vgl[1][26][221] %25.15e\n", ao_vgl[26][1][221]); -printf(" ao_vgl ao_vgl[0][26][222] %25.15e\n", ao_vgl[26][0][222]); -printf(" ao_vgl ao_vgl[1][26][222] %25.15e\n", ao_vgl[26][1][222]); -printf(" ao_vgl ao_vgl[0][26][223] %25.15e\n", ao_vgl[26][0][223]); -printf(" ao_vgl ao_vgl[1][26][223] %25.15e\n", ao_vgl[26][1][223]); -printf(" ao_vgl ao_vgl[0][26][224] %25.15e\n", ao_vgl[26][0][224]); -printf(" ao_vgl ao_vgl[1][26][224] %25.15e\n", ao_vgl[26][1][224]); +printf(" ao_vgl ao_vgl[26][0][219] %25.15e\n", ao_vgl[26][0][219]); +printf(" ao_vgl ao_vgl[26][1][219] %25.15e\n", ao_vgl[26][1][219]); +printf(" ao_vgl ao_vgl[26][2][219] %25.15e\n", ao_vgl[26][2][219]); +printf(" ao_vgl ao_vgl[26][3][219] %25.15e\n", ao_vgl[26][3][219]); +printf(" ao_vgl ao_vgl[26][4][219] %25.15e\n", ao_vgl[26][4][219]); +printf(" ao_vgl ao_vgl[26][0][220] %25.15e\n", ao_vgl[26][0][220]); +printf(" ao_vgl ao_vgl[26][1][220] %25.15e\n", ao_vgl[26][1][220]); +printf(" ao_vgl ao_vgl[26][2][220] %25.15e\n", ao_vgl[26][2][220]); +printf(" ao_vgl ao_vgl[26][3][220] %25.15e\n", ao_vgl[26][3][220]); +printf(" ao_vgl ao_vgl[26][4][220] %25.15e\n", ao_vgl[26][4][220]); +printf(" ao_vgl ao_vgl[26][0][221] %25.15e\n", ao_vgl[26][0][221]); +printf(" ao_vgl ao_vgl[26][1][221] %25.15e\n", ao_vgl[26][1][221]); +printf(" ao_vgl ao_vgl[26][2][221] %25.15e\n", ao_vgl[26][2][221]); +printf(" ao_vgl ao_vgl[26][3][221] %25.15e\n", ao_vgl[26][3][221]); +printf(" ao_vgl ao_vgl[26][4][221] %25.15e\n", ao_vgl[26][4][221]); +printf(" ao_vgl ao_vgl[26][0][222] %25.15e\n", ao_vgl[26][0][222]); +printf(" ao_vgl ao_vgl[26][1][222] %25.15e\n", ao_vgl[26][1][222]); +printf(" ao_vgl ao_vgl[26][2][222] %25.15e\n", ao_vgl[26][2][222]); +printf(" ao_vgl ao_vgl[26][3][222] %25.15e\n", ao_vgl[26][3][222]); +printf(" ao_vgl ao_vgl[26][4][222] %25.15e\n", ao_vgl[26][4][222]); +printf(" ao_vgl ao_vgl[26][0][223] %25.15e\n", ao_vgl[26][0][223]); +printf(" ao_vgl ao_vgl[26][1][223] %25.15e\n", ao_vgl[26][1][223]); +printf(" ao_vgl ao_vgl[26][2][223] %25.15e\n", ao_vgl[26][2][223]); +printf(" ao_vgl ao_vgl[26][3][223] %25.15e\n", ao_vgl[26][3][223]); +printf(" ao_vgl ao_vgl[26][4][223] %25.15e\n", ao_vgl[26][4][223]); +printf(" ao_vgl ao_vgl[26][0][224] %25.15e\n", ao_vgl[26][0][224]); +printf(" ao_vgl ao_vgl[26][1][224] %25.15e\n", ao_vgl[26][1][224]); +printf(" ao_vgl ao_vgl[26][2][224] %25.15e\n", ao_vgl[26][2][224]); +printf(" ao_vgl ao_vgl[26][3][224] %25.15e\n", ao_vgl[26][3][224]); +printf(" ao_vgl ao_vgl[26][4][224] %25.15e\n", ao_vgl[26][4][224]); printf("\n"); -assert( fabs(ao_vgl[26][0][219] - ( 1.020298798341620e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][1][219] - (-4.928035238010602e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][0][220] - ( 1.516643537739178e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][1][220] - (-7.725221462603871e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][0][221] - (-4.686370882518819e-09)) < 1.e-14 ); -assert( fabs(ao_vgl[26][1][221] - ( 2.387064067626827e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][0][222] - ( 7.514816980753531e-09)) < 1.e-14 ); -assert( fabs(ao_vgl[26][1][222] - (-4.025889138635182e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][0][223] - (-4.021908374204471e-09)) < 1.e-14 ); -assert( fabs(ao_vgl[26][1][223] - ( 2.154644255710413e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][0][224] - ( 7.175045873560788e-10)) < 1.e-14 ); -assert( fabs(ao_vgl[26][1][224] - (-3.843864637762753e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][0][219] - ( 1.020298798341620e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][1][219] - ( -4.928035238010602e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][2][219] - ( -4.691009312035986e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][3][219] - ( 1.449504046436699e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][4][219] - ( 4.296442111843973e-07)) < 1.e-14 ); +assert( fabs(ao_vgl[26][0][220] - ( 1.516643537739178e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][1][220] - ( -7.725221462603871e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][2][220] - ( -6.507140835104833e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][3][220] - ( 2.154644255710413e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][4][220] - ( 6.365449359656352e-07)) < 1.e-14 ); +assert( fabs(ao_vgl[26][0][221] - ( -4.686370882518819e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][1][221] - ( 2.387064067626827e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][2][221] - ( 2.154644255710412e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][3][221] - ( -1.998731863512374e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][4][221] - ( -1.966899656441993e-07)) < 1.e-14 ); +assert( fabs(ao_vgl[26][0][222] - ( 7.514816980753531e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][1][222] - ( -4.025889138635182e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][2][222] - ( -2.993372555126361e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][3][222] - ( 1.067604670272904e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][4][222] - ( 3.168199650002648e-07)) < 1.e-14 ); +assert( fabs(ao_vgl[26][0][223] - ( -4.021908374204471e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][1][223] - ( 2.154644255710413e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][2][223] - ( 1.725594944732276e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][3][223] - ( -1.715339357718333e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][4][223] - ( -1.688020516893476e-07)) < 1.e-14 ); +assert( fabs(ao_vgl[26][0][224] - ( 7.175045873560788e-10)) < 1.e-14 ); +assert( fabs(ao_vgl[26][1][224] - ( -3.843864637762753e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][2][224] - ( -3.298857850451910e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][3][224] - ( -4.073047518790881e-10)) < 1.e-14 ); +assert( fabs(ao_vgl[26][4][224] - ( 3.153244195820293e-08)) < 1.e-14 ); + } #+end_src @@ -6204,6 +6255,5 @@ assert( fabs(ao_vgl[26][1][224] - (-3.843864637762753e-09)) < 1.e-14 ); # -*- mode: org -*- # vim: syntax=c - * TODO [0/1] Missing features :noexport: - [ ] Error messages to tell what is missing when initializing diff --git a/org/qmckl_blas.org b/org/qmckl_blas.org index 73f9e46..9cd7e18 100644 --- a/org/qmckl_blas.org +++ b/org/qmckl_blas.org @@ -85,7 +85,7 @@ are not intended to be passed to external codes. #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_vector { int64_t size; - double* data; + double* restrict data; } qmckl_vector; #+end_src @@ -161,7 +161,7 @@ qmckl_vector_free( qmckl_context context, #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_matrix { int64_t size[2]; - double* data; + double* restrict data; } qmckl_matrix; #+end_src @@ -247,7 +247,7 @@ qmckl_matrix_free( qmckl_context context, typedef struct qmckl_tensor { int64_t order; int64_t size[QMCKL_TENSOR_ORDER_MAX]; - double* data; + double* restrict data; } qmckl_tensor; #+end_src diff --git a/tools/theme.setup b/tools/theme.setup index 156662b..5849e08 100644 --- a/tools/theme.setup +++ b/tools/theme.setup @@ -6,7 +6,9 @@ #+INFOJS_OPT: toc:t mouse:underline path:org-info.js #+HTML_HEAD: -#+STARTUP: align fold nodlcheck hidestars oddeven lognotestate latexpreview +# STARTUP: align fold nodlcheck hidestars oddeven lognotestate latexpreview +#+STARTUP: showeverything + #+AUTHOR: TREX CoE #+LANGUAGE: en