diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 8cab593..6eff600 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -3566,7 +3566,7 @@ assert(0 == test_qmckl_ao_gaussian_vgl(context)); const double* coord, const double* nucl_coord, const double* expo, - double* const primitive_vgl ); + double* const primitive_vgl ); #+end_src @@ -3578,6 +3578,8 @@ function qmckl_compute_ao_basis_primitive_gaussian_vgl & use, intrinsic :: iso_c_binding use qmckl_constants + + use qmckl, only: qmckl_get_numprec_epsilon implicit none integer (qmckl_context), intent(in) , value :: context integer (c_int64_t) , intent(in) , value :: prim_num @@ -3596,7 +3598,7 @@ function qmckl_compute_ao_basis_primitive_gaussian_vgl & info = QMCKL_SUCCESS ! Don't compute exponentials when the result will be almost zero. - cutoff = 27.631021115928547d0 ! -dlog(1.d-12) + cutoff = -dlog(qmckl_get_numprec_epsilon(context)) do inucl=1,nucl_num ! C is zero-based, so shift bounds by one @@ -3867,7 +3869,7 @@ print ( "[7][4][26] : %e"% lf(a,x,y)) const double* nucl_coord, const double* expo, const double* coef_normalized, - double* const shell_vgl ); + double* const shell_vgl ); #+end_src #+begin_src f90 :comments org :tangle (eval f) :noweb yes @@ -3880,6 +3882,7 @@ function qmckl_compute_ao_basis_shell_gaussian_vgl( & use, intrinsic :: iso_c_binding use qmckl_constants + use qmckl, only: qmckl_get_numprec_epsilon implicit none integer (qmckl_context), intent(in) , value :: context @@ -3908,7 +3911,7 @@ function qmckl_compute_ao_basis_shell_gaussian_vgl( & ! Don't compute exponentials when the result will be almost zero. ! TODO : Use numerical precision here - cutoff = 27.631021115928547d0 !-dlog(1.d-12) + cutoff = -dlog(qmckl_get_numprec_epsilon(context)) do ipoint = 1, point_num @@ -4237,7 +4240,7 @@ print ( "[1][4][26] : %25.15e"% lf(a,x,y)) const double* X, const int32_t* LMAX, double* const P, - const int64_t ldp ); + const int64_t ldp ); #+end_src #+begin_src f90 :tangle (eval f) @@ -4317,7 +4320,7 @@ end function qmckl_ao_power *** Test :noexport: #+begin_src f90 :tangle (eval f_test) -function test_qmckl_ao_power(context) bind(C) +function test_qmckl_ao_power(context) bind(C) use qmckl implicit none @@ -4452,7 +4455,7 @@ assert(0 == test_qmckl_ao_power(context)); int32_t* const L, const int64_t ldl, double* const VGL, - const int64_t ldv ); + const int64_t ldv ); #+end_src #+CALL: generate_c_header(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_ao_polynomial_vgl_doc") @@ -4468,7 +4471,7 @@ assert(0 == test_qmckl_ao_power(context)); int32_t* const L, const int64_t ldl, double* const VGL, - const int64_t ldv ); + const int64_t ldv ); #+end_src #+begin_src c :tangle (eval c) :comments org @@ -4693,7 +4696,7 @@ end function qmckl_ao_polynomial_vgl_doc int32_t* const L, const int64_t ldl, double* const VGL, - const int64_t ldv ); + const int64_t ldv ); #+end_src #+CALL: generate_c_header(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_ao_polynomial_transp_vgl_doc") @@ -4709,7 +4712,7 @@ end function qmckl_ao_polynomial_vgl_doc int32_t* const L, const int64_t ldl, double* const VGL, - const int64_t ldv ); + const int64_t ldv ); #+end_src # #+CALL: generate_c_header(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_ao_polynomial_transp_vgl_hpc") @@ -5204,7 +5207,7 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, *** Test :noexport: #+begin_src f90 :tangle (eval f_test) -function test_qmckl_ao_polynomial_vgl(context) bind(C) +function test_qmckl_ao_polynomial_vgl(context) bind(C) use qmckl implicit none @@ -5579,7 +5582,7 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context, float exp_mat_sp[prim_max] __attribute__((aligned(64))); double ce_mat[shell_max] __attribute__((aligned(64))); - int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) = + int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) = malloc(nucl_num * sizeof (*coef_mat_sparse_idx)); double (*coef_mat_sparse)[shell_max][prim_max+1] __attribute__((aligned(64))) = @@ -6365,7 +6368,7 @@ qmckl_compute_ao_vgl_hpc_gaussian ( float exp_mat_sp[prim_max] __attribute__((aligned(64))); double ce_mat[shell_max][8] __attribute__((aligned(64))) ; - int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) = + int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) = malloc(nucl_num * sizeof (*coef_mat_sparse_idx)); double (*coef_mat_sparse)[shell_max][prim_max+1] __attribute__((aligned(64))) = diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 29cb961..bf58dac 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -1309,18 +1309,23 @@ qmckl_compute_mo_basis_mo_value_hpc_sp (const qmckl_context context, } #ifdef HAVE_OPENMP -#pragma omp parallel for +#pragma omp parallel +#endif + { + int64_t* __attribute__((aligned(64))) idx = calloc((size_t) ao_num, sizeof(int64_t)); + float* __attribute__((aligned(64))) av1 = calloc((size_t) ao_num, sizeof(float)); + assert (idx != NULL); + assert (av1 != NULL); + +#ifdef HAVE_OPENMP +#pragma omp for #endif for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) { double* restrict const vgl1 = &(mo_value[ipoint*mo_num]); const double* restrict avgl1 = &(ao_value[ipoint*ao_num]); // memset(vgl_sp, 0, mo_num*sizeof(float)); - int64_t* __attribute__((aligned(64))) idx = calloc((size_t) ao_num, sizeof(int64_t)); - float* __attribute__((aligned(64))) av1 = calloc((size_t) ao_num, sizeof(float)); float* __attribute__((aligned(64))) vgl_sp = calloc((size_t) mo_num, sizeof(float)); - assert (idx != NULL); - assert (av1 != NULL); assert (vgl_sp != NULL); int64_t nidx=0; @@ -1374,9 +1379,10 @@ qmckl_compute_mo_basis_mo_value_hpc_sp (const qmckl_context context, for (int64_t i=0 ; i