mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-05 11:00:36 +01:00
Fix bug in HPC AOs
This commit is contained in:
parent
73399e24ec
commit
d919c53c42
@ -222,13 +222,13 @@ AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])],
|
|||||||
if test "$ok" = "yes"; then
|
if test "$ok" = "yes"; then
|
||||||
if test "$GCC" = "yes"; then
|
if test "$GCC" = "yes"; then
|
||||||
CFLAGS="$CFLAGS \
|
CFLAGS="$CFLAGS \
|
||||||
-Wall -W -Wbad-function-cast -Wcast-qual -Warray-bounds -Wdisabled-optimization \
|
-g -Wall -W -Wbad-function-cast -Wcast-qual -Warray-bounds -Wdisabled-optimization \
|
||||||
-Wpointer-arith -Wcast-align -Wpedantic -Wextra -Walloc-zero -Werror \
|
-Wpointer-arith -Wcast-align -Wpedantic -Wextra -Walloc-zero -Werror \
|
||||||
"
|
"
|
||||||
fi
|
fi
|
||||||
if test "$GFC" = "yes"; then
|
if test "$GFC" = "yes"; then
|
||||||
FCFLAGS="$FCFLAGS \
|
FCFLAGS="$FCFLAGS \
|
||||||
-fcheck=all -Waliasing -Wampersand -Wconversion \
|
-g -fcheck=all -Waliasing -Wampersand -Wconversion \
|
||||||
-Wsurprising -ffpe-trap=zero,overflow,underflow \
|
-Wsurprising -ffpe-trap=zero,overflow,underflow \
|
||||||
-Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation \
|
-Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation \
|
||||||
-Wreal-q-constant -Wuninitialized -fbacktrace -finit-real=nan"
|
-Wreal-q-constant -Wuninitialized -fbacktrace -finit-real=nan"
|
||||||
|
@ -4605,7 +4605,7 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
|
|||||||
VGL[0 ] = 1.0;
|
VGL[0 ] = 1.0;
|
||||||
VGL[ldv ] = 0.0;
|
VGL[ldv ] = 0.0;
|
||||||
VGL[ldv<<1 ] = 0.0;
|
VGL[ldv<<1 ] = 0.0;
|
||||||
VGL[(ldv<<2)-1] = 0.0;
|
VGL[(ldv<<1)+ldv] = 0.0;
|
||||||
VGL[ldv<<2 ] = 0.0;
|
VGL[ldv<<2 ] = 0.0;
|
||||||
m=1;
|
m=1;
|
||||||
break;
|
break;
|
||||||
@ -4622,7 +4622,7 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
|
|||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
int32_t* const l[4] = {L, L+ldl, L+(ldl<<1), L+3*ldl};
|
int32_t* const l[4] = {L, L+ldl, L+(ldl<<1), L+ldl+(ldl<<1)};
|
||||||
l[0][0] = 0; l[0][1] = 0; l[0][2] = 0;
|
l[0][0] = 0; l[0][1] = 0; l[0][2] = 0;
|
||||||
l[1][0] = 1; l[1][1] = 0; l[1][2] = 0;
|
l[1][0] = 1; l[1][1] = 0; l[1][2] = 0;
|
||||||
l[2][0] = 0; l[2][1] = 1; l[2][2] = 0;
|
l[2][0] = 0; l[2][1] = 1; l[2][2] = 0;
|
||||||
@ -4646,7 +4646,7 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
|
|||||||
double* const vgl1 = VGL;
|
double* const vgl1 = VGL;
|
||||||
double* const vgl2 = VGL + ldv;
|
double* const vgl2 = VGL + ldv;
|
||||||
double* const vgl3 = VGL + (ldv << 1);
|
double* const vgl3 = VGL + (ldv << 1);
|
||||||
double* const vgl4 = VGL + 3*ldv;
|
double* const vgl4 = VGL + ldv + (ldv << 1);
|
||||||
double* const vgl5 = VGL + (ldv << 2);
|
double* const vgl5 = VGL + (ldv << 2);
|
||||||
|
|
||||||
for (int32_t k=0 ; k<4 ; ++k) {
|
for (int32_t k=0 ; k<4 ; ++k) {
|
||||||
@ -4747,12 +4747,6 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
|
|||||||
m=10;
|
m=10;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
case 3:
|
|
||||||
{
|
|
||||||
if (ldv < 20) return QMCKL_INVALID_ARG_9;
|
|
||||||
}
|
|
||||||
,*/
|
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
const int32_t size_max = (lmax+1)*(lmax+2)*(lmax+3)/6;
|
const int32_t size_max = (lmax+1)*(lmax+2)*(lmax+3)/6;
|
||||||
@ -4761,16 +4755,17 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
|
|||||||
double* const vgl1 = VGL;
|
double* const vgl1 = VGL;
|
||||||
double* const vgl2 = VGL + ldv;
|
double* const vgl2 = VGL + ldv;
|
||||||
double* const vgl3 = VGL + (ldv<<1);
|
double* const vgl3 = VGL + (ldv<<1);
|
||||||
double* const vgl4 = VGL + 3*ldv;
|
double* const vgl4 = VGL + ldv + (ldv<<1);
|
||||||
double* const vgl5 = VGL + (ldv<<2);
|
double* const vgl5 = VGL + (ldv<<2);
|
||||||
|
|
||||||
const double Y[3] = { X[0]-R[0],
|
const double Y[3] = { X[0]-R[0],
|
||||||
X[1]-R[1],
|
X[1]-R[1],
|
||||||
X[2]-R[2] };
|
X[2]-R[2] };
|
||||||
|
|
||||||
|
assert(size_max > lmax+3);
|
||||||
double pows[3][size_max];
|
double pows[3][size_max];
|
||||||
|
|
||||||
for (int32_t i=0 ; i<=2 ; ++i) {
|
for (int32_t i=0 ; i<3 ; ++i) {
|
||||||
pows[0][i] = 1.0;
|
pows[0][i] = 1.0;
|
||||||
pows[1][i] = 1.0;
|
pows[1][i] = 1.0;
|
||||||
pows[2][i] = 1.0;
|
pows[2][i] = 1.0;
|
||||||
@ -4782,9 +4777,9 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
|
|||||||
pows[2][i] = pows[2][i-1] * Y[2];
|
pows[2][i] = pows[2][i-1] * Y[2];
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t* l[24];
|
int32_t* l[size_max];
|
||||||
for (int32_t i=0 ; i<size_max ; ++i) {
|
for (int32_t i=0 ; i<size_max ; ++i) {
|
||||||
l[i] = L + i*ldl;
|
l[i] = &(L[i*ldl]);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int32_t i=0 ; i<4 ; ++i) {
|
for (int32_t i=0 ; i<4 ; ++i) {
|
||||||
@ -5040,13 +5035,13 @@ assert(0 == test_qmckl_ao_polynomial_vgl(context));
|
|||||||
|
|
||||||
double X[3] = { 1.1, 2.2, 3.3 };
|
double X[3] = { 1.1, 2.2, 3.3 };
|
||||||
double R[3] = { 0.2, 1.1, 3.0 };
|
double R[3] = { 0.2, 1.1, 3.0 };
|
||||||
int32_t ldv[4] = {1, 4, 10, 20};
|
int32_t ldv[8] = {1, 4, 10, 20, 35, 56, 84, 120};
|
||||||
for (int32_t ldl=3 ; ldl<5 ; ++ldl) {
|
for (int32_t ldl=3 ; ldl<=5 ; ++ldl) {
|
||||||
int64_t n;
|
int64_t n;
|
||||||
int32_t L0[24][ldl];
|
int32_t L0[200][ldl];
|
||||||
int32_t L1[24][ldl];
|
int32_t L1[200][ldl];
|
||||||
printf("ldl=%d\n", ldl);
|
printf("ldl=%d\n", ldl);
|
||||||
for (int32_t lmax=0 ; lmax<=3 ; lmax++) {
|
for (int32_t lmax=0 ; lmax<=7 ; lmax++) {
|
||||||
double VGL0[5][ldv[lmax]];
|
double VGL0[5][ldv[lmax]];
|
||||||
double VGL1[5][ldv[lmax]];
|
double VGL1[5][ldv[lmax]];
|
||||||
memset(&L0[0][0], 0, sizeof(L0));
|
memset(&L0[0][0], 0, sizeof(L0));
|
||||||
@ -5237,7 +5232,7 @@ end function qmckl_compute_ao_vgl_doc_f
|
|||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
** HPC version
|
** HPC version
|
||||||
#+NAME: qmckl_ao_vgl_args_hpc
|
#+NAME: qmckl_ao_vgl_args_hpc_gaussian
|
||||||
| Variable | Type | In/Out | Description |
|
| Variable | Type | In/Out | Description |
|
||||||
|-----------------------+--------------------------------+--------+----------------------------------------------|
|
|-----------------------+--------------------------------+--------+----------------------------------------------|
|
||||||
| ~context~ | ~qmckl_context~ | in | Global state |
|
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||||
@ -5262,7 +5257,7 @@ end function qmckl_compute_ao_vgl_doc_f
|
|||||||
|
|
||||||
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
|
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
|
||||||
qmckl_exit_code
|
qmckl_exit_code
|
||||||
qmckl_compute_ao_vgl_hpc (
|
qmckl_compute_ao_vgl_hpc_gaussian (
|
||||||
const qmckl_context context,
|
const qmckl_context context,
|
||||||
const int64_t ao_num,
|
const int64_t ao_num,
|
||||||
const int64_t shell_num,
|
const int64_t shell_num,
|
||||||
@ -5283,8 +5278,8 @@ qmckl_compute_ao_vgl_hpc (
|
|||||||
const double* coef_normalized,
|
const double* coef_normalized,
|
||||||
double* const ao_vgl )
|
double* const ao_vgl )
|
||||||
{
|
{
|
||||||
int32_t lstart[24];
|
int32_t lstart[32];
|
||||||
for (int32_t l=0 ; l<24 ; ++l) {
|
for (int32_t l=0 ; l<32 ; ++l) {
|
||||||
lstart[l] = l*(l+1)*(l+2)/6;
|
lstart[l] = l*(l+1)*(l+2)/6;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5398,7 +5393,6 @@ qmckl_compute_ao_vgl_hpc (
|
|||||||
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
|
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
|
||||||
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
|
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
|
||||||
|
|
||||||
/* /!\ Gaussian fuctions */
|
|
||||||
int64_t nidx = 0;
|
int64_t nidx = 0;
|
||||||
const int64_t iprim_start = shell_prim_index[ishell];
|
const int64_t iprim_start = shell_prim_index[ishell];
|
||||||
const int64_t iprim_end = shell_prim_index[ishell] + shell_prim_num[ishell];
|
const int64_t iprim_end = shell_prim_index[ishell] + shell_prim_num[ishell];
|
||||||
@ -5429,6 +5423,7 @@ qmckl_compute_ao_vgl_hpc (
|
|||||||
const double s4 = s6_*z;
|
const double s4 = s6_*z;
|
||||||
const double s5 = s5_;
|
const double s5 = s5_;
|
||||||
|
|
||||||
|
|
||||||
const int32_t l = shell_ang_mom[ishell];
|
const int32_t l = shell_ang_mom[ishell];
|
||||||
const int32_t n = lstart[l+1]-lstart[l];
|
const int32_t n = lstart[l+1]-lstart[l];
|
||||||
const int64_t k = ao_index[ishell];
|
const int64_t k = ao_index[ishell];
|
||||||
@ -5550,7 +5545,7 @@ qmckl_compute_ao_vgl_hpc (
|
|||||||
double* const ao_vgl );
|
double* const ao_vgl );
|
||||||
#+end_src
|
#+end_src
|
||||||
#+begin_src c :tangle (eval h_private_func) :comments org
|
#+begin_src c :tangle (eval h_private_func) :comments org
|
||||||
qmckl_exit_code qmckl_compute_ao_vgl_hpc (
|
qmckl_exit_code qmckl_compute_ao_vgl_hpc_gaussian (
|
||||||
const qmckl_context context,
|
const qmckl_context context,
|
||||||
const int64_t ao_num,
|
const int64_t ao_num,
|
||||||
const int64_t shell_num,
|
const int64_t shell_num,
|
||||||
@ -5691,7 +5686,8 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context)
|
|||||||
ctx->ao_basis.ao_vgl = ao_vgl;
|
ctx->ao_basis.ao_vgl = ao_vgl;
|
||||||
}
|
}
|
||||||
#ifdef HAVE_HPC
|
#ifdef HAVE_HPC
|
||||||
rc = qmckl_compute_ao_vgl_hpc(context,
|
if (ctx->ao_basis.type == 'G') {
|
||||||
|
rc = qmckl_compute_ao_vgl_hpc_gaussian(context,
|
||||||
ctx->ao_basis.ao_num,
|
ctx->ao_basis.ao_num,
|
||||||
ctx->ao_basis.shell_num,
|
ctx->ao_basis.shell_num,
|
||||||
ctx->ao_basis.prim_num,
|
ctx->ao_basis.prim_num,
|
||||||
@ -5710,6 +5706,45 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context)
|
|||||||
ctx->ao_basis.exponent,
|
ctx->ao_basis.exponent,
|
||||||
ctx->ao_basis.coefficient_normalized,
|
ctx->ao_basis.coefficient_normalized,
|
||||||
ctx->ao_basis.ao_vgl);
|
ctx->ao_basis.ao_vgl);
|
||||||
|
/*
|
||||||
|
} else if (ctx->ao_basis.type == 'S') {
|
||||||
|
rc = qmck_compute_ao_vgl_hpc_slater(context,
|
||||||
|
ctx->ao_basis.ao_num,
|
||||||
|
ctx->ao_basis.shell_num,
|
||||||
|
ctx->ao_basis.prim_num,
|
||||||
|
ctx->point.num,
|
||||||
|
ctx->nucleus.num,
|
||||||
|
ctx->point.coord.data,
|
||||||
|
ctx->nucleus.coord.data,
|
||||||
|
ctx->ao_basis.nucleus_index,
|
||||||
|
ctx->ao_basis.nucleus_shell_num,
|
||||||
|
ctx->ao_basis.nucleus_range,
|
||||||
|
ctx->ao_basis.nucleus_max_ang_mom,
|
||||||
|
ctx->ao_basis.shell_ang_mom,
|
||||||
|
ctx->ao_basis.shell_prim_index,
|
||||||
|
ctx->ao_basis.shell_prim_num,
|
||||||
|
ctx->ao_basis.ao_factor,
|
||||||
|
ctx->ao_basis.exponent,
|
||||||
|
ctx->ao_basis.coefficient_normalized,
|
||||||
|
ctx->ao_basis.ao_vgl);
|
||||||
|
,*/
|
||||||
|
} else {
|
||||||
|
rc = qmckl_compute_ao_vgl_doc(context,
|
||||||
|
ctx->ao_basis.ao_num,
|
||||||
|
ctx->ao_basis.shell_num,
|
||||||
|
ctx->point.num,
|
||||||
|
ctx->nucleus.num,
|
||||||
|
ctx->point.coord.data,
|
||||||
|
ctx->nucleus.coord.data,
|
||||||
|
ctx->ao_basis.nucleus_index,
|
||||||
|
ctx->ao_basis.nucleus_shell_num,
|
||||||
|
ctx->ao_basis.nucleus_range,
|
||||||
|
ctx->ao_basis.nucleus_max_ang_mom,
|
||||||
|
ctx->ao_basis.shell_ang_mom,
|
||||||
|
ctx->ao_basis.ao_factor,
|
||||||
|
ctx->ao_basis.shell_vgl,
|
||||||
|
ctx->ao_basis.ao_vgl);
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
rc = qmckl_compute_ao_vgl_doc(context,
|
rc = qmckl_compute_ao_vgl_doc(context,
|
||||||
ctx->ao_basis.ao_num,
|
ctx->ao_basis.ao_num,
|
||||||
|
Loading…
Reference in New Issue
Block a user