mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 20:36:01 +01:00
Switch for asm in AOs
This commit is contained in:
parent
07e1e44f05
commit
c0e82939ac
@ -6523,6 +6523,23 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
|
|
||||||
|
|
||||||
/* --- */
|
/* --- */
|
||||||
|
switch (8) {
|
||||||
|
case(5):
|
||||||
|
|
||||||
|
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
||||||
|
for (int j=0 ; j<5 ; ++j) {
|
||||||
|
ce_mat[i][j] = 0.;
|
||||||
|
}
|
||||||
|
for (int k=0 ; k<nidx; ++k) {
|
||||||
|
for (int j=0 ; j<5 ; ++j) {
|
||||||
|
ce_mat[i][j] += coef_mat[inucl][i][k] * exp_mat[k][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
case(8):
|
||||||
|
|
||||||
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd simdlen(8)
|
#pragma omp simd simdlen(8)
|
||||||
@ -6540,9 +6557,11 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
break;
|
||||||
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
case(256):
|
||||||
|
|
||||||
// Following loop is the assembly version AVX2
|
// Following loop is the assembly version AVX2
|
||||||
|
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
||||||
__asm__ volatile (
|
__asm__ volatile (
|
||||||
"mov %[k],%%RSI" "\n\t" // &(nidx)
|
"mov %[k],%%RSI" "\n\t" // &(nidx)
|
||||||
"mov %[a],%%RAX" "\n\t" // &(coef_mat[inucl][i][k])
|
"mov %[a],%%RAX" "\n\t" // &(coef_mat[inucl][i][k])
|
||||||
@ -6577,10 +6596,10 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
"ymm0", "ymm1", "ymm2", "memory" );
|
"ymm0", "ymm1", "ymm2", "memory" );
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
,*/
|
|
||||||
|
|
||||||
/*
|
break;
|
||||||
|
case(512):
|
||||||
|
|
||||||
// Following loop is the assembly version AVX512
|
// Following loop is the assembly version AVX512
|
||||||
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
||||||
__asm__ volatile (
|
__asm__ volatile (
|
||||||
@ -6608,7 +6627,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
: : [k] "m"(nidx), [c] "m"(&(ce_mat[i][0])), [a] "m"(&(coef_mat[inucl][i][0])), [b] "m"(&(exp_mat[0][0])) : "rax", "rbx", "rcx", "rsi", "zmm0", "zmm1", "memory" );
|
: : [k] "m"(nidx), [c] "m"(&(ce_mat[i][0])), [a] "m"(&(coef_mat[inucl][i][0])), [b] "m"(&(exp_mat[0][0])) : "rax", "rbx", "rcx", "rsi", "zmm0", "zmm1", "memory" );
|
||||||
|
|
||||||
}
|
}
|
||||||
,*/
|
}
|
||||||
|
|
||||||
const int64_t ishell_start = nucleus_index[inucl];
|
const int64_t ishell_start = nucleus_index[inucl];
|
||||||
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
|
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
|
||||||
|
Loading…
Reference in New Issue
Block a user