mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-23 04:44:03 +01:00
Optimized polynomials
This commit is contained in:
parent
e90e9a531c
commit
733d941c30
243
org/qmckl_ao.org
243
org/qmckl_ao.org
@ -4530,45 +4530,57 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
|
|||||||
|
|
||||||
int32_t m;
|
int32_t m;
|
||||||
|
|
||||||
|
switch (lmax) {
|
||||||
|
case 0:
|
||||||
|
{
|
||||||
|
if (ldv < 1) return QMCKL_INVALID_ARG_9;
|
||||||
|
L[0] = 0; L[1] = 0; L[2] = 0;
|
||||||
|
|
||||||
|
VGL[0 ] = 1.0;
|
||||||
|
VGL[ldv ] = 0.0;
|
||||||
|
VGL[ldv<<1 ] = 0.0;
|
||||||
|
VGL[ldv<<2-1] = 0.0;
|
||||||
|
VGL[ldv<<2 ] = 0.0;
|
||||||
|
m=1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 1:
|
||||||
|
{
|
||||||
|
if (ldv < 4) return QMCKL_INVALID_ARG_9;
|
||||||
|
|
||||||
|
if (ldl == 3) {
|
||||||
|
|
||||||
|
const int32_t ltmp[12] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1};
|
||||||
|
for (int i=0 ; i<12 ; ++i)
|
||||||
|
L[i] = ltmp[i];
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
int32_t* const l[4] = {L, L+ldl, L+(ldl<<1), L+3*ldl};
|
||||||
|
l[0][0] = 0; l[0][1] = 0; l[0][2] = 0;
|
||||||
|
l[1][0] = 1; l[1][1] = 0; l[1][2] = 0;
|
||||||
|
l[2][0] = 0; l[2][1] = 1; l[2][2] = 0;
|
||||||
|
l[3][0] = 0; l[3][1] = 0; l[3][2] = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (ldv == 4) {
|
||||||
|
|
||||||
|
const double tmp[20] = {1.0, X[0]-R[0], X[1]-R[1], X[2]-R[2], 0.0,
|
||||||
|
1.0, 0.0, 0.0, 0.0, 0.0,
|
||||||
|
1.0, 0.0, 0.0, 0.0, 0.0,
|
||||||
|
1.0, 0.0, 0.0, 0.0, 0.0};
|
||||||
|
for (int i=0 ; i<20 ; ++i)
|
||||||
|
VGL[i] = tmp[i];
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
double* const vgl1 = VGL;
|
double* const vgl1 = VGL;
|
||||||
double* const vgl2 = VGL + ldv;
|
double* const vgl2 = VGL + ldv;
|
||||||
double* const vgl3 = VGL + (ldv << 1);
|
double* const vgl3 = VGL + (ldv << 1);
|
||||||
double* const vgl4 = VGL + 3*ldv;
|
double* const vgl4 = VGL + 3*ldv;
|
||||||
double* const vgl5 = VGL + (ldv << 2);
|
double* const vgl5 = VGL + (ldv << 2);
|
||||||
|
|
||||||
switch (lmax) {
|
|
||||||
case 0:
|
|
||||||
{
|
|
||||||
if (ldv < 1) return QMCKL_INVALID_ARG_9;
|
|
||||||
L[0] = 0;
|
|
||||||
L[ldl] = 0;
|
|
||||||
L[ldl << 1] = 0;
|
|
||||||
m=1;
|
|
||||||
|
|
||||||
vgl1[0] = 1.0;
|
|
||||||
vgl2[0] = 0.0;
|
|
||||||
vgl3[0] = 0.0;
|
|
||||||
vgl4[0] = 0.0;
|
|
||||||
vgl5[0] = 0.0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case 1:
|
|
||||||
{
|
|
||||||
if (ldv < 4) return QMCKL_INVALID_ARG_9;
|
|
||||||
const double Y[3] = { X[0]-R[0],
|
|
||||||
X[1]-R[1],
|
|
||||||
X[2]-R[2] };
|
|
||||||
|
|
||||||
int32_t* const l[4] = {L, L+ldl, L+(ldl<<1), L+3*ldl};
|
|
||||||
for (int32_t i=0 ; i<4 ; ++i) {
|
|
||||||
l[i][0] = 0;
|
|
||||||
l[i][1] = 0;
|
|
||||||
l[i][2] = 0;
|
|
||||||
}
|
|
||||||
l[1][0] = 1;
|
|
||||||
l[2][1] = 1;
|
|
||||||
l[3][2] = 1;
|
|
||||||
|
|
||||||
for (int32_t k=0 ; k<4 ; ++k) {
|
for (int32_t k=0 ; k<4 ; ++k) {
|
||||||
vgl2[k] = 0.0;
|
vgl2[k] = 0.0;
|
||||||
vgl3[k] = 0.0;
|
vgl3[k] = 0.0;
|
||||||
@ -4576,92 +4588,95 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
|
|||||||
vgl5[k] = 0.0;
|
vgl5[k] = 0.0;
|
||||||
}
|
}
|
||||||
vgl1[0] = 1.0;
|
vgl1[0] = 1.0;
|
||||||
vgl1[1] = Y[0];
|
vgl1[1] = X[0]-R[0];
|
||||||
vgl1[2] = Y[1];
|
vgl1[2] = X[1]-R[1];
|
||||||
vgl1[3] = Y[2];
|
vgl1[3] = X[2]-R[2];
|
||||||
vgl2[1] = 1.0;
|
vgl2[1] = 1.0;
|
||||||
vgl3[2] = 1.0;
|
vgl3[2] = 1.0;
|
||||||
vgl4[3] = 1.0;
|
vgl4[3] = 1.0;
|
||||||
|
}
|
||||||
m=4;
|
m=4;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 2:
|
case 2:
|
||||||
{
|
{
|
||||||
if (ldv < 10) return QMCKL_INVALID_ARG_9;
|
if (ldv < 10) return QMCKL_INVALID_ARG_9;
|
||||||
const double Y[3] = { X[0]-R[0],
|
if (ldl == 3) {
|
||||||
X[1]-R[1],
|
const int32_t ltmp[30] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
|
||||||
X[2]-R[2] };
|
2, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0,
|
||||||
|
0, 1, 1, 0, 0, 2};
|
||||||
double pows[3][5] = { {1.0, 1.0, 1.0, Y[0], Y[0]*Y[0]},
|
for (int i=0 ; i<30 ; ++i)
|
||||||
{1.0, 1.0, 1.0, Y[1], Y[1]*Y[1]},
|
L[i] = ltmp[i];
|
||||||
{1.0, 1.0, 1.0, Y[2], Y[2]*Y[2]} };
|
|
||||||
|
|
||||||
|
} else {
|
||||||
int32_t* l[10];
|
int32_t* l[10];
|
||||||
for (int32_t i=0 ; i<10 ; ++i) {
|
for (int32_t i=0 ; i<10 ; ++i) {
|
||||||
l[i] = L + i*ldl;
|
l[i] = L + i*ldl;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int32_t i=0 ; i<4 ; ++i) {
|
l[0][0] = 0; l[0][1] = 0; l[0][2] = 0;
|
||||||
l[i][0] = 0;
|
l[1][0] = 1; l[1][1] = 0; l[1][2] = 0;
|
||||||
l[i][1] = 0;
|
l[2][0] = 0; l[2][1] = 1; l[2][2] = 0;
|
||||||
l[i][2] = 0;
|
l[3][0] = 0; l[3][1] = 0; l[3][2] = 1;
|
||||||
|
l[4][0] = 2; l[4][1] = 0; l[4][2] = 0;
|
||||||
|
l[5][0] = 1; l[5][1] = 1; l[5][2] = 0;
|
||||||
|
l[6][0] = 1; l[6][1] = 0; l[6][2] = 1;
|
||||||
|
l[7][0] = 0; l[7][1] = 2; l[7][2] = 0;
|
||||||
|
l[8][0] = 0; l[8][1] = 1; l[8][2] = 1;
|
||||||
|
l[9][0] = 0; l[9][1] = 0; l[9][2] = 2;
|
||||||
}
|
}
|
||||||
l[1][0] = 1;
|
|
||||||
l[2][1] = 1;
|
|
||||||
l[3][2] = 1;
|
|
||||||
|
|
||||||
for (int32_t k=0 ; k<4 ; ++k) {
|
const double Y[3] = { X[0]-R[0],
|
||||||
vgl2[k] = 0.0;
|
X[1]-R[1],
|
||||||
vgl3[k] = 0.0;
|
X[2]-R[2] };
|
||||||
vgl4[k] = 0.0;
|
|
||||||
vgl5[k] = 0.0;
|
if (ldv == 50) {
|
||||||
}
|
const double tmp[50] = {
|
||||||
vgl1[0] = 1.0;
|
1.0, Y[0], Y[1], Y[2], Y[0] * Y[0],
|
||||||
vgl1[1] = Y[0];
|
Y[0] * Y[1], Y[0] * Y[2], Y[1] * Y[1], Y[1] * Y[2], Y[2] * Y[2],
|
||||||
vgl1[2] = Y[1];
|
0.0, 1.0, 0.0, 0.0, Y[0] + Y[0],
|
||||||
vgl1[3] = Y[2];
|
Y[1], Y[2], 0.0, 0.0, 0.0,
|
||||||
|
0.0, 0.0, 1.0, 0.0, 0.0,
|
||||||
vgl2[1] = 1.0;
|
Y[0], 0.0, Y[1] + Y[1], Y[2], 0.0,
|
||||||
vgl3[2] = 1.0;
|
0.0, 0.0, 0.0, 1.0, 0.0,
|
||||||
vgl4[3] = 1.0;
|
0.0, Y[0], 0.0, Y[1], Y[2] + Y[2],
|
||||||
m=4;
|
0.0, 0.0, 0.0, 0.0, 2.0,
|
||||||
|
0.0, 0.0, 2.0, 0., 2.0 };
|
||||||
double da = 2.0;
|
for (int i=0 ; i<50 ; ++i)
|
||||||
|
VGL[i] = tmp[i];
|
||||||
for (int32_t a=2 ; a>=0 ; --a) {
|
} else {
|
||||||
double db = 2.0-da;
|
double* const vgl1 = VGL;
|
||||||
|
double* const vgl2 = VGL + ldv;
|
||||||
for (int32_t b=2-a ; b>=0 ; --b) {
|
double* const vgl3 = VGL + (ldv << 1);
|
||||||
const int32_t c = 2 - a - b;
|
double* const vgl4 = VGL + 3*ldv;
|
||||||
const double dc = 2.0 - da - db;
|
double* const vgl5 = VGL + (ldv << 2);
|
||||||
|
|
||||||
double xy = pows[0][a+2] * pows[1][b+2];
|
vgl1[0] = 1.0 ; vgl1[1] = Y[0] ; vgl1[2] = Y[1];
|
||||||
double yz = pows[1][b+2] * pows[2][c+2];
|
vgl1[3] = Y[2] ; vgl1[4] = Y[0]*Y[0]; vgl1[5] = Y[0]*Y[1];
|
||||||
double xz = pows[0][a+2] * pows[2][c+2];
|
vgl1[6] = Y[0]*Y[2]; vgl1[7] = Y[1]*Y[1]; vgl1[8] = Y[1]*Y[2];
|
||||||
|
vgl1[9] = Y[2]*Y[2];
|
||||||
l[m][0] = a;
|
|
||||||
l[m][1] = b;
|
vgl2[0] = 0.0 ; vgl2[1] = 1.0 ; vgl2[2] = 0.0 ;
|
||||||
l[m][2] = c;
|
vgl2[3] = 0.0 ; vgl2[4] = Y[0]+Y[0]; vgl2[5] = Y[1];
|
||||||
|
vgl2[6] = Y[2]; vgl2[7] = 0.0 ; vgl2[8] = 0.0 ;
|
||||||
vgl1[m] = xy * pows[2][c+2];
|
vgl2[9] = 0.0 ;
|
||||||
|
|
||||||
xy *= dc;
|
vgl3[0] = 0.0; vgl3[1] = 0.0 ; vgl3[2] = 1.0 ;
|
||||||
xz *= db;
|
vgl3[3] = 0.0; vgl3[4] = 0.0 ; vgl3[5] = Y[0];
|
||||||
yz *= da;
|
vgl3[6] = 0.0; vgl3[7] = Y[1]+Y[1]; vgl3[8] = Y[2];
|
||||||
|
vgl3[9] = 0.0;
|
||||||
vgl2[m] = pows[0][a+1] * yz;
|
|
||||||
vgl3[m] = pows[1][b+1] * xz;
|
vgl4[0] = 0.0 ; vgl4[1] = 0.0; vgl4[2] = 0.0 ;
|
||||||
vgl4[m] = pows[2][c+1] * xy;
|
vgl4[3] = 1.0 ; vgl4[4] = 0.0; vgl4[5] = 0.0 ;
|
||||||
|
vgl4[6] = Y[0] ; vgl4[7] = 0.0; vgl4[8] = Y[1];
|
||||||
vgl5[m] = (da-1.) * pows[0][a] * yz +
|
vgl4[9] = Y[2]+Y[2];
|
||||||
(db-1.) * pows[1][b] * xz +
|
|
||||||
(dc-1.) * pows[2][c] * xy;
|
vgl5[0] = 0.0; vgl5[1] = 0.0; vgl5[2] = 0.0;
|
||||||
|
vgl5[3] = 0.0; vgl5[4] = 2.0; vgl5[5] = 0.0;
|
||||||
db -= 1.0;
|
vgl5[6] = 0.0; vgl5[7] = 2.0; vgl5[8] = 0.0;
|
||||||
++m;
|
vgl5[9] = 2.0;
|
||||||
}
|
|
||||||
da -= 1.0;
|
|
||||||
}
|
}
|
||||||
|
m=10;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
@ -4674,9 +4689,17 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
|
|||||||
{
|
{
|
||||||
const int32_t size_max = (lmax+1)*(lmax+2)*(lmax+3)/6;
|
const int32_t size_max = (lmax+1)*(lmax+2)*(lmax+3)/6;
|
||||||
if (ldv < size_max) return QMCKL_INVALID_ARG_9;
|
if (ldv < size_max) return QMCKL_INVALID_ARG_9;
|
||||||
|
|
||||||
|
double* const vgl1 = VGL;
|
||||||
|
double* const vgl2 = VGL + ldv;
|
||||||
|
double* const vgl3 = VGL + (ldv << 1);
|
||||||
|
double* const vgl4 = VGL + 3*ldv;
|
||||||
|
double* const vgl5 = VGL + (ldv << 2);
|
||||||
|
|
||||||
const double Y[3] = { X[0]-R[0],
|
const double Y[3] = { X[0]-R[0],
|
||||||
X[1]-R[1],
|
X[1]-R[1],
|
||||||
X[2]-R[2] };
|
X[2]-R[2] };
|
||||||
|
|
||||||
double pows[3][size_max];
|
double pows[3][size_max];
|
||||||
|
|
||||||
for (int32_t i=0 ; i<=2 ; ++i) {
|
for (int32_t i=0 ; i<=2 ; ++i) {
|
||||||
@ -4949,21 +4972,22 @@ assert(0 == test_qmckl_ao_polynomial_vgl(context));
|
|||||||
|
|
||||||
double X[3] = { 1.1, 2.2, 3.3 };
|
double X[3] = { 1.1, 2.2, 3.3 };
|
||||||
double R[3] = { 0.2, 1.1, 3.0 };
|
double R[3] = { 0.2, 1.1, 3.0 };
|
||||||
|
int32_t ldv[4] = {1, 4, 10, 20};
|
||||||
|
for (int32_t ldl=3 ; ldl<5 ; ++ldl) {
|
||||||
int64_t n;
|
int64_t n;
|
||||||
int64_t ldl = 4;
|
|
||||||
int64_t ldv = 24;
|
|
||||||
int32_t L0[24][ldl];
|
int32_t L0[24][ldl];
|
||||||
int32_t L1[24][ldl];
|
int32_t L1[24][ldl];
|
||||||
double VGL0[5][ldv];
|
printf("ldl=%d\n", ldl);
|
||||||
double VGL1[5][ldv];
|
for (int32_t lmax=0 ; lmax<=3 ; lmax++) {
|
||||||
|
double VGL0[5][ldv[lmax]];
|
||||||
|
double VGL1[5][ldv[lmax]];
|
||||||
memset(&L0[0][0], 0, sizeof(L0));
|
memset(&L0[0][0], 0, sizeof(L0));
|
||||||
memset(&L1[0][0], 0, sizeof(L1));
|
memset(&L1[0][0], 0, sizeof(L1));
|
||||||
memset(&VGL0[0][0], 0, sizeof(VGL0));
|
memset(&VGL0[0][0], 0, sizeof(VGL0));
|
||||||
memset(&VGL1[0][0], 0, sizeof(VGL1));
|
memset(&VGL1[0][0], 0, sizeof(VGL1));
|
||||||
for (int32_t lmax=0 ; lmax<=3 ; lmax++) {
|
rc = qmckl_ao_polynomial_transp_vgl_doc (context, X, R, lmax, &n, &(L0[0][0]), ldl, &(VGL0[0][0]), ldv[lmax]);
|
||||||
rc = qmckl_ao_polynomial_transp_vgl_doc (context, X, R, lmax, &n, &(L0[0][0]), ldl, &(VGL0[0][0]), ldv);
|
|
||||||
assert (rc == QMCKL_SUCCESS);
|
assert (rc == QMCKL_SUCCESS);
|
||||||
rc = qmckl_ao_polynomial_transp_vgl_hpc (context, X, R, lmax, &n, &(L1[0][0]), ldl, &(VGL1[0][0]), ldv);
|
rc = qmckl_ao_polynomial_transp_vgl_hpc (context, X, R, lmax, &n, &(L1[0][0]), ldl, &(VGL1[0][0]), ldv[lmax]);
|
||||||
assert (rc == QMCKL_SUCCESS);
|
assert (rc == QMCKL_SUCCESS);
|
||||||
printf("lmax=%d\n", lmax);
|
printf("lmax=%d\n", lmax);
|
||||||
for (int32_t l=0 ; l<n ; ++l) {
|
for (int32_t l=0 ; l<n ; ++l) {
|
||||||
@ -4980,6 +5004,7 @@ for (int32_t k=0 ; k<5 ; ++k) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
* Combining radial and polynomial parts
|
* Combining radial and polynomial parts
|
||||||
@ -5251,7 +5276,7 @@ qmckl_compute_ao_vgl_hpc (
|
|||||||
|
|
||||||
int64_t n_poly;
|
int64_t n_poly;
|
||||||
const qmckl_exit_code rc =
|
const qmckl_exit_code rc =
|
||||||
qmckl_ao_polynomial_transp_vgl(context, e_coord, n_coord,
|
qmckl_ao_polynomial_transp_vgl_hpc(context, e_coord, n_coord,
|
||||||
nucleus_max_ang_mom[inucl], &n_poly, powers, (int64_t) 3,
|
nucleus_max_ang_mom[inucl], &n_poly, powers, (int64_t) 3,
|
||||||
&(poly_vgl[0][0]), size_max);
|
&(poly_vgl[0][0]), size_max);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user