diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 5e5a99a..fbbb178 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -4530,153 +4530,176 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, int32_t m; - double* const vgl1 = VGL; - double* const vgl2 = VGL + ldv; - double* const vgl3 = VGL + (ldv << 1); - double* const vgl4 = VGL + 3*ldv; - double* const vgl5 = VGL + (ldv << 2); - switch (lmax) { case 0: { if (ldv < 1) return QMCKL_INVALID_ARG_9; - L[0] = 0; - L[ldl] = 0; - L[ldl << 1] = 0; - m=1; + L[0] = 0; L[1] = 0; L[2] = 0; - vgl1[0] = 1.0; - vgl2[0] = 0.0; - vgl3[0] = 0.0; - vgl4[0] = 0.0; - vgl5[0] = 0.0; + VGL[0 ] = 1.0; + VGL[ldv ] = 0.0; + VGL[ldv<<1 ] = 0.0; + VGL[ldv<<2-1] = 0.0; + VGL[ldv<<2 ] = 0.0; + m=1; break; } case 1: { if (ldv < 4) return QMCKL_INVALID_ARG_9; - const double Y[3] = { X[0]-R[0], - X[1]-R[1], - X[2]-R[2] }; - int32_t* const l[4] = {L, L+ldl, L+(ldl<<1), L+3*ldl}; - for (int32_t i=0 ; i<4 ; ++i) { - l[i][0] = 0; - l[i][1] = 0; - l[i][2] = 0; - } - l[1][0] = 1; - l[2][1] = 1; - l[3][2] = 1; + if (ldl == 3) { - for (int32_t k=0 ; k<4 ; ++k) { - vgl2[k] = 0.0; - vgl3[k] = 0.0; - vgl4[k] = 0.0; - vgl5[k] = 0.0; + const int32_t ltmp[12] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1}; + for (int i=0 ; i<12 ; ++i) + L[i] = ltmp[i]; + + } else { + + int32_t* const l[4] = {L, L+ldl, L+(ldl<<1), L+3*ldl}; + l[0][0] = 0; l[0][1] = 0; l[0][2] = 0; + l[1][0] = 1; l[1][1] = 0; l[1][2] = 0; + l[2][0] = 0; l[2][1] = 1; l[2][2] = 0; + l[3][0] = 0; l[3][1] = 0; l[3][2] = 1; + } + + + if (ldv == 4) { + + const double tmp[20] = {1.0, X[0]-R[0], X[1]-R[1], X[2]-R[2], 0.0, + 1.0, 0.0, 0.0, 0.0, 0.0, + 1.0, 0.0, 0.0, 0.0, 0.0, + 1.0, 0.0, 0.0, 0.0, 0.0}; + for (int i=0 ; i<20 ; ++i) + VGL[i] = tmp[i]; + + } else { + + double* const vgl1 = VGL; + double* const vgl2 = VGL + ldv; + double* const vgl3 = VGL + (ldv << 1); + double* const vgl4 = VGL + 3*ldv; + double* const vgl5 = VGL + (ldv << 2); + + for (int32_t k=0 ; k<4 ; ++k) { + vgl2[k] = 0.0; + vgl3[k] = 0.0; + vgl4[k] = 0.0; + vgl5[k] = 0.0; + } + vgl1[0] = 1.0; + vgl1[1] = X[0]-R[0]; + vgl1[2] = X[1]-R[1]; + vgl1[3] = X[2]-R[2]; + vgl2[1] = 1.0; + vgl3[2] = 1.0; + vgl4[3] = 1.0; } - vgl1[0] = 1.0; - vgl1[1] = Y[0]; - vgl1[2] = Y[1]; - vgl1[3] = Y[2]; - vgl2[1] = 1.0; - vgl3[2] = 1.0; - vgl4[3] = 1.0; m=4; break; } case 2: { if (ldv < 10) return QMCKL_INVALID_ARG_9; + if (ldl == 3) { + const int32_t ltmp[30] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, + 2, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, + 0, 1, 1, 0, 0, 2}; + for (int i=0 ; i<30 ; ++i) + L[i] = ltmp[i]; + + } else { + int32_t* l[10]; + for (int32_t i=0 ; i<10 ; ++i) { + l[i] = L + i*ldl; + } + + l[0][0] = 0; l[0][1] = 0; l[0][2] = 0; + l[1][0] = 1; l[1][1] = 0; l[1][2] = 0; + l[2][0] = 0; l[2][1] = 1; l[2][2] = 0; + l[3][0] = 0; l[3][1] = 0; l[3][2] = 1; + l[4][0] = 2; l[4][1] = 0; l[4][2] = 0; + l[5][0] = 1; l[5][1] = 1; l[5][2] = 0; + l[6][0] = 1; l[6][1] = 0; l[6][2] = 1; + l[7][0] = 0; l[7][1] = 2; l[7][2] = 0; + l[8][0] = 0; l[8][1] = 1; l[8][2] = 1; + l[9][0] = 0; l[9][1] = 0; l[9][2] = 2; + } + const double Y[3] = { X[0]-R[0], X[1]-R[1], X[2]-R[2] }; - double pows[3][5] = { {1.0, 1.0, 1.0, Y[0], Y[0]*Y[0]}, - {1.0, 1.0, 1.0, Y[1], Y[1]*Y[1]}, - {1.0, 1.0, 1.0, Y[2], Y[2]*Y[2]} }; - - int32_t* l[10]; - for (int32_t i=0 ; i<10 ; ++i) { - l[i] = L + i*ldl; - } - - for (int32_t i=0 ; i<4 ; ++i) { - l[i][0] = 0; - l[i][1] = 0; - l[i][2] = 0; - } - l[1][0] = 1; - l[2][1] = 1; - l[3][2] = 1; - - for (int32_t k=0 ; k<4 ; ++k) { - vgl2[k] = 0.0; - vgl3[k] = 0.0; - vgl4[k] = 0.0; - vgl5[k] = 0.0; - } - vgl1[0] = 1.0; - vgl1[1] = Y[0]; - vgl1[2] = Y[1]; - vgl1[3] = Y[2]; + if (ldv == 50) { + const double tmp[50] = { + 1.0, Y[0], Y[1], Y[2], Y[0] * Y[0], + Y[0] * Y[1], Y[0] * Y[2], Y[1] * Y[1], Y[1] * Y[2], Y[2] * Y[2], + 0.0, 1.0, 0.0, 0.0, Y[0] + Y[0], + Y[1], Y[2], 0.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, 0.0, + Y[0], 0.0, Y[1] + Y[1], Y[2], 0.0, + 0.0, 0.0, 0.0, 1.0, 0.0, + 0.0, Y[0], 0.0, Y[1], Y[2] + Y[2], + 0.0, 0.0, 0.0, 0.0, 2.0, + 0.0, 0.0, 2.0, 0., 2.0 }; + for (int i=0 ; i<50 ; ++i) + VGL[i] = tmp[i]; + } else { + double* const vgl1 = VGL; + double* const vgl2 = VGL + ldv; + double* const vgl3 = VGL + (ldv << 1); + double* const vgl4 = VGL + 3*ldv; + double* const vgl5 = VGL + (ldv << 2); - vgl2[1] = 1.0; - vgl3[2] = 1.0; - vgl4[3] = 1.0; - m=4; - - double da = 2.0; - - for (int32_t a=2 ; a>=0 ; --a) { - double db = 2.0-da; - - for (int32_t b=2-a ; b>=0 ; --b) { - const int32_t c = 2 - a - b; - const double dc = 2.0 - da - db; - - double xy = pows[0][a+2] * pows[1][b+2]; - double yz = pows[1][b+2] * pows[2][c+2]; - double xz = pows[0][a+2] * pows[2][c+2]; - - l[m][0] = a; - l[m][1] = b; - l[m][2] = c; - - vgl1[m] = xy * pows[2][c+2]; - - xy *= dc; - xz *= db; - yz *= da; - - vgl2[m] = pows[0][a+1] * yz; - vgl3[m] = pows[1][b+1] * xz; - vgl4[m] = pows[2][c+1] * xy; - - vgl5[m] = (da-1.) * pows[0][a] * yz + - (db-1.) * pows[1][b] * xz + - (dc-1.) * pows[2][c] * xy; - - db -= 1.0; - ++m; - } - da -= 1.0; - } + vgl1[0] = 1.0 ; vgl1[1] = Y[0] ; vgl1[2] = Y[1]; + vgl1[3] = Y[2] ; vgl1[4] = Y[0]*Y[0]; vgl1[5] = Y[0]*Y[1]; + vgl1[6] = Y[0]*Y[2]; vgl1[7] = Y[1]*Y[1]; vgl1[8] = Y[1]*Y[2]; + vgl1[9] = Y[2]*Y[2]; + + vgl2[0] = 0.0 ; vgl2[1] = 1.0 ; vgl2[2] = 0.0 ; + vgl2[3] = 0.0 ; vgl2[4] = Y[0]+Y[0]; vgl2[5] = Y[1]; + vgl2[6] = Y[2]; vgl2[7] = 0.0 ; vgl2[8] = 0.0 ; + vgl2[9] = 0.0 ; + + vgl3[0] = 0.0; vgl3[1] = 0.0 ; vgl3[2] = 1.0 ; + vgl3[3] = 0.0; vgl3[4] = 0.0 ; vgl3[5] = Y[0]; + vgl3[6] = 0.0; vgl3[7] = Y[1]+Y[1]; vgl3[8] = Y[2]; + vgl3[9] = 0.0; + + vgl4[0] = 0.0 ; vgl4[1] = 0.0; vgl4[2] = 0.0 ; + vgl4[3] = 1.0 ; vgl4[4] = 0.0; vgl4[5] = 0.0 ; + vgl4[6] = Y[0] ; vgl4[7] = 0.0; vgl4[8] = Y[1]; + vgl4[9] = Y[2]+Y[2]; + + vgl5[0] = 0.0; vgl5[1] = 0.0; vgl5[2] = 0.0; + vgl5[3] = 0.0; vgl5[4] = 2.0; vgl5[5] = 0.0; + vgl5[6] = 0.0; vgl5[7] = 2.0; vgl5[8] = 0.0; + vgl5[9] = 2.0; + } + m=10; break; } /* - case 3: - { - if (ldv < 20) return QMCKL_INVALID_ARG_9; - } - ,*/ + case 3: + { + if (ldv < 20) return QMCKL_INVALID_ARG_9; + } + ,*/ default: { const int32_t size_max = (lmax+1)*(lmax+2)*(lmax+3)/6; if (ldv < size_max) return QMCKL_INVALID_ARG_9; + + double* const vgl1 = VGL; + double* const vgl2 = VGL + ldv; + double* const vgl3 = VGL + (ldv << 1); + double* const vgl4 = VGL + 3*ldv; + double* const vgl5 = VGL + (ldv << 2); + const double Y[3] = { X[0]-R[0], X[1]-R[1], X[2]-R[2] }; + double pows[3][size_max]; for (int32_t i=0 ; i<=2 ; ++i) { @@ -4949,36 +4972,38 @@ assert(0 == test_qmckl_ao_polynomial_vgl(context)); double X[3] = { 1.1, 2.2, 3.3 }; double R[3] = { 0.2, 1.1, 3.0 }; -int64_t n; -int64_t ldl = 4; -int64_t ldv = 24; -int32_t L0[24][ldl]; -int32_t L1[24][ldl]; -double VGL0[5][ldv]; -double VGL1[5][ldv]; -memset(&L0[0][0], 0, sizeof(L0)); -memset(&L1[0][0], 0, sizeof(L1)); -memset(&VGL0[0][0], 0, sizeof(VGL0)); -memset(&VGL1[0][0], 0, sizeof(VGL1)); -for (int32_t lmax=0 ; lmax<=3 ; lmax++) { -rc = qmckl_ao_polynomial_transp_vgl_doc (context, X, R, lmax, &n, &(L0[0][0]), ldl, &(VGL0[0][0]), ldv); -assert (rc == QMCKL_SUCCESS); -rc = qmckl_ao_polynomial_transp_vgl_hpc (context, X, R, lmax, &n, &(L1[0][0]), ldl, &(VGL1[0][0]), ldv); -assert (rc == QMCKL_SUCCESS); -printf("lmax=%d\n", lmax); -for (int32_t l=0 ; l