1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-08 20:33:40 +01:00

Optimize AOs

This commit is contained in:
Anthony Scemama 2022-02-25 16:30:16 +01:00
parent ff526a18cb
commit b6a31b8c58

View File

@ -3040,11 +3040,11 @@ integer(c_int32_t) function test_qmckl_ao_gaussian_vgl(context) bind(C)
integer(c_int64_t), intent(in), value :: context integer(c_int64_t), intent(in), value :: context
integer*8 :: n, ldv, j, i integer*8 :: n, ldv, j, i
double precision :: X(3), R(3), Y(3), r2 double precision :: X(3), R(3), Y(3), r2, z
double precision, allocatable :: VGL(:,:), A(:) double precision, allocatable :: VGL(:,:), A(:)
double precision :: epsilon double precision :: epsilon
epsilon = qmckl_get_numprec_epsilon(context) epsilon = 3.d0 * qmckl_get_numprec_epsilon(context)
X = (/ 1.1 , 2.2 , 3.3 /) X = (/ 1.1 , 2.2 , 3.3 /)
R = (/ 0.1 , 1.2 , -2.3 /) R = (/ 0.1 , 1.2 , -2.3 /)
@ -3068,29 +3068,43 @@ integer(c_int32_t) function test_qmckl_ao_gaussian_vgl(context) bind(C)
do i=1,n do i=1,n
test_qmckl_ao_gaussian_vgl = -11 test_qmckl_ao_gaussian_vgl = -11
if (dabs(1.d0 - VGL(i,1) / (& z = dabs(1.d0 - VGL(i,1) / (dexp(-A(i) * r2)) )
dexp(-A(i) * r2) & if ( z > epsilon ) then
)) > epsilon ) return print *, z, epsilon
return
end if
test_qmckl_ao_gaussian_vgl = -12 test_qmckl_ao_gaussian_vgl = -12
if (dabs(1.d0 - VGL(i,2) / (& z = dabs(1.d0 - VGL(i,2) / (&
-2.d0 * A(i) * Y(1) * dexp(-A(i) * r2) & -2.d0 * A(i) * Y(1) * dexp(-A(i) * r2) ))
)) > epsilon ) return if ( z > epsilon ) then
print *, z, epsilon
return
end if
test_qmckl_ao_gaussian_vgl = -13 test_qmckl_ao_gaussian_vgl = -13
if (dabs(1.d0 - VGL(i,3) / (& z = dabs(1.d0 - VGL(i,3) / (&
-2.d0 * A(i) * Y(2) * dexp(-A(i) * r2) & -2.d0 * A(i) * Y(2) * dexp(-A(i) * r2) ))
)) > epsilon ) return if ( z > epsilon ) then
print *, z, epsilon
return
end if
test_qmckl_ao_gaussian_vgl = -14 test_qmckl_ao_gaussian_vgl = -14
if (dabs(1.d0 - VGL(i,4) / (& z = dabs(1.d0 - VGL(i,4) / (&
-2.d0 * A(i) * Y(3) * dexp(-A(i) * r2) & -2.d0 * A(i) * Y(3) * dexp(-A(i) * r2) ))
)) > epsilon ) return if ( z > epsilon ) then
print *, z, epsilon
return
end if
test_qmckl_ao_gaussian_vgl = -15 test_qmckl_ao_gaussian_vgl = -15
if (dabs(1.d0 - VGL(i,5) / (& z = dabs(1.d0 - VGL(i,5) / (&
A(i) * (4.d0*r2*A(i) - 6.d0) * dexp(-A(i) * r2) & A(i) * (4.d0*r2*A(i) - 6.d0) * dexp(-A(i) * r2) ))
)) > epsilon ) return if ( z > epsilon ) then
print *, z, epsilon
return
end if
end do end do
test_qmckl_ao_gaussian_vgl = 0 test_qmckl_ao_gaussian_vgl = 0
@ -5423,16 +5437,16 @@ qmckl_compute_ao_vgl_hpc_gaussian (
const double s4 = s6_*z; const double s4 = s6_*z;
const double s5 = s5_; const double s5 = s5_;
const int64_t k = ao_index[ishell];
double* __restrict__ const ao_vgl_1 = ao_vgl + ipoint*5*ao_num + k;
const int32_t l = shell_ang_mom[ishell]; const int32_t l = shell_ang_mom[ishell];
const int32_t n = lstart[l+1]-lstart[l]; const int32_t n = lstart[l+1]-lstart[l];
const int64_t k = ao_index[ishell];
double* __restrict__ const ao_vgl_1 = &(ao_vgl[ipoint*5*ao_num+k]); double* __restrict__ const ao_vgl_2 = ao_vgl_1 + ao_num;
double* __restrict__ const ao_vgl_2 = &(ao_vgl_1[ ao_num]); double* __restrict__ const ao_vgl_3 = ao_vgl_1 + (ao_num<<1);
double* __restrict__ const ao_vgl_3 = &(ao_vgl_1[2*ao_num]); double* __restrict__ const ao_vgl_4 = ao_vgl_1 + (ao_num<<1) + ao_num;
double* __restrict__ const ao_vgl_4 = &(ao_vgl_1[3*ao_num]); double* __restrict__ const ao_vgl_5 = ao_vgl_1 + (ao_num<<2);
double* __restrict__ const ao_vgl_5 = &(ao_vgl_1[4*ao_num]);
double* __restrict__ poly_vgl_1; double* __restrict__ poly_vgl_1;
double* __restrict__ poly_vgl_2; double* __restrict__ poly_vgl_2;
@ -5440,23 +5454,43 @@ qmckl_compute_ao_vgl_hpc_gaussian (
double* __restrict__ poly_vgl_4; double* __restrict__ poly_vgl_4;
double* __restrict__ poly_vgl_5; double* __restrict__ poly_vgl_5;
if (nidx > 0) { if (nidx > 0) {
const double* f = &(ao_factor[k]); const double* __restrict__ f = ao_factor + k;
const int64_t idx = lstart[l]; const int64_t idx = lstart[l];
switch (nucleus_max_ang_mom[inucl]) { switch (nucleus_max_ang_mom[inucl]) {
case 0: case 0:
ao_vgl_1[0] = s1 * f[0];
ao_vgl_2[0] = s2 * f[0];
ao_vgl_3[0] = s3 * f[0];
ao_vgl_4[0] = s4 * f[0];
ao_vgl_5[0] = s5;
break; break;
case 1: case 1:
poly_vgl_1 = &(poly_vgl_l1[0][idx]); poly_vgl_1 = &(poly_vgl_l1[0][idx]);
poly_vgl_2 = &(poly_vgl_l1[1][idx]); poly_vgl_2 = &(poly_vgl_l1[1][idx]);
poly_vgl_3 = &(poly_vgl_l1[2][idx]); poly_vgl_3 = &(poly_vgl_l1[2][idx]);
poly_vgl_4 = &(poly_vgl_l1[3][idx]); poly_vgl_4 = &(poly_vgl_l1[3][idx]);
for (int64_t il=0 ; il<n ; ++il) { break;
case 2:
poly_vgl_1 = &(poly_vgl_l2[0][idx]);
poly_vgl_2 = &(poly_vgl_l2[1][idx]);
poly_vgl_3 = &(poly_vgl_l2[2][idx]);
poly_vgl_4 = &(poly_vgl_l2[3][idx]);
break;
default:
poly_vgl_1 = &(poly_vgl[0][idx]);
poly_vgl_2 = &(poly_vgl[1][idx]);
poly_vgl_3 = &(poly_vgl[2][idx]);
poly_vgl_4 = &(poly_vgl[3][idx]);
}
switch (n) {
case(1):
ao_vgl_1[0] = s1 * f[0];
ao_vgl_2[0] = s2 * f[0];
ao_vgl_3[0] = s3 * f[0];
ao_vgl_4[0] = s4 * f[0];
ao_vgl_5[0] = s5;
break;
case (3):
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int il=0 ; il<3 ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il]; ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
@ -5467,44 +5501,37 @@ qmckl_compute_ao_vgl_hpc_gaussian (
poly_vgl_4[il] * s4 )) * f[il]; poly_vgl_4[il] * s4 )) * f[il];
} }
break; break;
case 2: case(5):
poly_vgl_1 = &(poly_vgl_l2[0][idx]); #ifdef HAVE_OPENMP
poly_vgl_2 = &(poly_vgl_l2[1][idx]); #pragma omp simd
poly_vgl_3 = &(poly_vgl_l2[2][idx]); #endif
poly_vgl_4 = &(poly_vgl_l2[3][idx]); for (int il=0 ; il<5 ; ++il) {
poly_vgl_5 = &(poly_vgl_l2[4][idx]);
for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il]; ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il]; ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 + ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
2.0*(poly_vgl_2[il] * s2 + 2.0*(poly_vgl_2[il] * s2 +
poly_vgl_3[il] * s3 + poly_vgl_3[il] * s3 +
poly_vgl_4[il] * s4 )) * f[il]; poly_vgl_4[il] * s4 )) * f[il];
} }
break; break;
default: default:
poly_vgl_1 = &(poly_vgl[0][idx]); #ifdef HAVE_OPENMP
poly_vgl_2 = &(poly_vgl[1][idx]); #pragma omp simd simdlen(8)
poly_vgl_3 = &(poly_vgl[2][idx]); #endif
poly_vgl_4 = &(poly_vgl[3][idx]); for (int il=0 ; il<n ; ++il) {
poly_vgl_5 = &(poly_vgl[4][idx]);
for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il]; ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il]; ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 + ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
2.0*(poly_vgl_2[il] * s2 + 2.0*(poly_vgl_2[il] * s2 +
poly_vgl_3[il] * s3 + poly_vgl_3[il] * s3 +
poly_vgl_4[il] * s4 )) * f[il]; poly_vgl_4[il] * s4 )) * f[il];
} }
break; break;
} }
} else { } else {
for (int64_t il=0 ; il<n ; ++il) { for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = 0.0; ao_vgl_1[il] = 0.0;