1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-12-22 12:23:56 +01:00

Optimize AOs

This commit is contained in:
Anthony Scemama 2022-02-25 16:30:16 +01:00
parent ff526a18cb
commit b6a31b8c58

View File

@@ -3040,11 +3040,11 @@ integer(c_int32_t) function test_qmckl_ao_gaussian_vgl(context) bind(C)
integer(c_int64_t), intent(in), value :: context
integer*8 :: n, ldv, j, i
double precision :: X(3), R(3), Y(3), r2
double precision :: X(3), R(3), Y(3), r2, z
double precision, allocatable :: VGL(:,:), A(:)
double precision :: epsilon
epsilon = qmckl_get_numprec_epsilon(context)
epsilon = 3.d0 * qmckl_get_numprec_epsilon(context)
X = (/ 1.1 , 2.2 , 3.3 /)
R = (/ 0.1 , 1.2 , -2.3 /)
@@ -3068,29 +3068,43 @@ integer(c_int32_t) function test_qmckl_ao_gaussian_vgl(context) bind(C)
do i=1,n
test_qmckl_ao_gaussian_vgl = -11
if (dabs(1.d0 - VGL(i,1) / (&
dexp(-A(i) * r2) &
)) > epsilon ) return
z = dabs(1.d0 - VGL(i,1) / (dexp(-A(i) * r2)) )
if ( z > epsilon ) then
print *, z, epsilon
return
end if
test_qmckl_ao_gaussian_vgl = -12
if (dabs(1.d0 - VGL(i,2) / (&
-2.d0 * A(i) * Y(1) * dexp(-A(i) * r2) &
)) > epsilon ) return
z = dabs(1.d0 - VGL(i,2) / (&
-2.d0 * A(i) * Y(1) * dexp(-A(i) * r2) ))
if ( z > epsilon ) then
print *, z, epsilon
return
end if
test_qmckl_ao_gaussian_vgl = -13
if (dabs(1.d0 - VGL(i,3) / (&
-2.d0 * A(i) * Y(2) * dexp(-A(i) * r2) &
)) > epsilon ) return
z = dabs(1.d0 - VGL(i,3) / (&
-2.d0 * A(i) * Y(2) * dexp(-A(i) * r2) ))
if ( z > epsilon ) then
print *, z, epsilon
return
end if
test_qmckl_ao_gaussian_vgl = -14
if (dabs(1.d0 - VGL(i,4) / (&
-2.d0 * A(i) * Y(3) * dexp(-A(i) * r2) &
)) > epsilon ) return
z = dabs(1.d0 - VGL(i,4) / (&
-2.d0 * A(i) * Y(3) * dexp(-A(i) * r2) ))
if ( z > epsilon ) then
print *, z, epsilon
return
end if
test_qmckl_ao_gaussian_vgl = -15
if (dabs(1.d0 - VGL(i,5) / (&
A(i) * (4.d0*r2*A(i) - 6.d0) * dexp(-A(i) * r2) &
)) > epsilon ) return
z = dabs(1.d0 - VGL(i,5) / (&
A(i) * (4.d0*r2*A(i) - 6.d0) * dexp(-A(i) * r2) ))
if ( z > epsilon ) then
print *, z, epsilon
return
end if
end do
test_qmckl_ao_gaussian_vgl = 0
@@ -5423,16 +5437,16 @@ qmckl_compute_ao_vgl_hpc_gaussian (
const double s4 = s6_*z;
const double s5 = s5_;
const int64_t k = ao_index[ishell];
double* __restrict__ const ao_vgl_1 = ao_vgl + ipoint*5*ao_num + k;
const int32_t l = shell_ang_mom[ishell];
const int32_t n = lstart[l+1]-lstart[l];
const int64_t k = ao_index[ishell];
double* __restrict__ const ao_vgl_1 = &(ao_vgl[ipoint*5*ao_num+k]);
double* __restrict__ const ao_vgl_2 = &(ao_vgl_1[ ao_num]);
double* __restrict__ const ao_vgl_3 = &(ao_vgl_1[2*ao_num]);
double* __restrict__ const ao_vgl_4 = &(ao_vgl_1[3*ao_num]);
double* __restrict__ const ao_vgl_5 = &(ao_vgl_1[4*ao_num]);
double* __restrict__ const ao_vgl_2 = ao_vgl_1 + ao_num;
double* __restrict__ const ao_vgl_3 = ao_vgl_1 + (ao_num<<1);
double* __restrict__ const ao_vgl_4 = ao_vgl_1 + (ao_num<<1) + ao_num;
double* __restrict__ const ao_vgl_5 = ao_vgl_1 + (ao_num<<2);
double* __restrict__ poly_vgl_1;
double* __restrict__ poly_vgl_2;
@@ -5440,71 +5454,84 @@ qmckl_compute_ao_vgl_hpc_gaussian (
double* __restrict__ poly_vgl_4;
double* __restrict__ poly_vgl_5;
if (nidx > 0) {
const double* f = &(ao_factor[k]);
const double* __restrict__ f = ao_factor + k;
const int64_t idx = lstart[l];
switch (nucleus_max_ang_mom[inucl]) {
case 0:
ao_vgl_1[0] = s1 * f[0];
ao_vgl_2[0] = s2 * f[0];
ao_vgl_3[0] = s3 * f[0];
ao_vgl_4[0] = s4 * f[0];
ao_vgl_5[0] = s5;
break;
case 1:
poly_vgl_1 = &(poly_vgl_l1[0][idx]);
poly_vgl_2 = &(poly_vgl_l1[1][idx]);
poly_vgl_3 = &(poly_vgl_l1[2][idx]);
poly_vgl_4 = &(poly_vgl_l1[3][idx]);
for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
2.0*(poly_vgl_2[il] * s2 +
poly_vgl_3[il] * s3 +
poly_vgl_4[il] * s4 )) * f[il];
}
break;
case 2:
poly_vgl_1 = &(poly_vgl_l2[0][idx]);
poly_vgl_2 = &(poly_vgl_l2[1][idx]);
poly_vgl_3 = &(poly_vgl_l2[2][idx]);
poly_vgl_4 = &(poly_vgl_l2[3][idx]);
poly_vgl_5 = &(poly_vgl_l2[4][idx]);
for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 +
2.0*(poly_vgl_2[il] * s2 +
poly_vgl_3[il] * s3 +
poly_vgl_4[il] * s4 )) * f[il];
}
break;
default:
poly_vgl_1 = &(poly_vgl[0][idx]);
poly_vgl_2 = &(poly_vgl[1][idx]);
poly_vgl_3 = &(poly_vgl[2][idx]);
poly_vgl_4 = &(poly_vgl[3][idx]);
poly_vgl_5 = &(poly_vgl[4][idx]);
for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 +
2.0*(poly_vgl_2[il] * s2 +
poly_vgl_3[il] * s3 +
poly_vgl_4[il] * s4 )) * f[il];
}
break;
}
switch (n) {
case(1):
ao_vgl_1[0] = s1 * f[0];
ao_vgl_2[0] = s2 * f[0];
ao_vgl_3[0] = s3 * f[0];
ao_vgl_4[0] = s4 * f[0];
ao_vgl_5[0] = s5;
break;
case (3):
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int il=0 ; il<3 ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
2.0*(poly_vgl_2[il] * s2 +
poly_vgl_3[il] * s3 +
poly_vgl_4[il] * s4 )) * f[il];
}
break;
case(5):
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int il=0 ; il<5 ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
2.0*(poly_vgl_2[il] * s2 +
poly_vgl_3[il] * s3 +
poly_vgl_4[il] * s4 )) * f[il];
}
break;
default:
#ifdef HAVE_OPENMP
#pragma omp simd simdlen(8)
#endif
for (int il=0 ; il<n ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
2.0*(poly_vgl_2[il] * s2 +
poly_vgl_3[il] * s3 +
poly_vgl_4[il] * s4 )) * f[il];
}
break;
}
} else {
for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = 0.0;