mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 12:23:56 +01:00
Optimize AOs
This commit is contained in:
parent
ff526a18cb
commit
b6a31b8c58
157
org/qmckl_ao.org
157
org/qmckl_ao.org
@ -3040,11 +3040,11 @@ integer(c_int32_t) function test_qmckl_ao_gaussian_vgl(context) bind(C)
|
||||
integer(c_int64_t), intent(in), value :: context
|
||||
|
||||
integer*8 :: n, ldv, j, i
|
||||
double precision :: X(3), R(3), Y(3), r2
|
||||
double precision :: X(3), R(3), Y(3), r2, z
|
||||
double precision, allocatable :: VGL(:,:), A(:)
|
||||
double precision :: epsilon
|
||||
|
||||
epsilon = qmckl_get_numprec_epsilon(context)
|
||||
epsilon = 3.d0 * qmckl_get_numprec_epsilon(context)
|
||||
|
||||
X = (/ 1.1 , 2.2 , 3.3 /)
|
||||
R = (/ 0.1 , 1.2 , -2.3 /)
|
||||
@ -3068,29 +3068,43 @@ integer(c_int32_t) function test_qmckl_ao_gaussian_vgl(context) bind(C)
|
||||
|
||||
do i=1,n
|
||||
test_qmckl_ao_gaussian_vgl = -11
|
||||
if (dabs(1.d0 - VGL(i,1) / (&
|
||||
dexp(-A(i) * r2) &
|
||||
)) > epsilon ) return
|
||||
z = dabs(1.d0 - VGL(i,1) / (dexp(-A(i) * r2)) )
|
||||
if ( z > epsilon ) then
|
||||
print *, z, epsilon
|
||||
return
|
||||
end if
|
||||
|
||||
test_qmckl_ao_gaussian_vgl = -12
|
||||
if (dabs(1.d0 - VGL(i,2) / (&
|
||||
-2.d0 * A(i) * Y(1) * dexp(-A(i) * r2) &
|
||||
)) > epsilon ) return
|
||||
z = dabs(1.d0 - VGL(i,2) / (&
|
||||
-2.d0 * A(i) * Y(1) * dexp(-A(i) * r2) ))
|
||||
if ( z > epsilon ) then
|
||||
print *, z, epsilon
|
||||
return
|
||||
end if
|
||||
|
||||
test_qmckl_ao_gaussian_vgl = -13
|
||||
if (dabs(1.d0 - VGL(i,3) / (&
|
||||
-2.d0 * A(i) * Y(2) * dexp(-A(i) * r2) &
|
||||
)) > epsilon ) return
|
||||
z = dabs(1.d0 - VGL(i,3) / (&
|
||||
-2.d0 * A(i) * Y(2) * dexp(-A(i) * r2) ))
|
||||
if ( z > epsilon ) then
|
||||
print *, z, epsilon
|
||||
return
|
||||
end if
|
||||
|
||||
test_qmckl_ao_gaussian_vgl = -14
|
||||
if (dabs(1.d0 - VGL(i,4) / (&
|
||||
-2.d0 * A(i) * Y(3) * dexp(-A(i) * r2) &
|
||||
)) > epsilon ) return
|
||||
z = dabs(1.d0 - VGL(i,4) / (&
|
||||
-2.d0 * A(i) * Y(3) * dexp(-A(i) * r2) ))
|
||||
if ( z > epsilon ) then
|
||||
print *, z, epsilon
|
||||
return
|
||||
end if
|
||||
|
||||
test_qmckl_ao_gaussian_vgl = -15
|
||||
if (dabs(1.d0 - VGL(i,5) / (&
|
||||
A(i) * (4.d0*r2*A(i) - 6.d0) * dexp(-A(i) * r2) &
|
||||
)) > epsilon ) return
|
||||
z = dabs(1.d0 - VGL(i,5) / (&
|
||||
A(i) * (4.d0*r2*A(i) - 6.d0) * dexp(-A(i) * r2) ))
|
||||
if ( z > epsilon ) then
|
||||
print *, z, epsilon
|
||||
return
|
||||
end if
|
||||
end do
|
||||
|
||||
test_qmckl_ao_gaussian_vgl = 0
|
||||
@ -5423,16 +5437,16 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
||||
const double s4 = s6_*z;
|
||||
const double s5 = s5_;
|
||||
|
||||
const int64_t k = ao_index[ishell];
|
||||
double* __restrict__ const ao_vgl_1 = ao_vgl + ipoint*5*ao_num + k;
|
||||
|
||||
const int32_t l = shell_ang_mom[ishell];
|
||||
const int32_t n = lstart[l+1]-lstart[l];
|
||||
const int64_t k = ao_index[ishell];
|
||||
|
||||
double* __restrict__ const ao_vgl_1 = &(ao_vgl[ipoint*5*ao_num+k]);
|
||||
double* __restrict__ const ao_vgl_2 = &(ao_vgl_1[ ao_num]);
|
||||
double* __restrict__ const ao_vgl_3 = &(ao_vgl_1[2*ao_num]);
|
||||
double* __restrict__ const ao_vgl_4 = &(ao_vgl_1[3*ao_num]);
|
||||
double* __restrict__ const ao_vgl_5 = &(ao_vgl_1[4*ao_num]);
|
||||
double* __restrict__ const ao_vgl_2 = ao_vgl_1 + ao_num;
|
||||
double* __restrict__ const ao_vgl_3 = ao_vgl_1 + (ao_num<<1);
|
||||
double* __restrict__ const ao_vgl_4 = ao_vgl_1 + (ao_num<<1) + ao_num;
|
||||
double* __restrict__ const ao_vgl_5 = ao_vgl_1 + (ao_num<<2);
|
||||
|
||||
double* __restrict__ poly_vgl_1;
|
||||
double* __restrict__ poly_vgl_2;
|
||||
@ -5440,71 +5454,84 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
||||
double* __restrict__ poly_vgl_4;
|
||||
double* __restrict__ poly_vgl_5;
|
||||
if (nidx > 0) {
|
||||
const double* f = &(ao_factor[k]);
|
||||
const double* __restrict__ f = ao_factor + k;
|
||||
const int64_t idx = lstart[l];
|
||||
|
||||
switch (nucleus_max_ang_mom[inucl]) {
|
||||
case 0:
|
||||
ao_vgl_1[0] = s1 * f[0];
|
||||
ao_vgl_2[0] = s2 * f[0];
|
||||
ao_vgl_3[0] = s3 * f[0];
|
||||
ao_vgl_4[0] = s4 * f[0];
|
||||
ao_vgl_5[0] = s5;
|
||||
break;
|
||||
case 1:
|
||||
poly_vgl_1 = &(poly_vgl_l1[0][idx]);
|
||||
poly_vgl_2 = &(poly_vgl_l1[1][idx]);
|
||||
poly_vgl_3 = &(poly_vgl_l1[2][idx]);
|
||||
poly_vgl_4 = &(poly_vgl_l1[3][idx]);
|
||||
for (int64_t il=0 ; il<n ; ++il) {
|
||||
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
|
||||
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
|
||||
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
|
||||
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
|
||||
ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
|
||||
2.0*(poly_vgl_2[il] * s2 +
|
||||
poly_vgl_3[il] * s3 +
|
||||
poly_vgl_4[il] * s4 )) * f[il];
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
poly_vgl_1 = &(poly_vgl_l2[0][idx]);
|
||||
poly_vgl_2 = &(poly_vgl_l2[1][idx]);
|
||||
poly_vgl_3 = &(poly_vgl_l2[2][idx]);
|
||||
poly_vgl_4 = &(poly_vgl_l2[3][idx]);
|
||||
poly_vgl_5 = &(poly_vgl_l2[4][idx]);
|
||||
|
||||
for (int64_t il=0 ; il<n ; ++il) {
|
||||
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
|
||||
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
|
||||
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
|
||||
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
|
||||
ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 +
|
||||
2.0*(poly_vgl_2[il] * s2 +
|
||||
poly_vgl_3[il] * s3 +
|
||||
poly_vgl_4[il] * s4 )) * f[il];
|
||||
}
|
||||
break;
|
||||
default:
|
||||
poly_vgl_1 = &(poly_vgl[0][idx]);
|
||||
poly_vgl_2 = &(poly_vgl[1][idx]);
|
||||
poly_vgl_3 = &(poly_vgl[2][idx]);
|
||||
poly_vgl_4 = &(poly_vgl[3][idx]);
|
||||
poly_vgl_5 = &(poly_vgl[4][idx]);
|
||||
|
||||
for (int64_t il=0 ; il<n ; ++il) {
|
||||
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
|
||||
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
|
||||
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
|
||||
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
|
||||
ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 +
|
||||
2.0*(poly_vgl_2[il] * s2 +
|
||||
poly_vgl_3[il] * s3 +
|
||||
poly_vgl_4[il] * s4 )) * f[il];
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
switch (n) {
|
||||
case(1):
|
||||
ao_vgl_1[0] = s1 * f[0];
|
||||
ao_vgl_2[0] = s2 * f[0];
|
||||
ao_vgl_3[0] = s3 * f[0];
|
||||
ao_vgl_4[0] = s4 * f[0];
|
||||
ao_vgl_5[0] = s5;
|
||||
break;
|
||||
case (3):
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int il=0 ; il<3 ; ++il) {
|
||||
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
|
||||
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
|
||||
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
|
||||
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
|
||||
ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
|
||||
2.0*(poly_vgl_2[il] * s2 +
|
||||
poly_vgl_3[il] * s3 +
|
||||
poly_vgl_4[il] * s4 )) * f[il];
|
||||
}
|
||||
break;
|
||||
case(5):
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int il=0 ; il<5 ; ++il) {
|
||||
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
|
||||
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
|
||||
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
|
||||
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
|
||||
ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
|
||||
2.0*(poly_vgl_2[il] * s2 +
|
||||
poly_vgl_3[il] * s3 +
|
||||
poly_vgl_4[il] * s4 )) * f[il];
|
||||
}
|
||||
break;
|
||||
default:
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd simdlen(8)
|
||||
#endif
|
||||
for (int il=0 ; il<n ; ++il) {
|
||||
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
|
||||
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
|
||||
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
|
||||
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
|
||||
ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
|
||||
2.0*(poly_vgl_2[il] * s2 +
|
||||
poly_vgl_3[il] * s3 +
|
||||
poly_vgl_4[il] * s4 )) * f[il];
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
for (int64_t il=0 ; il<n ; ++il) {
|
||||
ao_vgl_1[il] = 0.0;
|
||||
|
Loading…
Reference in New Issue
Block a user