mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-03 18:16:28 +01:00
Optimize AOs
This commit is contained in:
parent
ff526a18cb
commit
b6a31b8c58
123
org/qmckl_ao.org
123
org/qmckl_ao.org
@ -3040,11 +3040,11 @@ integer(c_int32_t) function test_qmckl_ao_gaussian_vgl(context) bind(C)
|
|||||||
integer(c_int64_t), intent(in), value :: context
|
integer(c_int64_t), intent(in), value :: context
|
||||||
|
|
||||||
integer*8 :: n, ldv, j, i
|
integer*8 :: n, ldv, j, i
|
||||||
double precision :: X(3), R(3), Y(3), r2
|
double precision :: X(3), R(3), Y(3), r2, z
|
||||||
double precision, allocatable :: VGL(:,:), A(:)
|
double precision, allocatable :: VGL(:,:), A(:)
|
||||||
double precision :: epsilon
|
double precision :: epsilon
|
||||||
|
|
||||||
epsilon = qmckl_get_numprec_epsilon(context)
|
epsilon = 3.d0 * qmckl_get_numprec_epsilon(context)
|
||||||
|
|
||||||
X = (/ 1.1 , 2.2 , 3.3 /)
|
X = (/ 1.1 , 2.2 , 3.3 /)
|
||||||
R = (/ 0.1 , 1.2 , -2.3 /)
|
R = (/ 0.1 , 1.2 , -2.3 /)
|
||||||
@ -3068,29 +3068,43 @@ integer(c_int32_t) function test_qmckl_ao_gaussian_vgl(context) bind(C)
|
|||||||
|
|
||||||
do i=1,n
|
do i=1,n
|
||||||
test_qmckl_ao_gaussian_vgl = -11
|
test_qmckl_ao_gaussian_vgl = -11
|
||||||
if (dabs(1.d0 - VGL(i,1) / (&
|
z = dabs(1.d0 - VGL(i,1) / (dexp(-A(i) * r2)) )
|
||||||
dexp(-A(i) * r2) &
|
if ( z > epsilon ) then
|
||||||
)) > epsilon ) return
|
print *, z, epsilon
|
||||||
|
return
|
||||||
|
end if
|
||||||
|
|
||||||
test_qmckl_ao_gaussian_vgl = -12
|
test_qmckl_ao_gaussian_vgl = -12
|
||||||
if (dabs(1.d0 - VGL(i,2) / (&
|
z = dabs(1.d0 - VGL(i,2) / (&
|
||||||
-2.d0 * A(i) * Y(1) * dexp(-A(i) * r2) &
|
-2.d0 * A(i) * Y(1) * dexp(-A(i) * r2) ))
|
||||||
)) > epsilon ) return
|
if ( z > epsilon ) then
|
||||||
|
print *, z, epsilon
|
||||||
|
return
|
||||||
|
end if
|
||||||
|
|
||||||
test_qmckl_ao_gaussian_vgl = -13
|
test_qmckl_ao_gaussian_vgl = -13
|
||||||
if (dabs(1.d0 - VGL(i,3) / (&
|
z = dabs(1.d0 - VGL(i,3) / (&
|
||||||
-2.d0 * A(i) * Y(2) * dexp(-A(i) * r2) &
|
-2.d0 * A(i) * Y(2) * dexp(-A(i) * r2) ))
|
||||||
)) > epsilon ) return
|
if ( z > epsilon ) then
|
||||||
|
print *, z, epsilon
|
||||||
|
return
|
||||||
|
end if
|
||||||
|
|
||||||
test_qmckl_ao_gaussian_vgl = -14
|
test_qmckl_ao_gaussian_vgl = -14
|
||||||
if (dabs(1.d0 - VGL(i,4) / (&
|
z = dabs(1.d0 - VGL(i,4) / (&
|
||||||
-2.d0 * A(i) * Y(3) * dexp(-A(i) * r2) &
|
-2.d0 * A(i) * Y(3) * dexp(-A(i) * r2) ))
|
||||||
)) > epsilon ) return
|
if ( z > epsilon ) then
|
||||||
|
print *, z, epsilon
|
||||||
|
return
|
||||||
|
end if
|
||||||
|
|
||||||
test_qmckl_ao_gaussian_vgl = -15
|
test_qmckl_ao_gaussian_vgl = -15
|
||||||
if (dabs(1.d0 - VGL(i,5) / (&
|
z = dabs(1.d0 - VGL(i,5) / (&
|
||||||
A(i) * (4.d0*r2*A(i) - 6.d0) * dexp(-A(i) * r2) &
|
A(i) * (4.d0*r2*A(i) - 6.d0) * dexp(-A(i) * r2) ))
|
||||||
)) > epsilon ) return
|
if ( z > epsilon ) then
|
||||||
|
print *, z, epsilon
|
||||||
|
return
|
||||||
|
end if
|
||||||
end do
|
end do
|
||||||
|
|
||||||
test_qmckl_ao_gaussian_vgl = 0
|
test_qmckl_ao_gaussian_vgl = 0
|
||||||
@ -5423,16 +5437,16 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
const double s4 = s6_*z;
|
const double s4 = s6_*z;
|
||||||
const double s5 = s5_;
|
const double s5 = s5_;
|
||||||
|
|
||||||
|
const int64_t k = ao_index[ishell];
|
||||||
|
double* __restrict__ const ao_vgl_1 = ao_vgl + ipoint*5*ao_num + k;
|
||||||
|
|
||||||
const int32_t l = shell_ang_mom[ishell];
|
const int32_t l = shell_ang_mom[ishell];
|
||||||
const int32_t n = lstart[l+1]-lstart[l];
|
const int32_t n = lstart[l+1]-lstart[l];
|
||||||
const int64_t k = ao_index[ishell];
|
|
||||||
|
|
||||||
double* __restrict__ const ao_vgl_1 = &(ao_vgl[ipoint*5*ao_num+k]);
|
double* __restrict__ const ao_vgl_2 = ao_vgl_1 + ao_num;
|
||||||
double* __restrict__ const ao_vgl_2 = &(ao_vgl_1[ ao_num]);
|
double* __restrict__ const ao_vgl_3 = ao_vgl_1 + (ao_num<<1);
|
||||||
double* __restrict__ const ao_vgl_3 = &(ao_vgl_1[2*ao_num]);
|
double* __restrict__ const ao_vgl_4 = ao_vgl_1 + (ao_num<<1) + ao_num;
|
||||||
double* __restrict__ const ao_vgl_4 = &(ao_vgl_1[3*ao_num]);
|
double* __restrict__ const ao_vgl_5 = ao_vgl_1 + (ao_num<<2);
|
||||||
double* __restrict__ const ao_vgl_5 = &(ao_vgl_1[4*ao_num]);
|
|
||||||
|
|
||||||
double* __restrict__ poly_vgl_1;
|
double* __restrict__ poly_vgl_1;
|
||||||
double* __restrict__ poly_vgl_2;
|
double* __restrict__ poly_vgl_2;
|
||||||
@ -5440,23 +5454,43 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
double* __restrict__ poly_vgl_4;
|
double* __restrict__ poly_vgl_4;
|
||||||
double* __restrict__ poly_vgl_5;
|
double* __restrict__ poly_vgl_5;
|
||||||
if (nidx > 0) {
|
if (nidx > 0) {
|
||||||
const double* f = &(ao_factor[k]);
|
const double* __restrict__ f = ao_factor + k;
|
||||||
const int64_t idx = lstart[l];
|
const int64_t idx = lstart[l];
|
||||||
|
|
||||||
switch (nucleus_max_ang_mom[inucl]) {
|
switch (nucleus_max_ang_mom[inucl]) {
|
||||||
case 0:
|
case 0:
|
||||||
ao_vgl_1[0] = s1 * f[0];
|
|
||||||
ao_vgl_2[0] = s2 * f[0];
|
|
||||||
ao_vgl_3[0] = s3 * f[0];
|
|
||||||
ao_vgl_4[0] = s4 * f[0];
|
|
||||||
ao_vgl_5[0] = s5;
|
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
poly_vgl_1 = &(poly_vgl_l1[0][idx]);
|
poly_vgl_1 = &(poly_vgl_l1[0][idx]);
|
||||||
poly_vgl_2 = &(poly_vgl_l1[1][idx]);
|
poly_vgl_2 = &(poly_vgl_l1[1][idx]);
|
||||||
poly_vgl_3 = &(poly_vgl_l1[2][idx]);
|
poly_vgl_3 = &(poly_vgl_l1[2][idx]);
|
||||||
poly_vgl_4 = &(poly_vgl_l1[3][idx]);
|
poly_vgl_4 = &(poly_vgl_l1[3][idx]);
|
||||||
for (int64_t il=0 ; il<n ; ++il) {
|
break;
|
||||||
|
case 2:
|
||||||
|
poly_vgl_1 = &(poly_vgl_l2[0][idx]);
|
||||||
|
poly_vgl_2 = &(poly_vgl_l2[1][idx]);
|
||||||
|
poly_vgl_3 = &(poly_vgl_l2[2][idx]);
|
||||||
|
poly_vgl_4 = &(poly_vgl_l2[3][idx]);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
poly_vgl_1 = &(poly_vgl[0][idx]);
|
||||||
|
poly_vgl_2 = &(poly_vgl[1][idx]);
|
||||||
|
poly_vgl_3 = &(poly_vgl[2][idx]);
|
||||||
|
poly_vgl_4 = &(poly_vgl[3][idx]);
|
||||||
|
}
|
||||||
|
switch (n) {
|
||||||
|
case(1):
|
||||||
|
ao_vgl_1[0] = s1 * f[0];
|
||||||
|
ao_vgl_2[0] = s2 * f[0];
|
||||||
|
ao_vgl_3[0] = s3 * f[0];
|
||||||
|
ao_vgl_4[0] = s4 * f[0];
|
||||||
|
ao_vgl_5[0] = s5;
|
||||||
|
break;
|
||||||
|
case (3):
|
||||||
|
#ifdef HAVE_OPENMP
|
||||||
|
#pragma omp simd
|
||||||
|
#endif
|
||||||
|
for (int il=0 ; il<3 ; ++il) {
|
||||||
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
|
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
|
||||||
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
|
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
|
||||||
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
|
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
|
||||||
@ -5467,44 +5501,37 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
poly_vgl_4[il] * s4 )) * f[il];
|
poly_vgl_4[il] * s4 )) * f[il];
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 2:
|
case(5):
|
||||||
poly_vgl_1 = &(poly_vgl_l2[0][idx]);
|
#ifdef HAVE_OPENMP
|
||||||
poly_vgl_2 = &(poly_vgl_l2[1][idx]);
|
#pragma omp simd
|
||||||
poly_vgl_3 = &(poly_vgl_l2[2][idx]);
|
#endif
|
||||||
poly_vgl_4 = &(poly_vgl_l2[3][idx]);
|
for (int il=0 ; il<5 ; ++il) {
|
||||||
poly_vgl_5 = &(poly_vgl_l2[4][idx]);
|
|
||||||
|
|
||||||
for (int64_t il=0 ; il<n ; ++il) {
|
|
||||||
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
|
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
|
||||||
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
|
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
|
||||||
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
|
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
|
||||||
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
|
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
|
||||||
ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 +
|
ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
|
||||||
2.0*(poly_vgl_2[il] * s2 +
|
2.0*(poly_vgl_2[il] * s2 +
|
||||||
poly_vgl_3[il] * s3 +
|
poly_vgl_3[il] * s3 +
|
||||||
poly_vgl_4[il] * s4 )) * f[il];
|
poly_vgl_4[il] * s4 )) * f[il];
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
poly_vgl_1 = &(poly_vgl[0][idx]);
|
#ifdef HAVE_OPENMP
|
||||||
poly_vgl_2 = &(poly_vgl[1][idx]);
|
#pragma omp simd simdlen(8)
|
||||||
poly_vgl_3 = &(poly_vgl[2][idx]);
|
#endif
|
||||||
poly_vgl_4 = &(poly_vgl[3][idx]);
|
for (int il=0 ; il<n ; ++il) {
|
||||||
poly_vgl_5 = &(poly_vgl[4][idx]);
|
|
||||||
|
|
||||||
for (int64_t il=0 ; il<n ; ++il) {
|
|
||||||
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
|
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
|
||||||
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
|
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
|
||||||
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
|
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
|
||||||
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
|
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
|
||||||
ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 +
|
ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
|
||||||
2.0*(poly_vgl_2[il] * s2 +
|
2.0*(poly_vgl_2[il] * s2 +
|
||||||
poly_vgl_3[il] * s3 +
|
poly_vgl_3[il] * s3 +
|
||||||
poly_vgl_4[il] * s4 )) * f[il];
|
poly_vgl_4[il] * s4 )) * f[il];
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
for (int64_t il=0 ; il<n ; ++il) {
|
for (int64_t il=0 ; il<n ; ++il) {
|
||||||
ao_vgl_1[il] = 0.0;
|
ao_vgl_1[il] = 0.0;
|
||||||
|
Loading…
Reference in New Issue
Block a user