mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-06 19:33:14 +01:00
Replace `x += y` by `x = x + y` for better FMA generation
This commit is contained in:
parent
ffbeb97df4
commit
dd3db966b0
@ -5910,7 +5910,7 @@ IVDEP
|
|||||||
for (int l=1 ; l<coef_mat_sparse_idx[inucl][i][0]; ++l) {
|
for (int l=1 ; l<coef_mat_sparse_idx[inucl][i][0]; ++l) {
|
||||||
const int k = idx[l];
|
const int k = idx[l];
|
||||||
if (k >= nidx) break;
|
if (k >= nidx) break;
|
||||||
ce_mat[i] += v[l] * exp_mat[k];
|
ce_mat[i] = ce_mat[i] + v[l] * exp_mat[k];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -6730,7 +6730,7 @@ IVDEP
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int j=0 ; j<8 ; ++j) {
|
for (int j=0 ; j<8 ; ++j) {
|
||||||
ce_mat[i][j] += v[l] * exp_mat[k][j];
|
ce_mat[i][j] = ce_mat[i][j] + v[l] * exp_mat[k][j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1994,7 +1994,7 @@ qmckl_compute_jastrow_champ_asymp_jasb_hpc (const qmckl_context context,
|
|||||||
double x = kappa_inv;
|
double x = kappa_inv;
|
||||||
for (int k = 2; k <= bord_num; ++k) {
|
for (int k = 2; k <= bord_num; ++k) {
|
||||||
x *= kappa_inv;
|
x *= kappa_inv;
|
||||||
f += b_vector[k]*x;
|
f = f + b_vector[k]*x;
|
||||||
}
|
}
|
||||||
|
|
||||||
asymp_jasb[0] = spin_independent == 1 ? asym_one + f : 0.5 * asym_one + f;
|
asymp_jasb[0] = spin_independent == 1 ? asym_one + f : 0.5 * asym_one + f;
|
||||||
@ -2491,7 +2491,7 @@ qmckl_compute_jastrow_champ_factor_ee_hpc (const qmckl_context context,
|
|||||||
for (int j = 0; j < elec_num; ++j ) {
|
for (int j = 0; j < elec_num; ++j ) {
|
||||||
const double* xj = &(ee_distance_rescaled[j * elec_num + ishift]);
|
const double* xj = &(ee_distance_rescaled[j * elec_num + ishift]);
|
||||||
for (int i = 0; i < j ; ++i) {
|
for (int i = 0; i < j ; ++i) {
|
||||||
factor_ee[nw] += b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]);
|
factor_ee[nw] = factor_ee[nw] + b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2500,23 +2500,23 @@ qmckl_compute_jastrow_champ_factor_ee_hpc (const qmckl_context context,
|
|||||||
for (int j = 0; j < up_num; ++j ) {
|
for (int j = 0; j < up_num; ++j ) {
|
||||||
const double* xj = &(ee_distance_rescaled[j * elec_num + ishift]);
|
const double* xj = &(ee_distance_rescaled[j * elec_num + ishift]);
|
||||||
for (int i = 0; i < j ; ++i) {
|
for (int i = 0; i < j ; ++i) {
|
||||||
factor_ee[nw] += 0.5 * b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]);
|
factor_ee[nw] = factor_ee[nw] + 0.5 * b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j = up_num ; j < elec_num; ++j ) {
|
for (int j = up_num ; j < elec_num; ++j ) {
|
||||||
const double* xj = &(ee_distance_rescaled[j * elec_num + ishift]);
|
const double* xj = &(ee_distance_rescaled[j * elec_num + ishift]);
|
||||||
for (int i = 0; i < up_num; ++i) {
|
for (int i = 0; i < up_num; ++i) {
|
||||||
factor_ee[nw] += b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]);
|
factor_ee[nw] = factor_ee[nw] + b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]);
|
||||||
}
|
}
|
||||||
for (int i = up_num ; i < j ; ++i) {
|
for (int i = up_num ; i < j ; ++i) {
|
||||||
factor_ee[nw] += 0.5 * b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]);
|
factor_ee[nw] = factor_ee[nw] + 0.5 * b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
factor_ee[nw] -= fshift;
|
factor_ee[nw] = factor_ee[nw] - fshift;
|
||||||
|
|
||||||
for (int j=0; j < elec_num; ++j ) {
|
for (int j=0; j < elec_num; ++j ) {
|
||||||
const double* xj = &(ee_distance_rescaled[j * elec_num + ishift]);
|
const double* xj = &(ee_distance_rescaled[j * elec_num + ishift]);
|
||||||
@ -2525,7 +2525,7 @@ qmckl_compute_jastrow_champ_factor_ee_hpc (const qmckl_context context,
|
|||||||
double xk = x;
|
double xk = x;
|
||||||
for (int k = 2; k <= bord_num; ++k) {
|
for (int k = 2; k <= bord_num; ++k) {
|
||||||
xk *= x;
|
xk *= x;
|
||||||
factor_ee[nw] += b_vector[k] * xk;
|
factor_ee[nw] = factor_ee[nw] + b_vector[k] * xk;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -2991,11 +2991,11 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc(const qmckl_context context,
|
|||||||
f *= 0.5;
|
f *= 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
factor_ee_gl_0[i] += f*dx[0];
|
factor_ee_gl_0[i] = factor_ee_gl_0[i] + f*dx[0];
|
||||||
factor_ee_gl_1[i] += f*dx[1];
|
factor_ee_gl_1[i] = factor_ee_gl_1[i] + f*dx[1];
|
||||||
factor_ee_gl_2[i] += f*dx[2];
|
factor_ee_gl_2[i] = factor_ee_gl_2[i] + f*dx[2];
|
||||||
factor_ee_gl_3[i] += f*dx[3];
|
factor_ee_gl_3[i] = factor_ee_gl_3[i] + f*dx[3];
|
||||||
factor_ee_gl_3[i] -= f*grad_c2*invdenom*2.0 * b_vector[1];
|
factor_ee_gl_3[i] = factor_ee_gl_3[i] - f*grad_c2*invdenom*2.0 * b_vector[1];
|
||||||
|
|
||||||
|
|
||||||
double xk[bord_num+1]; // Nvidia C 23.1-0 compiler crashes here (skylake avx512) nvc nvfortran --enable-hpc
|
double xk[bord_num+1]; // Nvidia C 23.1-0 compiler crashes here (skylake avx512) nvc nvfortran --enable-hpc
|
||||||
@ -3007,11 +3007,11 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc(const qmckl_context context,
|
|||||||
for (int k=2 ; k<= bord_num ; ++k) {
|
for (int k=2 ; k<= bord_num ; ++k) {
|
||||||
const double f1 = b_vector[k] * kf[k] * xk[k-2];
|
const double f1 = b_vector[k] * kf[k] * xk[k-2];
|
||||||
const double f2 = f1*xk[1];
|
const double f2 = f1*xk[1];
|
||||||
factor_ee_gl_0[i] += f2*dx[0];
|
factor_ee_gl_0[i] = factor_ee_gl_0[i] + f2*dx[0];
|
||||||
factor_ee_gl_1[i] += f2*dx[1];
|
factor_ee_gl_1[i] = factor_ee_gl_1[i] + f2*dx[1];
|
||||||
factor_ee_gl_2[i] += f2*dx[2];
|
factor_ee_gl_2[i] = factor_ee_gl_2[i] + f2*dx[2];
|
||||||
factor_ee_gl_3[i] += f2*dx[3];
|
factor_ee_gl_3[i] = factor_ee_gl_3[i] + f2*dx[3];
|
||||||
factor_ee_gl_3[i] += f1*kf[k-1]*grad_c2;
|
factor_ee_gl_3[i] = factor_ee_gl_3[i] + f1*kf[k-1]*grad_c2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -4483,15 +4483,15 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_en_hpc (
|
|||||||
const double* en_distance_rescaled__ = &(en_distance_rescaled_[a*elec_num]);
|
const double* en_distance_rescaled__ = &(en_distance_rescaled_[a*elec_num]);
|
||||||
const double* a_vec = &(a_vector[(aord_num+1)*type_nucl_vector[a]]);
|
const double* a_vec = &(a_vector[(aord_num+1)*type_nucl_vector[a]]);
|
||||||
|
|
||||||
factor_en[nw] -= asymp_jasa[type_nucl_vector[a]]*de;
|
factor_en[nw] = factor_en[nw] - asymp_jasa[type_nucl_vector[a]]*de;
|
||||||
|
|
||||||
for (int64_t i=0 ; i<elec_num ; ++i) {
|
for (int64_t i=0 ; i<elec_num ; ++i) {
|
||||||
double x = en_distance_rescaled__[i];
|
double x = en_distance_rescaled__[i];
|
||||||
factor_en[nw] += a_vec[0]*x / (1.0 + a_vec[1]*x);
|
factor_en[nw] = factor_en[nw] + a_vec[0]*x / (1.0 + a_vec[1]*x);
|
||||||
|
|
||||||
for (int64_t p=2 ; p <= aord_num ; ++p) {
|
for (int64_t p=2 ; p <= aord_num ; ++p) {
|
||||||
x *= en_distance_rescaled__[i];
|
x *= en_distance_rescaled__[i];
|
||||||
factor_en[nw] += a_vec[p]*x;
|
factor_en[nw] = factor_en[nw] + a_vec[p]*x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -4881,11 +4881,11 @@ qmckl_compute_jastrow_champ_factor_en_gl_hpc (const qmckl_context context,
|
|||||||
|
|
||||||
double f = a_vec[0] * invdenom2;
|
double f = a_vec[0] * invdenom2;
|
||||||
|
|
||||||
factor_en_gl_0[i] += f*dx[0];
|
factor_en_gl_0[i] = factor_en_gl_0[i] + f*dx[0];
|
||||||
factor_en_gl_1[i] += f*dx[1];
|
factor_en_gl_1[i] = factor_en_gl_1[i] + f*dx[1];
|
||||||
factor_en_gl_2[i] += f*dx[2];
|
factor_en_gl_2[i] = factor_en_gl_2[i] + f*dx[2];
|
||||||
factor_en_gl_3[i] += f*dx[3];
|
factor_en_gl_3[i] = factor_en_gl_3[i] + f*dx[3];
|
||||||
factor_en_gl_3[i] -= f*grad_c2*invdenom*2.0 * a_vec[1];
|
factor_en_gl_3[i] = factor_en_gl_3[i] - f*grad_c2*invdenom*2.0 * a_vec[1];
|
||||||
|
|
||||||
|
|
||||||
double xk[aord_num+1];
|
double xk[aord_num+1];
|
||||||
@ -4897,11 +4897,11 @@ qmckl_compute_jastrow_champ_factor_en_gl_hpc (const qmckl_context context,
|
|||||||
for (int k=2 ; k<= aord_num ; ++k) {
|
for (int k=2 ; k<= aord_num ; ++k) {
|
||||||
const double f1 = a_vec[k] * kf[k] * xk[k-2];
|
const double f1 = a_vec[k] * kf[k] * xk[k-2];
|
||||||
const double f2 = f1*xk[1];
|
const double f2 = f1*xk[1];
|
||||||
factor_en_gl_0[i] += f2*dx[0];
|
factor_en_gl_0[i] = factor_en_gl_0[i] + f2*dx[0];
|
||||||
factor_en_gl_1[i] += f2*dx[1];
|
factor_en_gl_1[i] = factor_en_gl_1[i] + f2*dx[1];
|
||||||
factor_en_gl_2[i] += f2*dx[2];
|
factor_en_gl_2[i] = factor_en_gl_2[i] + f2*dx[2];
|
||||||
factor_en_gl_3[i] += f2*dx[3];
|
factor_en_gl_3[i] = factor_en_gl_3[i] + f2*dx[3];
|
||||||
factor_en_gl_3[i] += f1*kf[k-1]*grad_c2;
|
factor_en_gl_3[i] = factor_en_gl_3[i] + f1*kf[k-1]*grad_c2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -10214,7 +10214,7 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
|||||||
double tmp3[elec_num];
|
double tmp3[elec_num];
|
||||||
|
|
||||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||||
factor_een_gl_0nw[j] += cn *
|
factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn *
|
||||||
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
|
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||||
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
|
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
|
||||||
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
|
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
|
||||||
@ -10225,29 +10225,29 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||||
factor_een_gl_1nw[j] += cn *
|
factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn *
|
||||||
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
|
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||||
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
|
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
|
||||||
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
|
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
|
||||||
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
|
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
|
||||||
tmp3[j] +=
|
tmp3[j] = tmp3[j] +
|
||||||
dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] +
|
dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||||
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j];
|
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j];
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||||
factor_een_gl_2nw[j] += cn *
|
factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn *
|
||||||
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
|
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||||
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
|
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
|
||||||
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
|
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
|
||||||
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
|
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
|
||||||
tmp3[j] +=
|
tmp3[j] = tmp3[j] +
|
||||||
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] +
|
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||||
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j];
|
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j];
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||||
factor_een_gl_3nw[j] += cn *
|
factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn *
|
||||||
(tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] +
|
(tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] +
|
||||||
dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] +
|
dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] +
|
||||||
dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] +
|
dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] +
|
||||||
|
@ -526,9 +526,9 @@ qmckl_set_mo_basis_r_cusp(qmckl_context context,
|
|||||||
for (int64_t k=0 ; k<ctx->ao_basis.ao_num ; ++k) {
|
for (int64_t k=0 ; k<ctx->ao_basis.ao_num ; ++k) {
|
||||||
if ( ctx->ao_basis.ao_nucl[k] == inucl && ctx->ao_basis.ao_ang_mom[k] == 0) {
|
if ( ctx->ao_basis.ao_nucl[k] == inucl && ctx->ao_basis.ao_ang_mom[k] == 0) {
|
||||||
const double ck = ctx->mo_basis.coefficient[k + i*ctx->ao_basis.ao_num];
|
const double ck = ctx->mo_basis.coefficient[k + i*ctx->ao_basis.ao_num];
|
||||||
qmckl_ten3(mo_vgl_at_r_cusp_s,i,0,inucl) += ck * qmckl_ten3(ao_vgl_at_r_cusp_s,k,0,inucl);
|
qmckl_ten3(mo_vgl_at_r_cusp_s,i,0,inucl) = qmckl_ten3(mo_vgl_at_r_cusp_s,i,0,inucl) + ck * qmckl_ten3(ao_vgl_at_r_cusp_s,k,0,inucl);
|
||||||
qmckl_ten3(mo_vgl_at_r_cusp_s,i,1,inucl) += ck * qmckl_ten3(ao_vgl_at_r_cusp_s,k,3,inucl);
|
qmckl_ten3(mo_vgl_at_r_cusp_s,i,1,inucl) = qmckl_ten3(mo_vgl_at_r_cusp_s,i,1,inucl) + ck * qmckl_ten3(ao_vgl_at_r_cusp_s,k,3,inucl);
|
||||||
qmckl_ten3(mo_vgl_at_r_cusp_s,i,2,inucl) += ck * qmckl_ten3(ao_vgl_at_r_cusp_s,k,4,inucl);
|
qmckl_ten3(mo_vgl_at_r_cusp_s,i,2,inucl) = qmckl_ten3(mo_vgl_at_r_cusp_s,i,2,inucl) + ck * qmckl_ten3(ao_vgl_at_r_cusp_s,k,4,inucl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1363,7 +1363,8 @@ qmckl_compute_mo_basis_mo_value_hpc_sp (const qmckl_context context,
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl_sp[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 +
|
vgl_sp[i] = vgl_sp[i] +
|
||||||
|
ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 +
|
||||||
ck5[i] * a51 + ck6[i] * a61 + ck7[i] * a71 + ck8[i] * a81;
|
ck5[i] * a51 + ck6[i] * a61 + ck7[i] * a71 + ck8[i] * a81;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1377,7 +1378,7 @@ qmckl_compute_mo_basis_mo_value_hpc_sp (const qmckl_context context,
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl_sp[i] += ck[i] * a1;
|
vgl_sp[i] = vgl_sp[i] + ck[i] * a1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1486,7 +1487,8 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl1[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 +
|
vgl1[i] = vgl1[i] +
|
||||||
|
ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 +
|
||||||
ck5[i] * a51 + ck6[i] * a61 + ck7[i] * a71 + ck8[i] * a81;
|
ck5[i] * a51 + ck6[i] * a61 + ck7[i] * a71 + ck8[i] * a81;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1500,7 +1502,7 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl1[i] += ck[i] * a1;
|
vgl1[i] = vgl1[i] + ck[i] * a1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2041,11 +2043,11 @@ IVDEP
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl1[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
|
vgl1[i] = vgl1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
|
||||||
vgl2[i] += ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
|
vgl2[i] = vgl2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
|
||||||
vgl3[i] += ck1[i] * a13 + ck2[i] * a23 + ck3[i] * a33 + ck4[i] * a43;
|
vgl3[i] = vgl3[i] + ck1[i] * a13 + ck2[i] * a23 + ck3[i] * a33 + ck4[i] * a43;
|
||||||
vgl4[i] += ck1[i] * a14 + ck2[i] * a24 + ck3[i] * a34 + ck4[i] * a44;
|
vgl4[i] = vgl4[i] + ck1[i] * a14 + ck2[i] * a24 + ck3[i] * a34 + ck4[i] * a44;
|
||||||
vgl5[i] += ck1[i] * a15 + ck2[i] * a25 + ck3[i] * a35 + ck4[i] * a45;
|
vgl5[i] = vgl5[i] + ck1[i] * a15 + ck2[i] * a25 + ck3[i] * a35 + ck4[i] * a45;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2062,11 +2064,11 @@ IVDEP
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl1[i] += ck[i] * a1;
|
vgl1[i] = vgl1[i] + ck[i] * a1;
|
||||||
vgl2[i] += ck[i] * a2;
|
vgl2[i] = vgl2[i] + ck[i] * a2;
|
||||||
vgl3[i] += ck[i] * a3;
|
vgl3[i] = vgl3[i] + ck[i] * a3;
|
||||||
vgl4[i] += ck[i] * a4;
|
vgl4[i] = vgl4[i] + ck[i] * a4;
|
||||||
vgl5[i] += ck[i] * a5;
|
vgl5[i] = vgl5[i] + ck[i] * a5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2222,11 +2224,11 @@ qmckl_compute_mo_basis_mo_vgl_hpc_sp (const qmckl_context context,
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl_sp1[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
|
vgl_sp1[i] = vgl_sp1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
|
||||||
vgl_sp2[i] += ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
|
vgl_sp2[i] = vgl_sp2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
|
||||||
vgl_sp3[i] += ck1[i] * a13 + ck2[i] * a23 + ck3[i] * a33 + ck4[i] * a43;
|
vgl_sp3[i] = vgl_sp3[i] + ck1[i] * a13 + ck2[i] * a23 + ck3[i] * a33 + ck4[i] * a43;
|
||||||
vgl_sp4[i] += ck1[i] * a14 + ck2[i] * a24 + ck3[i] * a34 + ck4[i] * a44;
|
vgl_sp4[i] = vgl_sp4[i] + ck1[i] * a14 + ck2[i] * a24 + ck3[i] * a34 + ck4[i] * a44;
|
||||||
vgl_sp5[i] += ck1[i] * a15 + ck2[i] * a25 + ck3[i] * a35 + ck4[i] * a45;
|
vgl_sp5[i] = vgl_sp5[i] + ck1[i] * a15 + ck2[i] * a25 + ck3[i] * a35 + ck4[i] * a45;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2243,11 +2245,11 @@ qmckl_compute_mo_basis_mo_vgl_hpc_sp (const qmckl_context context,
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl_sp1[i] += ck[i] * a1;
|
vgl_sp1[i] = vgl_sp1[i] + ck[i] * a1;
|
||||||
vgl_sp2[i] += ck[i] * a2;
|
vgl_sp2[i] = vgl_sp2[i] + ck[i] * a2;
|
||||||
vgl_sp3[i] += ck[i] * a3;
|
vgl_sp3[i] = vgl_sp3[i] + ck[i] * a3;
|
||||||
vgl_sp4[i] += ck[i] * a4;
|
vgl_sp4[i] = vgl_sp4[i] + ck[i] * a4;
|
||||||
vgl_sp5[i] += ck[i] * a5;
|
vgl_sp5[i] = vgl_sp5[i] + ck[i] * a5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
IVDEP
|
IVDEP
|
||||||
@ -2599,7 +2601,7 @@ IVDEP
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl1[i] += ck[i] * a1;
|
vgl1[i] = vgl1[i] + ck[i] * a1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2608,7 +2610,7 @@ IVDEP
|
|||||||
const double r = ria[inucl];
|
const double r = ria[inucl];
|
||||||
IVDEP
|
IVDEP
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl1[i] += qmckl_ten3(cusp_param,i,0,inucl) + r*(
|
vgl1[i] = vgl1[i] + qmckl_ten3(cusp_param,i,0,inucl) + r*(
|
||||||
qmckl_ten3(cusp_param,i,1,inucl) + r*(
|
qmckl_ten3(cusp_param,i,1,inucl) + r*(
|
||||||
qmckl_ten3(cusp_param,i,2,inucl) + r*(
|
qmckl_ten3(cusp_param,i,2,inucl) + r*(
|
||||||
qmckl_ten3(cusp_param,i,3,inucl) )));
|
qmckl_ten3(cusp_param,i,3,inucl) )));
|
||||||
@ -3032,11 +3034,11 @@ IVDEP
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl1[i] += ck[i] * a1;
|
vgl1[i] = vgl1[i] + ck[i] * a1;
|
||||||
vgl2[i] += ck[i] * a2;
|
vgl2[i] = vgl2[i] + ck[i] * a2;
|
||||||
vgl3[i] += ck[i] * a3;
|
vgl3[i] = vgl3[i] + ck[i] * a3;
|
||||||
vgl4[i] += ck[i] * a4;
|
vgl4[i] = vgl4[i] + ck[i] * a4;
|
||||||
vgl5[i] += ck[i] * a5;
|
vgl5[i] = vgl5[i] + ck[i] * a5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3052,7 +3054,7 @@ IVDEP
|
|||||||
|
|
||||||
IVDEP
|
IVDEP
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl1[i] += qmckl_ten3(cusp_param,i,0,inucl) + r*(
|
vgl1[i] = vgl1[i] + qmckl_ten3(cusp_param,i,0,inucl) + r*(
|
||||||
qmckl_ten3(cusp_param,i,1,inucl) + r*(
|
qmckl_ten3(cusp_param,i,1,inucl) + r*(
|
||||||
qmckl_ten3(cusp_param,i,2,inucl) + r*(
|
qmckl_ten3(cusp_param,i,2,inucl) + r*(
|
||||||
qmckl_ten3(cusp_param,i,3,inucl) )));
|
qmckl_ten3(cusp_param,i,3,inucl) )));
|
||||||
@ -3061,11 +3063,11 @@ IVDEP
|
|||||||
2.0*qmckl_ten3(cusp_param,i,2,inucl) +
|
2.0*qmckl_ten3(cusp_param,i,2,inucl) +
|
||||||
r * 3.0 * qmckl_ten3(cusp_param,i,3,inucl);
|
r * 3.0 * qmckl_ten3(cusp_param,i,3,inucl);
|
||||||
|
|
||||||
vgl2[i] += r_vec[0] * c1;
|
vgl2[i] = vgl2[i] + r_vec[0] * c1;
|
||||||
vgl3[i] += r_vec[1] * c1;
|
vgl3[i] = vgl3[i] + r_vec[1] * c1;
|
||||||
vgl4[i] += r_vec[2] * c1;
|
vgl4[i] = vgl4[i] + r_vec[2] * c1;
|
||||||
|
|
||||||
vgl5[i] += 2.0*qmckl_ten3(cusp_param,i,1,inucl)*r_inv +
|
vgl5[i] = vgl5[i] + 2.0*qmckl_ten3(cusp_param,i,1,inucl)*r_inv +
|
||||||
6.0*qmckl_ten3(cusp_param,i,2,inucl) +
|
6.0*qmckl_ten3(cusp_param,i,2,inucl) +
|
||||||
12.0*qmckl_ten3(cusp_param,i,3,inucl)*r;
|
12.0*qmckl_ten3(cusp_param,i,3,inucl)*r;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user