diff --git a/org/qmckl_jastrow_champ.org b/org/qmckl_jastrow_champ.org index 284daa8..28f6bc0 100644 --- a/org/qmckl_jastrow_champ.org +++ b/org/qmckl_jastrow_champ.org @@ -11280,23 +11280,23 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context, double cn = c_vector_full[a+n*nucl_num]; if (cn == 0.0) continue; - const size_t ishift = elec_num*a; + const size_t ishift = elec_num*a; const size_t ishift4 = ishift*4; const double* restrict tmp_c_amkn = tmp_c_mkn + ishift; const double* restrict tmp_c_amlkn = tmp_c_mlkn + ishift; - const double* restrict een_rescaled_n_amnw = een_rescaled_n_mnw + ishift; + const double* restrict een_rescaled_n_amnw = een_rescaled_n_mnw + ishift; const double* restrict een_rescaled_n_amlnw = een_rescaled_n_mlnw + ishift; - const double* restrict dtmp_c_0amknw = dtmp_c_mknw + ishift4; + const double* restrict dtmp_c_0amknw = dtmp_c_mknw + ishift4; const double* restrict dtmp_c_0amlknw = dtmp_c_mlknw + ishift4; const double* restrict een_rescaled_n_gl_0amnw = een_rescaled_n_gl_mnw + ishift4; const double* restrict een_rescaled_n_gl_0amlnw = een_rescaled_n_gl_mlnw + ishift4; - const double* restrict dtmp_c_1amknw = dtmp_c_0amknw + elec_num; + const double* restrict dtmp_c_1amknw = dtmp_c_0amknw + elec_num; const double* restrict dtmp_c_1amlknw = dtmp_c_0amlknw + elec_num; - const double* restrict dtmp_c_2amknw = dtmp_c_0amknw + elec_num2; + const double* restrict dtmp_c_2amknw = dtmp_c_0amknw + elec_num2; const double* restrict dtmp_c_2amlknw = dtmp_c_0amlknw + elec_num2; - const double* restrict dtmp_c_3amknw = dtmp_c_0amknw + elec_num3; + const double* restrict dtmp_c_3amknw = dtmp_c_0amknw + elec_num3; const double* restrict dtmp_c_3amlknw = dtmp_c_0amlknw + elec_num3; const double* restrict een_rescaled_n_gl_1amnw = een_rescaled_n_gl_0amnw + elec_num; @@ -11305,6 +11305,7 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context, const double* restrict een_rescaled_n_gl_2amlnw = een_rescaled_n_gl_0amlnw + elec_num2; const double* restrict een_rescaled_n_gl_3amnw = een_rescaled_n_gl_0amnw + elec_num3; const double* restrict een_rescaled_n_gl_3amlnw = een_rescaled_n_gl_0amlnw + elec_num3; + double* const restrict factor_een_gl_1nw = factor_een_gl_0nw + elec_num; double* const restrict factor_een_gl_2nw = factor_een_gl_0nw + elec_num2; double* const restrict factor_een_gl_3nw = factor_een_gl_0nw + elec_num3; @@ -11571,7 +11572,7 @@ assert(qmckl_jastrow_champ_provided(context)); | ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][4][elec_num]~ | in | vector of non-zero coefficients | | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | | ~een_rescaled_n_gl~ | ~double[walk_num][0:cord_num][nucl_num][4][elec_num]~ | in | Derivative of Electron-nucleus rescaled factor | - | ~factor_een_grad~ | ~double[walk_num][4][elec_num]~ | out | Derivative of Electron-nucleus jastrow | + | ~factor_een_grad~ | ~double[walk_num][3][elec_num]~ | out | Derivative of Electron-nucleus jastrow | #+begin_src f90 :comments org :tangle (eval f) :noweb yes @@ -11631,10 +11632,10 @@ function qmckl_compute_jastrow_champ_factor_een_grad_doc( & do ii = 1, 3 do j = 1, elec_num factor_een_grad(j,ii,nw) = factor_een_grad(j,ii,nw) + ( & - tmp_c(j,a,m,k,nw) * een_rescaled_n_gl(j,ii,a,m+l,nw) + & - (dtmp_c(j,ii,a,m,k,nw)) * een_rescaled_n(j,a,m+l,nw) + & - (dtmp_c(j,ii,a,m+l,k,nw)) * een_rescaled_n(j,a,m ,nw) + & - tmp_c(j,a,m+l,k,nw) * een_rescaled_n_gl(j,ii,a,m,nw) & + dtmp_c(j,ii,a,m ,k,nw) * een_rescaled_n (j, a,m+l,nw) + & + dtmp_c(j,ii,a,m+l,k,nw) * een_rescaled_n (j, a,m ,nw) + & + tmp_c(j,a,m ,k,nw) * een_rescaled_n_gl(j,ii,a,m+l,nw) + & + tmp_c(j,a,m+l,k,nw) * een_rescaled_n_gl(j,ii,a,m ,nw) & ) * cn end do end do @@ -11685,7 +11686,7 @@ end function qmckl_compute_jastrow_champ_factor_een_grad_doc const double* een_rescaled_n_gl, double* const factor_een_grad ); #+end_src - + #+begin_src c :tangle (eval c) :comments org qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_grad(const qmckl_context context, @@ -11798,35 +11799,37 @@ qmckl_compute_jastrow_champ_factor_een_grad_hpc(const qmckl_context context, const double* restrict tmp_c_mlkn = tmp_c_mkn + len; const double* restrict een_rescaled_n_mnw = &(een_rescaled_n[addr1]); const double* restrict een_rescaled_n_mlnw = een_rescaled_n_mnw + len; - const double* restrict dtmp_c_mknw = &(dtmp_c[addr0 << 2]); + const double* restrict dtmp_c_mknw = &(dtmp_c[addr0*4]); const double* restrict dtmp_c_mlknw = dtmp_c_mknw + len4; - const double* restrict een_rescaled_n_gl_mnw = &(een_rescaled_n_gl[addr1 << 2]); + const double* restrict een_rescaled_n_gl_mnw = &(een_rescaled_n_gl[addr1*4]); const double* restrict een_rescaled_n_gl_mlnw = een_rescaled_n_gl_mnw + len4; for (size_t a = 0; a < (size_t) nucl_num; a++) { double cn = c_vector_full[a+n*nucl_num]; if (cn == 0.0) continue; - const size_t ishift = elec_num*a; + const size_t ishift = elec_num*a; const size_t ishift4 = ishift*4; const double* restrict tmp_c_amlkn = tmp_c_mlkn + ishift; const double* restrict tmp_c_amkn = tmp_c_mkn + ishift; - const double* restrict een_rescaled_n_amnw = een_rescaled_n_mnw + ishift; + const double* restrict een_rescaled_n_amnw = een_rescaled_n_mnw + ishift; const double* restrict een_rescaled_n_amlnw = een_rescaled_n_mlnw + ishift; - const double* restrict dtmp_c_0amknw = dtmp_c_mknw + ishift4; + const double* restrict dtmp_c_0amknw = dtmp_c_mknw + ishift4; const double* restrict dtmp_c_0amlknw = dtmp_c_mlknw + ishift4; - const double* restrict een_rescaled_n_gl_0amnw = een_rescaled_n_gl_mnw + ishift4; + const double* restrict een_rescaled_n_gl_0amnw = een_rescaled_n_gl_mnw + ishift4; const double* restrict een_rescaled_n_gl_0amlnw = een_rescaled_n_gl_mlnw + ishift4; - const double* restrict dtmp_c_1amknw = dtmp_c_0amknw + elec_num; + const double* restrict dtmp_c_1amknw = dtmp_c_0amknw + elec_num; const double* restrict dtmp_c_1amlknw = dtmp_c_0amlknw + elec_num; - const double* restrict dtmp_c_2amknw = dtmp_c_0amknw + elec_num2; + const double* restrict dtmp_c_2amknw = dtmp_c_0amknw + elec_num2; const double* restrict dtmp_c_2amlknw = dtmp_c_0amlknw + elec_num2; + const double* restrict een_rescaled_n_gl_1amnw = een_rescaled_n_gl_0amnw + elec_num; const double* restrict een_rescaled_n_gl_1amlnw = een_rescaled_n_gl_0amlnw + elec_num; const double* restrict een_rescaled_n_gl_2amnw = een_rescaled_n_gl_0amnw + elec_num2; const double* restrict een_rescaled_n_gl_2amlnw = een_rescaled_n_gl_0amlnw + elec_num2; + double* const restrict factor_een_grad_1nw = factor_een_grad_0nw + elec_num; double* const restrict factor_een_grad_2nw = factor_een_grad_0nw + elec_num2; @@ -11834,36 +11837,37 @@ qmckl_compute_jastrow_champ_factor_een_grad_hpc(const qmckl_context context, #pragma omp simd #endif for (size_t j = 0; j < (size_t) elec_num; ++j) { - factor_een_grad_0nw[j] = factor_een_grad_0nw[j] + cn * - (tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] + - dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]); + factor_een_grad_0nw[j] = factor_een_grad_0nw[j] + cn * ( + dtmp_c_0amknw [j] * een_rescaled_n_amlnw[j] + + dtmp_c_0amlknw[j] * een_rescaled_n_amnw [j] + + tmp_c_amkn [j] * een_rescaled_n_gl_0amlnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw [j] ); + } #ifdef HAVE_OPENMP #pragma omp simd #endif for (size_t j = 0; j < (size_t) elec_num; ++j) { - factor_een_grad_1nw[j] = factor_een_grad_1nw[j] + cn * - (tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] + - dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]); + factor_een_grad_1nw[j] = factor_een_grad_1nw[j] + cn * ( + dtmp_c_1amknw [j] * een_rescaled_n_amlnw[j] + + dtmp_c_1amlknw[j] * een_rescaled_n_amnw [j] + + tmp_c_amkn [j] * een_rescaled_n_gl_1amlnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw [j]); } #ifdef HAVE_OPENMP #pragma omp simd #endif for (size_t j = 0; j < (size_t) elec_num; ++j) { - factor_een_grad_2nw[j] = factor_een_grad_2nw[j] + cn * - (tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] + - dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]); + factor_een_grad_2nw[j] = factor_een_grad_2nw[j] + cn * ( + dtmp_c_2amknw [j] * een_rescaled_n_amlnw[j] + + dtmp_c_2amlknw[j] * een_rescaled_n_amnw [j] + + tmp_c_amkn [j] * een_rescaled_n_gl_2amlnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw [j]); } - } + } } } return info; @@ -12973,7 +12977,8 @@ assert(qmckl_jastrow_champ_provided(context)); for (int64_t e=0 ; e 1e-12) { printf("%ld %ld %ld\n", k, m, e); - printf("total_j_grad = %20.15e\n", total_j_grad[k][m][e]); + printf("total_j_deriv = %20.15e\n", total_j_deriv[k][m][e]); + printf("total_j_grad = %20.15e\n", total_j_grad[k][m][e]); fflush(stdout); } assert (fabs(total_j_deriv[k][m][e] - total_j_grad[k][m][e]) < 1.e-12);