From 0c978979fcf781893e59c174cfe8d40455fa55c7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 27 Aug 2021 13:57:19 +0200 Subject: [PATCH 01/19] Update CHBrClF test --- org/qmckl_tests.org | 104 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 89 insertions(+), 15 deletions(-) diff --git a/org/qmckl_tests.org b/org/qmckl_tests.org index 61f3151..4b2e47b 100644 --- a/org/qmckl_tests.org +++ b/org/qmckl_tests.org @@ -556,21 +556,95 @@ double chbrclf_basis_shell_factor[chbrclf_shell_num] = 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.}; -double chbrclf_basis_ao_factor[chbrclf_ao_num] = - {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., - 1., 1., 1.}; +double chbrclf_basis_ao_factor[chbrclf_ao_num] = { + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.7320508075688772e+00, 1.7320508075688772e+00, 9.9999999999999978e-01, + 1.7320508075688772e+00, 9.9999999999999978e-01, 1.0000000000000000e+00, + 1.7320508075688772e+00, 1.7320508075688772e+00, 1.0000000000000000e+00, + 1.7320508075688772e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.7320508075688776e+00, 1.7320508075688776e+00, 1.0000000000000000e+00, + 1.7320508075688776e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 2.2360679774997894e+00, 2.2360679774997894e+00, 2.2360679774997898e+00, + 3.8729833462074166e+00, 2.2360679774997894e+00, 1.0000000000000000e+00, + 2.2360679774997898e+00, 2.2360679774997894e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 2.2360679774997894e+00, 2.2360679774997894e+00, + 2.2360679774997894e+00, 3.8729833462074157e+00, 2.2360679774997894e+00, + 1.0000000000000000e+00, 2.2360679774997894e+00, 2.2360679774997894e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000002e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.7320508075688774e+00, 1.7320508075688772e+00, 1.0000000000000000e+00, + 1.7320508075688774e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.7320508075688776e+00, 1.7320508075688776e+00, 1.0000000000000000e+00, + 1.7320508075688776e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000002e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.7320508075688774e+00, + 1.7320508075688774e+00, 1.0000000000000000e+00, 1.7320508075688774e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.7320508075688772e+00, + 1.7320508075688772e+00, 1.0000000000000000e+00, 1.7320508075688772e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.7320508075688774e+00, + 1.7320508075688774e+00, 1.0000000000000000e+00, 1.7320508075688774e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 2.2360679774997902e+00, + 2.2360679774997902e+00, 2.2360679774997902e+00, 3.8729833462074170e+00, + 2.2360679774997902e+00, 1.0000000000000000e+00, 2.2360679774997902e+00, + 2.2360679774997902e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 2.2360679774997902e+00, 2.2360679774997902e+00, 2.2360679774997902e+00, + 3.8729833462074170e+00, 2.2360679774997902e+00, 1.0000000000000000e+00, + 2.2360679774997902e+00, 2.2360679774997902e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.7320508075688770e+00, 1.7320508075688770e+00, + 1.0000000000000000e+00, 1.7320508075688770e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.7320508075688774e+00, 1.7320508075688774e+00, + 1.0000000000000000e+00, 1.7320508075688774e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.7320508075688770e+00, 1.7320508075688770e+00, + 1.0000000000000000e+00, 1.7320508075688770e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 2.2360679774997898e+00, 2.2360679774997898e+00, + 2.2360679774997902e+00, 3.8729833462074170e+00, 2.2360679774997898e+00, + 9.9999999999999989e-01, 2.2360679774997902e+00, 2.2360679774997898e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 2.2360679774997894e+00, + 2.2360679774997894e+00, 2.2360679774997894e+00, 3.8729833462074175e+00, + 2.2360679774997894e+00, 1.0000000000000000e+00, 2.2360679774997894e+00, + 2.2360679774997894e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.7320508075688774e+00, 1.7320508075688774e+00, + 1.0000000000000000e+00, 1.7320508075688774e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.7320508075688772e+00, 1.7320508075688772e+00, + 1.0000000000000000e+00, 1.7320508075688772e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.7320508075688776e+00, 1.7320508075688776e+00, + 1.0000000000000000e+00, 1.7320508075688776e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 1.7320508075688774e+00, 1.7320508075688774e+00, + 1.0000000000000000e+00, 1.7320508075688774e+00, 1.0000000000000000e+00, + 1.0000000000000000e+00, 2.2360679774997898e+00, 2.2360679774997898e+00, + 2.2360679774997898e+00, 3.8729833462074170e+00, 2.2360679774997898e+00, + 1.0000000000000000e+00, 2.2360679774997898e+00, 2.2360679774997898e+00, + 1.0000000000000000e+00, 1.0000000000000000e+00, 2.2360679774997902e+00, + 2.2360679774997898e+00, 2.2360679774997902e+00, 3.8729833462074170e+00, + 2.2360679774997898e+00, 1.0000000000000002e+00, 2.2360679774997902e+00, + 2.2360679774997902e+00, 1.0000000000000000e+00}; int64_t chbrclf_basis_ao_shell[chbrclf_ao_num] = {0, 1, 2, 3, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, From 685b7201fc7671e8d8fc73273f1029cd771aeb3c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 14 Feb 2022 19:11:37 +0100 Subject: [PATCH 02/19] Accelerated AOs --- org/qmckl_ao.org | 407 +++++++++++++++++++++++++++++++----------- org/qmckl_jastrow.org | 384 ++++++++++++++++++++++++++++----------- 2 files changed, 580 insertions(+), 211 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index d40e478..8783884 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -3387,6 +3387,7 @@ for (j=0 ; j cutoff*nucleus_range(inucl)) then + cycle + end if + + ! C is zero-based, so shift bounds by one + ishell_start = nucleus_index(inucl) + 1 + ishell_end = nucleus_index(inucl) + nucleus_shell_num(inucl) + do ishell=ishell_start, ishell_end shell_vgl(ishell, 1, ipoint) = 0.d0 @@ -3523,6 +3530,7 @@ end function qmckl_compute_ao_basis_shell_gaussian_vgl_f nucl_num, & nucleus_shell_num, & nucleus_index, & + nucleus_range, & shell_prim_index, & shell_prim_num, & coord, & @@ -3542,6 +3550,7 @@ end function qmckl_compute_ao_basis_shell_gaussian_vgl_f integer (c_int64_t) , intent(in) , value :: nucl_num integer (c_int64_t) , intent(in) :: nucleus_shell_num(nucl_num) integer (c_int64_t) , intent(in) :: nucleus_index(nucl_num) + real (c_double ) , intent(in) :: nucleus_range(nucl_num) integer (c_int64_t) , intent(in) :: shell_prim_index(shell_num) integer (c_int64_t) , intent(in) :: shell_prim_num(shell_num) real (c_double ) , intent(in) :: coord(point_num,3) @@ -3559,6 +3568,7 @@ end function qmckl_compute_ao_basis_shell_gaussian_vgl_f nucl_num, & nucleus_shell_num, & nucleus_index, & + nucleus_range, & shell_prim_index, & shell_prim_num, & coord, & @@ -3625,6 +3635,7 @@ qmckl_exit_code qmckl_provide_ao_basis_shell_vgl(qmckl_context context) ctx->nucleus.num, ctx->ao_basis.nucleus_shell_num, ctx->ao_basis.nucleus_index, + ctx->ao_basis.nucleus_range, ctx->ao_basis.shell_prim_index, ctx->ao_basis.shell_prim_num, ctx->point.coord.data, @@ -4263,7 +4274,7 @@ integer function qmckl_ao_polynomial_transp_vgl_f (context, & integer :: a,b,c,d real*8 :: Y(3) integer :: lmax_array(3) - real*8 :: pows(-2:lmax,3) + real*8 :: pows(-2:21,3) ! lmax < 22 double precision :: xy, yz, xz double precision :: da, db, dc, dd @@ -4290,17 +4301,12 @@ integer function qmckl_ao_polynomial_transp_vgl_f (context, & endif - do i=1,3 - Y(i) = X(i) - R(i) - end do - lmax_array(1:3) = lmax - if (lmax == 0) then - VGL(1,1) = 1.d0 - VGL(1,2:5) = 0.d0 - l(1:3,1) = 0 - n=1 - else if (lmax > 0) then + if (lmax > 0) then + + do i=1,3 + Y(i) = X(i) - R(i) + end do pows(-2:0,1:3) = 1.d0 do i=1,lmax pows(i,1) = pows(i-1,1) * Y(1) @@ -4327,6 +4333,12 @@ integer function qmckl_ao_polynomial_transp_vgl_f (context, & VGL(4,4) = 1.d0 n=4 + else + VGL(1,1) = 1.d0 + VGL(1,2:5) = 0.d0 + l(1:3,1) = 0 + n=1 + return endif ! l>=2 @@ -4535,7 +4547,8 @@ end function test_qmckl_ao_polynomial_vgl :FRetType: qmckl_exit_code :END: - #+NAME: qmckl_ao_vgl_args +** Unoptimized version + #+NAME: qmckl_ao_vgl_args_doc | Variable | Type | In/Out | Description | |-----------------------+-----------------------------------+--------+----------------------------------------------| | ~context~ | ~qmckl_context~ | in | Global state | @@ -4552,9 +4565,8 @@ end function test_qmckl_ao_polynomial_vgl | ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell | | ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs | | ~shell_vgl~ | ~double[point_num][5][shell_num]~ | in | Value, gradients and Laplacian of the shells | - | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | out | Value, gradients and Laplacian of the AOs | + | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | out | Value, gradients and Laplacian of the AOs | -** Unoptimized version #+begin_src f90 :comments org :tangle (eval f) :noweb yes integer function qmckl_compute_ao_vgl_doc_f(context, & ao_num, shell_num, point_num, nucl_num, & @@ -4591,15 +4603,25 @@ integer function qmckl_compute_ao_vgl_doc_f(context, & integer, external :: qmckl_ao_polynomial_vgl_f double precision, allocatable :: poly_vgl(:,:) - integer , allocatable :: powers(:,:) + integer , allocatable :: powers(:,:), ao_index(:) - allocate(poly_vgl(5,ao_num), powers(3,ao_num)) + allocate(poly_vgl(5,ao_num), powers(3,ao_num), ao_index(ao_num)) ! Pre-computed data do l=0,20 lstart(l) = l*(l+1)*(l+2)/6 +1 end do + k=1 + do inucl=1,nucl_num + ishell_start = nucleus_index(inucl) + 1 + ishell_end = nucleus_index(inucl) + nucleus_shell_num(inucl) + do ishell = ishell_start, ishell_end + l = shell_ang_mom(ishell) + ao_index(ishell) = k + k = k + lstart(l+1) - lstart(l) + end do + end do info = QMCKL_SUCCESS ! Don't compute polynomials when the radial part is zero. @@ -4609,7 +4631,6 @@ integer function qmckl_compute_ao_vgl_doc_f(context, & e_coord(1) = coord(ipoint,1) e_coord(2) = coord(ipoint,2) e_coord(3) = coord(ipoint,3) - k=1 do inucl=1,nucl_num n_coord(1) = nucl_coord(inucl,1) n_coord(2) = nucl_coord(inucl,2) @@ -4620,7 +4641,7 @@ integer function qmckl_compute_ao_vgl_doc_f(context, & y = e_coord(2) - n_coord(2) z = e_coord(3) - n_coord(3) - r2 = x*x + z*z + z*z + r2 = x*x + y*y + z*z if (r2 > cutoff*nucleus_range(inucl)) then cycle @@ -4635,6 +4656,7 @@ integer function qmckl_compute_ao_vgl_doc_f(context, & ishell_start = nucleus_index(inucl) + 1 ishell_end = nucleus_index(inucl) + nucleus_shell_num(inucl) do ishell = ishell_start, ishell_end + k = ao_index(ishell) l = shell_ang_mom(ishell) do il = lstart(l), lstart(l+1)-1 ! Value @@ -4680,18 +4702,43 @@ end function qmckl_compute_ao_vgl_doc_f #+end_src ** HPC version + #+NAME: qmckl_ao_vgl_args_hpc + | Variable | Type | In/Out | Description | + |-----------------------+--------------------------------+--------+----------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~ao_num~ | ~int64_t~ | in | Number of AOs | + | ~shell_num~ | ~int64_t~ | in | Number of shells | + | ~prim_num~ | ~int64_t~ | in | Number of primitives | + | ~point_num~ | ~int64_t~ | in | Number of points | + | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | + | ~coord~ | ~double[3][point_num]~ | in | Coordinates | + | ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates | + | ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus | + | ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus | + | ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero | + | ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus | + | ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell | + | ~shell_prim_index~ | ~int64_t[shell_num]~ | in | Index of the 1st primitive of each shell | + | ~shell_prim_num~ | ~int64_t[shell_num]~ | in | Number of primitives per shell | + | ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs | + | ~ao_expo~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | + | ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | + | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | out | Value, gradients and Laplacian of the AOs | + #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_ao_vgl_hpc_f(context, & - ao_num, shell_num, point_num, nucl_num, & +integer function qmckl_compute_ao_vgl_hpc_f(context, & + ao_num, shell_num, prim_num, point_num, nucl_num, & coord, nucl_coord, nucleus_index, nucleus_shell_num, & - nucleus_range, nucleus_max_ang_mom, shell_ang_mom, & - ao_factor, shell_vgl, ao_vgl) & + nucleus_range, nucleus_max_ang_mom, shell_ang_mom, & + shell_prim_index, shell_prim_num, ao_factor, expo, & + coef_normalized, ao_vgl) & result(info) use qmckl implicit none integer(qmckl_context), intent(in) :: context integer*8 , intent(in) :: ao_num integer*8 , intent(in) :: shell_num + integer*8 , intent(in) :: prim_num integer*8 , intent(in) :: point_num integer*8 , intent(in) :: nucl_num double precision , intent(in) :: coord(point_num,3) @@ -4701,8 +4748,11 @@ integer function qmckl_compute_ao_vgl_hpc_f(context, & double precision , intent(in) :: nucleus_range(nucl_num) integer , intent(in) :: nucleus_max_ang_mom(nucl_num) integer , intent(in) :: shell_ang_mom(shell_num) + integer*8 , intent(in) :: shell_prim_index(shell_num) + integer*8 , intent(in) :: shell_prim_num(shell_num) double precision , intent(in) :: ao_factor(ao_num) - double precision , intent(in) :: shell_vgl(shell_num,5,point_num) + double precision , intent(in) :: expo(prim_num) + double precision , intent(in) :: coef_normalized(prim_num) double precision , intent(out) :: ao_vgl(ao_num,5,point_num) double precision :: e_coord(3), n_coord(3) @@ -4710,21 +4760,37 @@ integer function qmckl_compute_ao_vgl_hpc_f(context, & integer :: l, il, k integer*8 :: ipoint, inucl, ishell integer*8 :: ishell_start, ishell_end - integer :: lstart(0:20) - double precision :: x, y, z, r2 - double precision :: cutoff + integer :: lstart(0:20) + double precision :: x, y, z, r2, s1, s2, s3, s4, s5, s6 + double precision :: cutoff, v, two_a + integer*8 :: iprim_start , iprim_end, iprim integer, external :: qmckl_ao_polynomial_transp_vgl_f double precision, allocatable :: poly_vgl(:,:) - integer , allocatable :: powers(:,:) + integer , allocatable :: powers(:,:), ao_index(:) - allocate(poly_vgl(ao_num,5), powers(3,ao_num)) + integer :: nidx, idx, n + double precision, allocatable :: ar2(:), expo_(:), c_(:) + + allocate(poly_vgl(ao_num,5), powers(3,ao_num), ao_index(ao_num)) + allocate(c_(prim_num), expo_(prim_num), ar2(prim_num)) ! Pre-computed data do l=0,20 lstart(l) = l*(l+1)*(l+2)/6 +1 end do + k=1 + do inucl=1,nucl_num + ishell_start = nucleus_index(inucl) + 1 + ishell_end = nucleus_index(inucl) + nucleus_shell_num(inucl) + do ishell = ishell_start, ishell_end + l = shell_ang_mom(ishell) + ao_index(ishell) = k + k = k + lstart(l+1) - lstart(l) + end do + end do + info = QMCKL_SUCCESS ! Don't compute polynomials when the radial part is zero. @@ -4734,7 +4800,6 @@ integer function qmckl_compute_ao_vgl_hpc_f(context, & e_coord(1) = coord(ipoint,1) e_coord(2) = coord(ipoint,2) e_coord(3) = coord(ipoint,3) - k=1 do inucl=1,nucl_num n_coord(1) = nucl_coord(inucl,1) n_coord(2) = nucl_coord(inucl,2) @@ -4745,7 +4810,7 @@ integer function qmckl_compute_ao_vgl_hpc_f(context, & y = e_coord(2) - n_coord(2) z = e_coord(3) - n_coord(3) - r2 = x*x + z*z + z*z + r2 = x*x + y*y + z*z if (r2 > cutoff*nucleus_range(inucl)) then cycle @@ -4759,68 +4824,80 @@ integer function qmckl_compute_ao_vgl_hpc_f(context, & ! Loop over shells ishell_start = nucleus_index(inucl) + 1 ishell_end = nucleus_index(inucl) + nucleus_shell_num(inucl) + do ishell = ishell_start, ishell_end + iprim_start = shell_prim_index(ishell) + 1 + iprim_end = shell_prim_index(ishell) + shell_prim_num(ishell) + + ! /!\ Gaussian fuctions + nidx = 0 + do iprim = iprim_start, iprim_end + v = expo(iprim)*r2 + if (v > cutoff) then + cycle + end if + nidx = nidx+1 + ar2(nidx) = v + c_(nidx) = coef_normalized(iprim) + expo_(nidx) = expo(iprim) + enddo + + s1 = 0.d0 + s5 = 0.d0 + s6 = 0.d0 + do idx = 1, nidx + v = c_(idx) * dexp(-ar2(idx)) + s1 = s1 + v + s6 = s6 - expo_(idx) * v + s5 = s5 + ar2(idx) + end do + s6 = s6 +s6 + s5 = 2.d0*s5 + s6*3.d0 + s2 = s6 * x + s3 = s6 * y + s4 = s6 * z + l = shell_ang_mom(ishell) - if (shell_vgl(ishell,1,ipoint) /= 0.d0) then - do il = lstart(l), lstart(l+1)-1 - ! Value - ao_vgl(k,1,ipoint) = & - poly_vgl(il,1) * shell_vgl(ishell,1,ipoint) * ao_factor(k) - - ! Grad_x - ao_vgl(k,2,ipoint) = ( & - poly_vgl(il,2) * shell_vgl(ishell,1,ipoint) + & - poly_vgl(il,1) * shell_vgl(ishell,2,ipoint) & - ) * ao_factor(k) - - ! Grad_y - ao_vgl(k,3,ipoint) = ( & - poly_vgl(il,3) * shell_vgl(ishell,1,ipoint) + & - poly_vgl(il,1) * shell_vgl(ishell,3,ipoint) & - ) * ao_factor(k) - - ! Grad_z - ao_vgl(k,4,ipoint) = ( & - poly_vgl(il,4) * shell_vgl(ishell,1,ipoint) + & - poly_vgl(il,1) * shell_vgl(ishell,4,ipoint) & - ) * ao_factor(k) - - ! Lapl_z - ao_vgl(k,5,ipoint) = ( & - poly_vgl(il,5) * shell_vgl(ishell,1,ipoint) + & - poly_vgl(il,1) * shell_vgl(ishell,5,ipoint) + & - 2.d0 * ( & - poly_vgl(il,2) * shell_vgl(ishell,2,ipoint) + & - poly_vgl(il,3) * shell_vgl(ishell,3,ipoint) + & - poly_vgl(il,4) * shell_vgl(ishell,4,ipoint) ) & - ) * ao_factor(k) - k = k+1 + k = ao_index(ishell) + n = lstart(l+1)-lstart(l) + if (nidx > 0) then + idx = lstart(l) + do il = 0,n-1 + ao_vgl(k+il,1,ipoint) = poly_vgl(idx+il,1) * s1 * ao_factor(k+il) + ao_vgl(k+il,2,ipoint) = (poly_vgl(idx+il,2) * s1 + poly_vgl(idx+il,1) * s2) * ao_factor(k+il) + ao_vgl(k+il,3,ipoint) = (poly_vgl(idx+il,3) * s1 + poly_vgl(idx+il,1) * s3) * ao_factor(k+il) + ao_vgl(k+il,4,ipoint) = (poly_vgl(idx+il,4) * s1 + poly_vgl(idx+il,1) * s4) * ao_factor(k+il) + ao_vgl(k+il,5,ipoint) = (poly_vgl(idx+il,5) * s1 + & + poly_vgl(idx+il,1) * s5 + 2.d0*( & + poly_vgl(idx+il,2) * s2 + & + poly_vgl(idx+il,3) * s3 + & + poly_vgl(idx+il,4) * s4 )) * ao_factor(k+il) end do else - do il = lstart(l), lstart(l+1)-1 - ao_vgl(k,1,ipoint) = 0.d0 - ao_vgl(k,2,ipoint) = 0.d0 - ao_vgl(k,3,ipoint) = 0.d0 - ao_vgl(k,4,ipoint) = 0.d0 - ao_vgl(k,5,ipoint) = 0.d0 - k = k+1 + do il = 0, n-1 + ao_vgl(k+il,1,ipoint) = 0.d0 + ao_vgl(k+il,2,ipoint) = 0.d0 + ao_vgl(k+il,3,ipoint) = 0.d0 + ao_vgl(k+il,4,ipoint) = 0.d0 + ao_vgl(k+il,5,ipoint) = 0.d0 end do - end if + endif + end do end do end do - + deallocate(poly_vgl, powers) end function qmckl_compute_ao_vgl_hpc_f #+end_src ** Interfaces -# #+CALL: generate_c_header(table=qmckl_ao_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_ao_vgl")) +# #+CALL: generate_c_header(table=qmckl_ao_vgl_args_doc,rettyp=get_value("CRetType"),fname="qmckl_compute_ao_vgl")) # (Commented because the header needs to go into h_private_func #+RESULTS: #+begin_src c :tangle (eval h_private_func) :comments org - qmckl_exit_code qmckl_compute_ao_vgl ( + qmckl_exit_code qmckl_compute_ao_vgl_doc ( const qmckl_context context, const int64_t ao_num, const int64_t shell_num, @@ -4837,12 +4914,34 @@ end function qmckl_compute_ao_vgl_hpc_f const double* shell_vgl, double* const ao_vgl ); #+end_src + #+begin_src c :tangle (eval h_private_func) :comments org + qmckl_exit_code qmckl_compute_ao_vgl_hpc ( + const qmckl_context context, + const int64_t ao_num, + const int64_t shell_num, + const int64_t prim_num, + const int64_t point_num, + const int64_t nucl_num, + const double* coord, + const double* nucl_coord, + const int64_t* nucleus_index, + const int64_t* nucleus_shell_num, + const double* nucleus_range, + const int32_t* nucleus_max_ang_mom, + const int32_t* shell_ang_mom, + const int64_t* shell_prim_index, + const int64_t* shell_prim_num, + const double* ao_factor, + const double* expo, + const double* coef_normalized, + double* const ao_vgl ); + #+end_src - #+CALL: generate_c_interface(table=qmckl_ao_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_ao_vgl")) + #+CALL: generate_c_interface(table=qmckl_ao_vgl_args_doc,rettyp=get_value("CRetType"),fname="qmckl_compute_ao_vgl_doc")) #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_ao_vgl & + integer(c_int32_t) function qmckl_compute_ao_vgl_doc & (context, & ao_num, & shell_num, & @@ -4879,13 +4978,8 @@ end function qmckl_compute_ao_vgl_hpc_f real (c_double ) , intent(in) :: shell_vgl(shell_num,5,point_num) real (c_double ) , intent(out) :: ao_vgl(ao_num,5,point_num) -#ifdef HAVE_HPC - integer(c_int32_t), external :: qmckl_compute_ao_vgl_hpc_f - info = qmckl_compute_ao_vgl_hpc_f & -#else integer(c_int32_t), external :: qmckl_compute_ao_vgl_doc_f info = qmckl_compute_ao_vgl_doc_f & -#endif (context, & ao_num, & shell_num, & @@ -4902,7 +4996,81 @@ end function qmckl_compute_ao_vgl_hpc_f shell_vgl, & ao_vgl) - end function qmckl_compute_ao_vgl + end function qmckl_compute_ao_vgl_doc + #+end_src + + #+CALL: generate_c_interface(table=qmckl_ao_vgl_args_hpc,rettyp=get_value("CRetType"),fname="qmckl_compute_ao_vgl_hpc")) + + #+RESULTS: + #+begin_src f90 :tangle (eval f) :comments org :exports none + integer(c_int32_t) function qmckl_compute_ao_vgl_hpc & + (context, & + ao_num, & + shell_num, & + prim_num, & + point_num, & + nucl_num, & + coord, & + nucl_coord, & + nucleus_index, & + nucleus_shell_num, & + nucleus_range, & + nucleus_max_ang_mom, & + shell_ang_mom, & + shell_prim_index, & + shell_prim_num, & + ao_factor, & + ao_expo, & + coef_normalized, & + ao_vgl) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: ao_num + integer (c_int64_t) , intent(in) , value :: shell_num + integer (c_int64_t) , intent(in) , value :: prim_num + integer (c_int64_t) , intent(in) , value :: point_num + integer (c_int64_t) , intent(in) , value :: nucl_num + real (c_double ) , intent(in) :: coord(point_num,3) + real (c_double ) , intent(in) :: nucl_coord(nucl_num,3) + integer (c_int64_t) , intent(in) :: nucleus_index(nucl_num) + integer (c_int64_t) , intent(in) :: nucleus_shell_num(nucl_num) + real (c_double ) , intent(in) :: nucleus_range(nucl_num) + integer (c_int32_t) , intent(in) :: nucleus_max_ang_mom(nucl_num) + integer (c_int32_t) , intent(in) :: shell_ang_mom(shell_num) + integer (c_int64_t) , intent(in) :: shell_prim_index(shell_num) + integer (c_int64_t) , intent(in) :: shell_prim_num(shell_num) + real (c_double ) , intent(in) :: ao_factor(ao_num) + real (c_double ) , intent(in) :: ao_expo(prim_num) + real (c_double ) , intent(in) :: coef_normalized(prim_num) + real (c_double ) , intent(out) :: ao_vgl(ao_num,5,point_num) + + integer(c_int32_t), external :: qmckl_compute_ao_vgl_hpc_f + info = qmckl_compute_ao_vgl_hpc_f & + (context, & + ao_num, & + shell_num, & + prim_num, & + point_num, & + nucl_num, & + coord, & + nucl_coord, & + nucleus_index, & + nucleus_shell_num, & + nucleus_range, & + nucleus_max_ang_mom, & + shell_ang_mom, & + shell_prim_index, & + shell_prim_num, & + ao_factor, & + ao_expo, & + coef_normalized, & + ao_vgl) + + end function qmckl_compute_ao_vgl_hpc #+end_src *** Provide :noexport: @@ -4938,10 +5106,12 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context) qmckl_exit_code rc; /* Provide required data */ +#ifndef HAVE_HPC rc = qmckl_provide_ao_basis_shell_vgl(context); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_provide_ao_basis_shell_vgl", NULL); } +#endif /* Allocate array */ if (ctx->ao_basis.ao_vgl == NULL) { @@ -4958,22 +5128,43 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context) } ctx->ao_basis.ao_vgl = ao_vgl; } - - rc = qmckl_compute_ao_vgl(context, - ctx->ao_basis.ao_num, - ctx->ao_basis.shell_num, - ctx->point.num, - ctx->nucleus.num, - ctx->point.coord.data, - ctx->nucleus.coord.data, - ctx->ao_basis.nucleus_index, - ctx->ao_basis.nucleus_shell_num, - ctx->ao_basis.nucleus_range, - ctx->ao_basis.nucleus_max_ang_mom, - ctx->ao_basis.shell_ang_mom, - ctx->ao_basis.ao_factor, - ctx->ao_basis.shell_vgl, - ctx->ao_basis.ao_vgl); +#ifdef HAVE_HPC + rc = qmckl_compute_ao_vgl_hpc(context, + ctx->ao_basis.ao_num, + ctx->ao_basis.shell_num, + ctx->ao_basis.prim_num, + ctx->point.num, + ctx->nucleus.num, + ctx->point.coord.data, + ctx->nucleus.coord.data, + ctx->ao_basis.nucleus_index, + ctx->ao_basis.nucleus_shell_num, + ctx->ao_basis.nucleus_range, + ctx->ao_basis.nucleus_max_ang_mom, + ctx->ao_basis.shell_ang_mom, + ctx->ao_basis.shell_prim_index, + ctx->ao_basis.shell_prim_num, + ctx->ao_basis.ao_factor, + ctx->ao_basis.exponent, + ctx->ao_basis.coefficient_normalized, + ctx->ao_basis.ao_vgl); +#else + rc = qmckl_compute_ao_vgl_doc(context, + ctx->ao_basis.ao_num, + ctx->ao_basis.shell_num, + ctx->point.num, + ctx->nucleus.num, + ctx->point.coord.data, + ctx->nucleus.coord.data, + ctx->ao_basis.nucleus_index, + ctx->ao_basis.nucleus_shell_num, + ctx->ao_basis.nucleus_range, + ctx->ao_basis.nucleus_max_ang_mom, + ctx->ao_basis.shell_ang_mom, + ctx->ao_basis.ao_factor, + ctx->ao_basis.shell_vgl, + ctx->ao_basis.ao_vgl); +#endif if (rc != QMCKL_SUCCESS) { return rc; } diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index abeac12..d745dcf 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -9,10 +9,10 @@ nuclear ($\mathbf{R}$) coordinates. Its defined as $\exp(J(\mathbf{r},\mathbf{R}))$, where \[ - J(\mathbf{r},\mathbf{R}) = J_{\text{eN}}(\mathbf{r},\mathbf{R}) + J_{\text{ee}}(\mathbf{r}) + J_{\text{eeN}}(\mathbf{r},\mathbf{R}) + J(\mathbf{r},\mathbf{R}) = J_{\text{eN}}(\mathbf{r},\mathbf{R}) + J_{\text{ee}}(\mathbf{r}) + J_{\text{eeN}}(\mathbf{r},\mathbf{R}) \] - In the following, we us the notations $r_{ij} = |\mathbf{r}_i - \mathbf{r}_j|$ and + In the following, we us the notations $r_{ij} = |\mathbf{r}_i - \mathbf{r}_j|$ and $R_{i\alpha} = |\mathbf{r}_i - \mathbf{R}_\alpha|$. $J_{\text{eN}}$ contains electron-nucleus terms: @@ -23,9 +23,9 @@ \sum_{p=2}^{N_\text{ord}^a} a_{p+1}\, [f(R_{i\alpha})]^p - J_{eN}^\infty \] - $J_{\text{ee}}$ contains electron-electron terms: + $J_{\text{ee}}$ contains electron-electron terms: \[ - J_{\text{ee}}(\mathbf{r}) = + J_{\text{ee}}(\mathbf{r}) = \sum_{i=1}^{N_\text{elec}} \sum_{j=1}^{i-1} \frac{b_1\, f(r_{ij})}{1+b_2\, f(r_{ij})} + \sum_{p=2}^{N_\text{ord}^b} a_{p+1}\, [f(r_{ij})]^p - J_{ee}^\infty @@ -43,7 +43,7 @@ \sum_{l=0}^{p-k-2\delta_{k,0}} c_{lkp\alpha} \left[ g({r}_{ij}) \right]^k \left[ \left[ g({R}_{i\alpha}) \right]^l + \left[ g({R}_{j\alpha}) \right]^l \right] - \left[ g({R}_{i\,\alpha}) \, g({R}_{j\alpha}) \right]^{(p-k-l)/2} + \left[ g({R}_{i\,\alpha}) \, g({R}_{j\alpha}) \right]^{(p-k-l)/2} \] $c_{lkp\alpha}$ are non-zero only when $p-k-l$ is even. @@ -55,10 +55,10 @@ g(r) = e^{-\kappa\, r}. \] - The terms $J_{\text{ee}}^\infty$ and $J_{\text{eN}}^\infty$ are shifts to ensure that + The terms $J_{\text{ee}}^\infty$ and $J_{\text{eN}}^\infty$ are shifts to ensure that $J_{\text{ee}}$ and $J_{\text{eN}}$ have an asymptotic value of zero. - - + + * Headers :noexport: #+begin_src elisp :noexport :results none (org-babel-lob-ingest "../tools/lib.org") @@ -412,10 +412,10 @@ qmckl_exit_code qmckl_get_jastrow_aord_num (qmckl_context context, int qmckl_exit_code qmckl_get_jastrow_bord_num (qmckl_context context, int64_t* const bord_num); qmckl_exit_code qmckl_get_jastrow_cord_num (qmckl_context context, int64_t* const bord_num); qmckl_exit_code qmckl_get_jastrow_type_nucl_num (qmckl_context context, int64_t* const type_nucl_num); -qmckl_exit_code qmckl_get_jastrow_type_nucl_vector (qmckl_context context, int64_t* const type_nucl_num, int64_t* size_max); -qmckl_exit_code qmckl_get_jastrow_aord_vector (qmckl_context context, double * const aord_vector, int64_t* size_max); -qmckl_exit_code qmckl_get_jastrow_bord_vector (qmckl_context context, double * const bord_vector, int64_t* size_max); -qmckl_exit_code qmckl_get_jastrow_cord_vector (qmckl_context context, double * const cord_vector, int64_t* size_max); +qmckl_exit_code qmckl_get_jastrow_type_nucl_vector (qmckl_context context, int64_t* const type_nucl_num, const int64_t size_max); +qmckl_exit_code qmckl_get_jastrow_aord_vector (qmckl_context context, double * const aord_vector, const int64_t size_max); +qmckl_exit_code qmckl_get_jastrow_bord_vector (qmckl_context context, double * const bord_vector, const int64_t size_max); +qmckl_exit_code qmckl_get_jastrow_cord_vector (qmckl_context context, double * const cord_vector, const int64_t size_max); #+end_src Along with these core functions, calculation of the jastrow factor @@ -474,7 +474,7 @@ qmckl_exit_code qmckl_get_jastrow_aord_num (const qmckl_context context, int64_t } assert (ctx->jastrow.aord_num > 0); - *aord_num = ctx->jastrow.aord_num; + ,*aord_num = ctx->jastrow.aord_num; return QMCKL_SUCCESS; } @@ -501,7 +501,7 @@ qmckl_exit_code qmckl_get_jastrow_bord_num (const qmckl_context context, int64_t } assert (ctx->jastrow.bord_num > 0); - *bord_num = ctx->jastrow.bord_num; + ,*bord_num = ctx->jastrow.bord_num; return QMCKL_SUCCESS; } @@ -528,7 +528,7 @@ qmckl_exit_code qmckl_get_jastrow_cord_num (const qmckl_context context, int64_t } assert (ctx->jastrow.cord_num > 0); - *cord_num = ctx->jastrow.cord_num; + ,*cord_num = ctx->jastrow.cord_num; return QMCKL_SUCCESS; } @@ -555,11 +555,15 @@ qmckl_exit_code qmckl_get_jastrow_type_nucl_num (const qmckl_context context, in } assert (ctx->jastrow.type_nucl_num > 0); - *type_nucl_num = ctx->jastrow.type_nucl_num; + ,*type_nucl_num = ctx->jastrow.type_nucl_num; return QMCKL_SUCCESS; } -qmckl_exit_code qmckl_get_jastrow_type_nucl_vector (const qmckl_context context, int64_t * const type_nucl_vector, int64_t* size_max) { +qmckl_exit_code +qmckl_get_jastrow_type_nucl_vector (const qmckl_context context, + int64_t* const type_nucl_vector, + const int64_t size_max) +{ if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return (char) 0; @@ -582,12 +586,21 @@ qmckl_exit_code qmckl_get_jastrow_type_nucl_vector (const qmckl_context context, } assert (ctx->jastrow.type_nucl_vector != NULL); + if (size_max < ctx->jastrow.type_nucl_num) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_type_nucl_vector", + "Array too small. Expected jastrow.type_nucl_num"); + } + memcpy(type_nucl_vector, ctx->jastrow.type_nucl_vector, ctx->jastrow.type_nucl_num*sizeof(int64_t)); - (*size_max) = ctx->jastrow.type_nucl_num; return QMCKL_SUCCESS; } -qmckl_exit_code qmckl_get_jastrow_aord_vector (const qmckl_context context, double * const aord_vector, int64_t* size_max) { +qmckl_exit_code +qmckl_get_jastrow_aord_vector (const qmckl_context context, + double * const aord_vector, + const int64_t size_max) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return (char) 0; @@ -611,12 +624,20 @@ qmckl_exit_code qmckl_get_jastrow_aord_vector (const qmckl_context context, doub assert (ctx->jastrow.aord_vector != NULL); int64_t sze = (ctx->jastrow.aord_num + 1)*ctx->jastrow.type_nucl_num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_aord_vector", + "Array too small. Expected (ctx->jastrow.aord_num + 1)*ctx->jastrow.type_nucl_num"); + } memcpy(aord_vector, ctx->jastrow.aord_vector, sze*sizeof(double)); - (*size_max) = sze; return QMCKL_SUCCESS; } -qmckl_exit_code qmckl_get_jastrow_bord_vector (const qmckl_context context, double * const bord_vector, int64_t* size_max) { +qmckl_exit_code +qmckl_get_jastrow_bord_vector (const qmckl_context context, + double * const bord_vector, + const int64_t size_max) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return (char) 0; @@ -640,12 +661,20 @@ qmckl_exit_code qmckl_get_jastrow_bord_vector (const qmckl_context context, doub assert (ctx->jastrow.bord_vector != NULL); int64_t sze=ctx->jastrow.bord_num +1; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_bord_vector", + "Array too small. Expected (ctx->jastrow.bord_num + 1)"); + } memcpy(bord_vector, ctx->jastrow.bord_vector, sze*sizeof(double)); - (*size_max) = sze; return QMCKL_SUCCESS; } -qmckl_exit_code qmckl_get_jastrow_cord_vector (const qmckl_context context, double * const cord_vector, int64_t* size_max) { +qmckl_exit_code +qmckl_get_jastrow_cord_vector (const qmckl_context context, + double * const cord_vector, + const int64_t size_max) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return (char) 0; @@ -668,14 +697,19 @@ qmckl_exit_code qmckl_get_jastrow_cord_vector (const qmckl_context context, doub } assert (ctx->jastrow.cord_vector != NULL); - + int64_t dim_cord_vect; qmckl_exit_code rc = qmckl_get_jastrow_dim_cord_vect(context, &dim_cord_vect); if (rc != QMCKL_SUCCESS) return rc; int64_t sze=dim_cord_vect * ctx->jastrow.type_nucl_num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_cord_vector", + "Array too small. Expected dim_cord_vect * jastrow.type_nucl_num"); + } memcpy(cord_vector, ctx->jastrow.cord_vector, sze*sizeof(double)); - (*size_max) = sze; return QMCKL_SUCCESS; } @@ -690,9 +724,9 @@ qmckl_exit_code qmckl_get_jastrow_cord_vector (const qmckl_context context, doub qmckl_exit_code qmckl_set_jastrow_ord_num (qmckl_context context, const int64_t aord_num, const int64_t bord_num, const int64_t cord_num); qmckl_exit_code qmckl_set_jastrow_type_nucl_num (qmckl_context context, const int64_t type_nucl_num); qmckl_exit_code qmckl_set_jastrow_type_nucl_vector (qmckl_context context, const int64_t* type_nucl_vector, const int64_t nucl_num); -qmckl_exit_code qmckl_set_jastrow_aord_vector (qmckl_context context, const double * aord_vector, int64_t size_max); -qmckl_exit_code qmckl_set_jastrow_bord_vector (qmckl_context context, const double * bord_vector, int64_t size_max); -qmckl_exit_code qmckl_set_jastrow_cord_vector (qmckl_context context, const double * cord_vector, int64_t size_max); +qmckl_exit_code qmckl_set_jastrow_aord_vector (qmckl_context context, const double * aord_vector, const int64_t size_max); +qmckl_exit_code qmckl_set_jastrow_bord_vector (qmckl_context context, const double * bord_vector, const int64_t size_max); +qmckl_exit_code qmckl_set_jastrow_cord_vector (qmckl_context context, const double * cord_vector, const int64_t size_max); #+end_src #+NAME:pre2 @@ -718,7 +752,12 @@ return QMCKL_SUCCESS; #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none -qmckl_exit_code qmckl_set_jastrow_ord_num(qmckl_context context, const int64_t aord_num, const int64_t bord_num, const int64_t cord_num) { +qmckl_exit_code +qmckl_set_jastrow_ord_num(qmckl_context context, + const int64_t aord_num, + const int64_t bord_num, + const int64_t cord_num) +{ <> if (aord_num <= 0) { @@ -750,7 +789,10 @@ qmckl_exit_code qmckl_set_jastrow_ord_num(qmckl_context context, const int64_t a <> } -qmckl_exit_code qmckl_set_jastrow_type_nucl_num(qmckl_context context, const int64_t type_nucl_num) { + +qmckl_exit_code +qmckl_set_jastrow_type_nucl_num(qmckl_context context, const int64_t type_nucl_num) +{ <> if (type_nucl_num <= 0) { @@ -766,7 +808,12 @@ qmckl_exit_code qmckl_set_jastrow_type_nucl_num(qmckl_context context, const int <> } -qmckl_exit_code qmckl_set_jastrow_type_nucl_vector(qmckl_context context, int64_t const * type_nucl_vector, const int64_t nucl_num) { + +qmckl_exit_code +qmckl_set_jastrow_type_nucl_vector(qmckl_context context, + int64_t const * type_nucl_vector, + const int64_t nucl_num) +{ <> int32_t mask = 1 << 2; @@ -816,7 +863,12 @@ qmckl_exit_code qmckl_set_jastrow_type_nucl_vector(qmckl_context context, int64_ <> } -qmckl_exit_code qmckl_set_jastrow_aord_vector(qmckl_context context, double const * aord_vector, int64_t size_max) { + +qmckl_exit_code +qmckl_set_jastrow_aord_vector(qmckl_context context, + double const * aord_vector, + const int64_t size_max) +{ <> int32_t mask = 1 << 3; @@ -849,7 +901,7 @@ qmckl_exit_code qmckl_set_jastrow_aord_vector(qmckl_context context, double cons return qmckl_failwith( context, rc, "qmckl_set_ord_vector", NULL); -} + } } qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; @@ -878,7 +930,12 @@ qmckl_exit_code qmckl_set_jastrow_aord_vector(qmckl_context context, double cons <> } -qmckl_exit_code qmckl_set_jastrow_bord_vector(qmckl_context context, double const * bord_vector, int64_t size_max) { + +qmckl_exit_code +qmckl_set_jastrow_bord_vector(qmckl_context context, + double const * bord_vector, + const int64_t size_max) +{ <> int32_t mask = 1 << 4; @@ -936,7 +993,12 @@ qmckl_exit_code qmckl_set_jastrow_bord_vector(qmckl_context context, double cons <> } -qmckl_exit_code qmckl_set_jastrow_cord_vector(qmckl_context context, double const * cord_vector, int64_t size_max) { + +qmckl_exit_code +qmckl_set_jastrow_cord_vector(qmckl_context context, + double const * cord_vector, + const int64_t size_max) +{ <> int32_t mask = 1 << 5; @@ -1069,6 +1131,7 @@ double* elec_coord = &(n2_elec_coord[0][0][0]); const double* nucl_charge = n2_charge; int64_t nucl_num = n2_nucl_num; double* nucl_coord = &(n2_nucl_coord[0][0]); +int64_t size_max; /* Provide Electron data */ @@ -1246,11 +1309,17 @@ assert(qmckl_nucleus_provided(context)); *** Get #+begin_src c :comments org :tangle (eval h_func) :noweb yes -qmckl_exit_code qmckl_get_jastrow_asymp_jasb(qmckl_context context, double* const asymp_jasb, int64_t* size_max); +qmckl_exit_code +qmckl_get_jastrow_asymp_jasb(qmckl_context context, + double* const asymp_jasb, + const int64_t size_max); #+end_src #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none -qmckl_exit_code qmckl_get_jastrow_asymp_jasb(qmckl_context context, double* const asymp_jasb, int64_t* size_max) +qmckl_exit_code +qmckl_get_jastrow_asymp_jasb(qmckl_context context, + double* const asymp_jasb, + const int64_t size_max) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; @@ -1265,8 +1334,13 @@ qmckl_exit_code qmckl_get_jastrow_asymp_jasb(qmckl_context context, double* cons assert (ctx != NULL); size_t sze = 2; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_asymp_jasb", + "Array too small. Expected 2"); + } memcpy(asymp_jasb, ctx->jastrow.asymp_jasb, sze * sizeof(double)); - (*size_max) = sze; return QMCKL_SUCCESS; } @@ -1409,7 +1483,7 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( } asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv); - asymp_jasb[0] = asym_one; + asymp_jasb[0] = asym_one; asymp_jasb[1] = 0.5 * asym_one; for (int i = 0 ; i <= 1; ++i) { @@ -1418,7 +1492,7 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( x = x * kappa_inv; asymp_jasb[i] = asymp_jasb[i] + bord_vector[p + 1] * x; } - } + } return QMCKL_SUCCESS; } @@ -1433,7 +1507,7 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( const int64_t bord_num, const double* bord_vector, const double rescale_factor_kappa_ee, - double* const asymp_jasb ); + double* const asymp_jasb ); #+end_src @@ -1457,7 +1531,7 @@ print("asym_one : ", asym_one) print("asymp_jasb[0] : ", asymp_jasb[0]) print("asymp_jasb[1] : ", asymp_jasb[1]) #+end_src - + #+RESULTS: asymp_jasb : asym_one : 0.43340325572525706 : asymp_jasb[0] : 0.5323750557252571 @@ -1500,8 +1574,7 @@ assert(rc == QMCKL_SUCCESS); assert(qmckl_jastrow_provided(context)); double asymp_jasb[2]; -int64_t size_max=0; -rc = qmckl_get_jastrow_asymp_jasb(context, asymp_jasb,&size_max); +rc = qmckl_get_jastrow_asymp_jasb(context, asymp_jasb,2); // calculate asymp_jasb assert(fabs(asymp_jasb[0]-0.5323750557252571) < 1.e-12); @@ -1521,11 +1594,17 @@ f_{ee} = \sum_{i,jelectron.walk_num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_factor_ee", + "Array too small. Expected walk_num"); + } memcpy(factor_ee, ctx->jastrow.factor_ee, sze*sizeof(double)); - (*size_max) = sze; return QMCKL_SUCCESS; } @@ -1804,8 +1888,7 @@ print("factor_ee :",factor_ee) assert(qmckl_jastrow_provided(context)); double factor_ee[walk_num]; -size_max=0; -rc = qmckl_get_jastrow_factor_ee(context, factor_ee, &size_max); +rc = qmckl_get_jastrow_factor_ee(context, factor_ee, walk_num); // calculate factor_ee assert(fabs(factor_ee[0]+4.282760865958113) < 1.e-12); @@ -1824,11 +1907,17 @@ assert(fabs(factor_ee[0]+4.282760865958113) < 1.e-12); *** Get #+begin_src c :comments org :tangle (eval h_func) :noweb yes -qmckl_exit_code qmckl_get_jastrow_factor_ee_deriv_e(qmckl_context context, double* const factor_ee_deriv_e, int64_t* size_max); +qmckl_exit_code +qmckl_get_jastrow_factor_ee_deriv_e(qmckl_context context, + double* const factor_ee_deriv_e, + const int64_t size_max); #+end_src #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none -qmckl_exit_code qmckl_get_jastrow_factor_ee_deriv_e(qmckl_context context, double* const factor_ee_deriv_e, int64_t* size_max) +qmckl_exit_code +qmckl_get_jastrow_factor_ee_deriv_e(qmckl_context context, + double* const factor_ee_deriv_e, + const int64_t size_max) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; @@ -1843,8 +1932,14 @@ qmckl_exit_code qmckl_get_jastrow_factor_ee_deriv_e(qmckl_context context, doubl assert (ctx != NULL); int64_t sze = ctx->electron.walk_num * 4 * ctx->electron.num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_factor_ee_deriv_e", + "Array too small. Expected 4*walk_num*elec_num"); + } + memcpy(factor_ee_deriv_e, ctx->jastrow.factor_ee_deriv_e, sze * sizeof(double)); - (*size_max) = sze; return QMCKL_SUCCESS; } @@ -2002,7 +2097,7 @@ integer function qmckl_compute_factor_ee_deriv_e_f(context, walk_num, elec_num, dx(1) = ee_distance_rescaled_deriv_e(1, i, j, nw) dx(2) = ee_distance_rescaled_deriv_e(2, i, j, nw) dx(3) = ee_distance_rescaled_deriv_e(3, i, j, nw) - dx(4) = ee_distance_rescaled_deriv_e(4, i, j, nw) + dx(4) = ee_distance_rescaled_deriv_e(4, i, j, nw) if((i .LE. up_num .AND. j .LE. up_num ) .OR. & (i .GT. up_num .AND. j .GT. up_num)) then @@ -2217,8 +2312,7 @@ assert(qmckl_jastrow_provided(context)); // calculate factor_ee_deriv_e double factor_ee_deriv_e[walk_num][4][elec_num]; -size_max=0; -rc = qmckl_get_jastrow_factor_ee_deriv_e(context, &(factor_ee_deriv_e[0][0][0]),&size_max); +rc = qmckl_get_jastrow_factor_ee_deriv_e(context, &(factor_ee_deriv_e[0][0][0]),walk_num*4*elec_num); // check factor_ee_deriv_e assert(fabs(factor_ee_deriv_e[0][0][0]-0.16364894652107934) < 1.e-12); @@ -2240,11 +2334,17 @@ f_{en} = \sum_{i,jelectron.walk_num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_factor_en", + "Array too small. Expected walk_num"); + } memcpy(factor_en, ctx->jastrow.factor_en, sze*sizeof(double)); - (*size_max)=sze; return QMCKL_SUCCESS; } @@ -2517,8 +2622,7 @@ print("factor_en :",factor_en) assert(qmckl_jastrow_provided(context)); double factor_en[walk_num]; -size_max=0; -rc = qmckl_get_jastrow_factor_en(context, factor_en,&size_max); +rc = qmckl_get_jastrow_factor_en(context, factor_en,walk_num); // calculate factor_en assert(fabs(factor_en[0]+5.865822569188727) < 1.e-12); @@ -2534,11 +2638,17 @@ assert(fabs(factor_en[0]+5.865822569188727) < 1.e-12); *** Get #+begin_src c :comments org :tangle (eval h_func) :noweb yes -qmckl_exit_code qmckl_get_jastrow_factor_en_deriv_e(qmckl_context context, double* const factor_en_deriv_e, int64_t* size_max); +qmckl_exit_code +qmckl_get_jastrow_factor_en_deriv_e(qmckl_context context, + double* const factor_en_deriv_e, + const int64_t size_max); #+end_src #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none -qmckl_exit_code qmckl_get_jastrow_factor_en_deriv_e(qmckl_context context, double* const factor_en_deriv_e, int64_t* size_max) +qmckl_exit_code +qmckl_get_jastrow_factor_en_deriv_e(qmckl_context context, + double* const factor_en_deriv_e, + const int64_t size_max) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; @@ -2553,8 +2663,13 @@ qmckl_exit_code qmckl_get_jastrow_factor_en_deriv_e(qmckl_context context, doubl assert (ctx != NULL); int64_t sze = ctx->electron.walk_num * 4 * ctx->electron.num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_factor_en_deriv_e", + "Array too small. Expected 4*walk_num*elec_num"); + } memcpy(factor_en_deriv_e, ctx->jastrow.factor_en_deriv_e, sze*sizeof(double)); - (*size_max) = sze; return QMCKL_SUCCESS; } @@ -2921,8 +3036,7 @@ assert(qmckl_jastrow_provided(context)); // calculate factor_en_deriv_e double factor_en_deriv_e[walk_num][4][elec_num]; -size_max=0; -rc = qmckl_get_jastrow_factor_en_deriv_e(context, &(factor_en_deriv_e[0][0][0]),&size_max); +rc = qmckl_get_jastrow_factor_en_deriv_e(context, &(factor_en_deriv_e[0][0][0]),walk_num*4*elec_num); // check factor_en_deriv_e assert(fabs(factor_en_deriv_e[0][0][0]-0.11609919541763383) < 1.e-12); @@ -2946,11 +3060,17 @@ assert(fabs(factor_en_deriv_e[0][3][0]+0.9667363412285741 ) < 1.e-12); *** Get #+begin_src c :comments org :tangle (eval h_func) :noweb yes -qmckl_exit_code qmckl_get_jastrow_een_rescaled_e(qmckl_context context, double* const distance_rescaled, int64_t* size_max); +qmckl_exit_code +qmckl_get_jastrow_een_rescaled_e(qmckl_context context, + double* const distance_rescaled, + const int64_t size_max); #+end_src #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none -qmckl_exit_code qmckl_get_jastrow_een_rescaled_e(qmckl_context context, double* const distance_rescaled, int64_t* size_max) +qmckl_exit_code +qmckl_get_jastrow_een_rescaled_e(qmckl_context context, + double* const distance_rescaled, + const int64_t size_max) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; @@ -2965,8 +3085,13 @@ qmckl_exit_code qmckl_get_jastrow_een_rescaled_e(qmckl_context context, double* assert (ctx != NULL); size_t sze = ctx->electron.num * ctx->electron.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1); + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_factor_een_rescaled_e", + "Array too small. Expected ctx->electron.num * ctx->electron.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1)"); + } memcpy(distance_rescaled, ctx->jastrow.een_rescaled_e, sze * sizeof(double)); - (*size_max) = sze; return QMCKL_SUCCESS; } @@ -3178,7 +3303,7 @@ end function qmckl_compute_een_rescaled_e_f #+end_src *** Test - + #+begin_src python :results output :exports none :noweb yes import numpy as np @@ -3242,8 +3367,7 @@ assert(qmckl_electron_provided(context)); double een_rescaled_e[walk_num][(cord_num + 1)][elec_num][elec_num]; -size_max=0; -rc = qmckl_get_jastrow_een_rescaled_e(context, &(een_rescaled_e[0][0][0][0]),&size_max); +rc = qmckl_get_jastrow_een_rescaled_e(context, &(een_rescaled_e[0][0][0][0]),elec_num*elec_num*(cord_num+1)*walk_num); // value of (0,2,1) assert(fabs(een_rescaled_e[0][1][0][2]-0.08084493981483197) < 1.e-12); @@ -3268,11 +3392,17 @@ assert(fabs(een_rescaled_e[0][2][1][5]-0.3424402276009091) < 1.e-12); *** Get #+begin_src c :comments org :tangle (eval h_func) :noweb yes -qmckl_exit_code qmckl_get_jastrow_een_rescaled_e_deriv_e(qmckl_context context, double* const distance_rescaled, int64_t* size_max); +qmckl_exit_code +qmckl_get_jastrow_een_rescaled_e_deriv_e(qmckl_context context, + double* const distance_rescaled, + const int64_t size_max); #+end_src #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none -qmckl_exit_code qmckl_get_jastrow_een_rescaled_e_deriv_e(qmckl_context context, double* const distance_rescaled, int64_t* size_max) +qmckl_exit_code +qmckl_get_jastrow_een_rescaled_e_deriv_e(qmckl_context context, + double* const distance_rescaled, + const int64_t size_max) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; @@ -3287,8 +3417,13 @@ qmckl_exit_code qmckl_get_jastrow_een_rescaled_e_deriv_e(qmckl_context context, assert (ctx != NULL); size_t sze = ctx->electron.num * 4 * ctx->electron.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1); + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_factor_een_deriv_e", + "Array too small. Expected ctx->electron.num * 4 * ctx->electron.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1)"); + } memcpy(distance_rescaled, ctx->jastrow.een_rescaled_e_deriv_e, sze * sizeof(double)); - (*size_max) = sze; return QMCKL_SUCCESS; } @@ -3609,8 +3744,9 @@ for l in range(0,cord_num+1): #+begin_src c :tangle (eval c_test) //assert(qmckl_electron_provided(context)); double een_rescaled_e_deriv_e[walk_num][(cord_num + 1)][elec_num][4][elec_num]; -size_max=0; -rc = qmckl_get_jastrow_een_rescaled_e_deriv_e(context, &(een_rescaled_e_deriv_e[0][0][0][0][0]),&size_max); +size_max=walk_num*(cord_num + 1)*elec_num*4*elec_num; +rc = qmckl_get_jastrow_een_rescaled_e_deriv_e(context, + &(een_rescaled_e_deriv_e[0][0][0][0][0]),size_max); // value of (0,0,0,2,1) assert(fabs(een_rescaled_e_deriv_e[0][1][0][0][2] + 0.05991352796887283 ) < 1.e-12); @@ -3635,11 +3771,17 @@ assert(fabs(een_rescaled_e_deriv_e[0][2][1][0][5] + 0.5880599146214673 ) < 1. *** Get #+begin_src c :comments org :tangle (eval h_func) :noweb yes -qmckl_exit_code qmckl_get_jastrow_een_rescaled_n(qmckl_context context, double* const distance_rescaled, int64_t* size_max); +qmckl_exit_code +qmckl_get_jastrow_een_rescaled_n(qmckl_context context, + double* const distance_rescaled, + const int64_t size_max); #+end_src #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none -qmckl_exit_code qmckl_get_jastrow_een_rescaled_n(qmckl_context context, double* const distance_rescaled, int64_t* size_max) +qmckl_exit_code +qmckl_get_jastrow_een_rescaled_n(qmckl_context context, + double* const distance_rescaled, + const int64_t size_max) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; @@ -3654,8 +3796,13 @@ qmckl_exit_code qmckl_get_jastrow_een_rescaled_n(qmckl_context context, double* assert (ctx != NULL); size_t sze = ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1); + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_factor_een_deriv_e", + "Array too small. Expected ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1)"); + } memcpy(distance_rescaled, ctx->jastrow.een_rescaled_n, sze * sizeof(double)); - (*size_max)=sze; return QMCKL_SUCCESS; } @@ -3914,8 +4061,8 @@ print(" een_rescaled_n[1, 5, 2] = ",een_rescaled_n[1, 5, 2]) assert(qmckl_electron_provided(context)); double een_rescaled_n[walk_num][(cord_num + 1)][nucl_num][elec_num]; -size_max=0; -rc = qmckl_get_jastrow_een_rescaled_n(context, &(een_rescaled_n[0][0][0][0]),&size_max); +size_max=walk_num*(cord_num + 1)*nucl_num*elec_num; +rc = qmckl_get_jastrow_een_rescaled_n(context, &(een_rescaled_n[0][0][0][0]),size_max); // value of (0,2,1) assert(fabs(een_rescaled_n[0][1][0][2]-0.10612983920006765) < 1.e-12); @@ -3936,11 +4083,17 @@ assert(fabs(een_rescaled_n[0][2][1][5]-0.01343938025140174) < 1.e-12); *** Get #+begin_src c :comments org :tangle (eval h_func) :noweb yes -qmckl_exit_code qmckl_get_jastrow_een_rescaled_n_deriv_e(qmckl_context context, double* const distance_rescaled, int64_t* size_max); +qmckl_exit_code +qmckl_get_jastrow_een_rescaled_n_deriv_e(qmckl_context context, + double* const distance_rescaled, + const int64_t size_max); #+end_src #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none -qmckl_exit_code qmckl_get_jastrow_een_rescaled_n_deriv_e(qmckl_context context, double* const distance_rescaled, int64_t* size_max) +qmckl_exit_code +qmckl_get_jastrow_een_rescaled_n_deriv_e(qmckl_context context, + double* const distance_rescaled, + const int64_t size_max) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; @@ -3955,8 +4108,13 @@ qmckl_exit_code qmckl_get_jastrow_een_rescaled_n_deriv_e(qmckl_context context, assert (ctx != NULL); size_t sze = ctx->electron.num * 4 * ctx->nucleus.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1); + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_factor_een_deriv_e", + "Array too small. Expected ctx->electron.num * 4 * ctx->nucleus.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1)"); + } memcpy(distance_rescaled, ctx->jastrow.een_rescaled_n_deriv_e, sze * sizeof(double)); - (*size_max)=sze; return QMCKL_SUCCESS; } @@ -4215,9 +4373,9 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f end function qmckl_compute_factor_een_rescaled_n_deriv_e #+end_src - + *** Test - + #+begin_src python :results output :exports none :noweb yes import numpy as np @@ -4287,8 +4445,8 @@ print(" een_rescaled_n_deriv_e[2, 1, 6, 2] = ",een_rescaled_n_deriv_e[5, 0, 1, 2 assert(qmckl_electron_provided(context)); double een_rescaled_n_deriv_e[walk_num][(cord_num + 1)][nucl_num][4][elec_num]; -size_max=0; -rc = qmckl_get_jastrow_een_rescaled_n_deriv_e(context, &(een_rescaled_n_deriv_e[0][0][0][0][0]),&size_max); +size_max=walk_num*(cord_num + 1)*nucl_num*4*elec_num; +rc = qmckl_get_jastrow_een_rescaled_n_deriv_e(context, &(een_rescaled_n_deriv_e[0][0][0][0][0]),size_max); // value of (0,2,1) assert(fabs(een_rescaled_n_deriv_e[0][1][0][0][2]+0.07633444246999128 ) < 1.e-12); @@ -5330,11 +5488,17 @@ assert(fabs(dtmp_c[0][1][0][0][0][0] - 0.237440520852232) < 1e-12); *** Get #+begin_src c :comments org :tangle (eval h_func) :noweb yes -qmckl_exit_code qmckl_get_jastrow_factor_een(qmckl_context context, double* const factor_een, int64_t* size_max); +qmckl_exit_code +qmckl_get_jastrow_factor_een(qmckl_context context, + double* const factor_een, + const int64_t size_max); #+end_src #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none -qmckl_exit_code qmckl_get_jastrow_factor_een(qmckl_context context, double* const factor_een, int64_t* size_max) +qmckl_exit_code +qmckl_get_jastrow_factor_een(qmckl_context context, + double* const factor_een, + const int64_t size_max) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; @@ -5348,9 +5512,14 @@ qmckl_exit_code qmckl_get_jastrow_factor_een(qmckl_context context, double* cons qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; assert (ctx != NULL); - int64_t sze = ctx->electron.walk_num * ctx->electron.num; + int64_t sze = ctx->electron.walk_num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_factor_een", + "Array too small. Expected walk_num"); + } memcpy(factor_een, ctx->jastrow.factor_een, sze*sizeof(double)); - (*size_max)=sze; return QMCKL_SUCCESS; } @@ -5804,8 +5973,7 @@ print("factor_een:",factor_een) assert(qmckl_jastrow_provided(context)); double factor_een[walk_num]; -size_max=0; -rc = qmckl_get_jastrow_factor_een(context, &(factor_een[0]),&size_max); +rc = qmckl_get_jastrow_factor_een(context, &(factor_een[0]),walk_num); assert(fabs(factor_een[0] + 0.37407972141304213) < 1e-12); #+end_src @@ -5819,11 +5987,17 @@ assert(fabs(factor_een[0] + 0.37407972141304213) < 1e-12); *** Get #+begin_src c :comments org :tangle (eval h_func) :noweb yes -qmckl_exit_code qmckl_get_jastrow_factor_een_deriv_e(qmckl_context context, double* const factor_een_deriv_e, int64_t* size_max); +qmckl_exit_code +qmckl_get_jastrow_factor_een_deriv_e(qmckl_context context, + double* const factor_een_deriv_e, + const int64_t size_max); #+end_src #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none -qmckl_exit_code qmckl_get_jastrow_factor_een_deriv_e(qmckl_context context, double* const factor_een_deriv_e, int64_t* size_max) +qmckl_exit_code +qmckl_get_jastrow_factor_een_deriv_e(qmckl_context context, + double* const factor_een_deriv_e, + const int64_t size_max) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; @@ -5837,9 +6011,14 @@ qmckl_exit_code qmckl_get_jastrow_factor_een_deriv_e(qmckl_context context, doub qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; assert (ctx != NULL); - int64_t sze = ctx->electron.walk_num * ctx->electron.num; + int64_t sze = ctx->electron.walk_num * 4 * ctx->electron.num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_jastrow_factor_een_deriv_e", + "Array too small. Expected 4*walk_num*elec_num"); + } memcpy(factor_een_deriv_e, ctx->jastrow.factor_een_deriv_e, sze*sizeof(double)); - (*size_max)=sze; return QMCKL_SUCCESS; } @@ -6327,7 +6506,7 @@ end function qmckl_compute_factor_een_deriv_e_f end function qmckl_compute_factor_een_deriv_e #+end_src - + *** Test #+begin_src python :results output :exports none :noweb yes import numpy as np @@ -6384,11 +6563,10 @@ print("factor_een:",factor_een) /* Check if Jastrow is properly initialized */ assert(qmckl_jastrow_provided(context)); -double factor_een_deriv_e[walk_num][elec_num]; -size_max=0; -rc = qmckl_get_jastrow_factor_een_deriv_e(context, &(factor_een_deriv_e[0][0]),&size_max); +double factor_een_deriv_e[4][walk_num][elec_num]; +rc = qmckl_get_jastrow_factor_een_deriv_e(context, &(factor_een_deriv_e[0][0][0]),4*walk_num*elec_num); -assert(fabs(factor_een_deriv_e[0][0] + 0.0005481671107226865) < 1e-12); +assert(fabs(factor_een_deriv_e[0][0][0] + 0.0005481671107226865) < 1e-12); #+end_src * End of files :noexport: From c8a452dc55c31a6e06ef39a43fb220dc07f2b1df Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 15 Feb 2022 11:10:07 +0100 Subject: [PATCH 03/19] Added --with-icc and --with-ifort to configure --- configure.ac | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 6654fd7..6ec4830 100644 --- a/configure.ac +++ b/configure.ac @@ -46,6 +46,22 @@ AS_IF([test -d ${srcdir}/.git], [enable_maintainer_mode="no"] ) +AC_ARG_WITH(ifort, [AS_HELP_STRING([--with-ifort],[Use Intel Fortran compiler])], with_ifort=$withval, with_ifort=no) +AS_IF([test "$with_ifort" == "yes"], [ + FC=ifort + FCFLAGS="-xHost -ip -O2 -ftz -finline -g -mkl=sequential" ]) + +AC_ARG_WITH(icc, [AS_HELP_STRING([--with-icc],[Use Intel C compiler])], with_icc=$withval, with_icc=no) +AS_IF([test "$with_icc" == "yes"], [ + FC=icc + CFLAGS="-xHost -ip -O2 -ftz -finline -g -mkl=sequential" ]) + +AS_IF([test "$with_icc"."$with_ifort" == "yes.yes"], [ + ax_blas_ok="yes" + ax_lapack_ok="yes" + BLAS_LIBS="" + LAPACK_LIBS=""]) + AM_PROG_AR AM_MAINTAINER_MODE() LT_INIT @@ -177,6 +193,7 @@ AX_BLAS([], [AC_MSG_ERROR([BLAS was not found.])]) ## LAPACK AX_LAPACK([], [AC_MSG_ERROR([LAPACK was not found.])]) +AS_IF([test "$BLAS_LIBS" == "$LAPACK_LIBS"], [BLAS_LIBS=""]) # Specific options required with some compilers @@ -314,7 +331,7 @@ fi #mkl-dynamic-lp64-seq PKG_LIBS="$PKG_LIBS $LIBS" -LIBS="$BLAS_LIBS $LAPACK_LIBS $BLAS_LIBS $PKG_LIBS" +LIBS="$BLAS_LIBS $LAPACK_LIBS $PKG_LIBS" CFLAGS="$CFLAGS $PKG_CFLAGS" AC_SUBST([PKG_LIBS]) AC_SUBST([PKG_CFLAGS]) From d83dad53cf6c02e255769988db50b232b3a0f46f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 15 Feb 2022 16:42:47 +0100 Subject: [PATCH 04/19] OpenMP in HPC version --- configure.ac | 17 +++++++++-------- org/qmckl_ao.org | 34 +++++++++++++++++++++++++++------- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/configure.ac b/configure.ac index 6ec4830..857367a 100644 --- a/configure.ac +++ b/configure.ac @@ -53,7 +53,7 @@ AS_IF([test "$with_ifort" == "yes"], [ AC_ARG_WITH(icc, [AS_HELP_STRING([--with-icc],[Use Intel C compiler])], with_icc=$withval, with_icc=no) AS_IF([test "$with_icc" == "yes"], [ - FC=icc + CC=icc CFLAGS="-xHost -ip -O2 -ftz -finline -g -mkl=sequential" ]) AS_IF([test "$with_icc"."$with_ifort" == "yes.yes"], [ @@ -116,13 +116,14 @@ AC_CHECK_HEADERS([assert.h errno.h math.h pthread.h stdbool.h stdint.h stdio.h s AC_CHECK_LIB([pthread], [pthread_create]) # OpenMP -#AC_ARG_WITH(openmp, [AS_HELP_STRING([--with-openmp],[enable OpenMP])], with_omp=$withval, with_omp=no) -#if test "x$with_omp" = xyes; then -# AC_DEFINE([HAVE_OPENMP], [1], [Define to use OpenMP threading.]) -# AX_OPENMP([], -# [AC_MSG_ERROR([Could not find OpenMP flags; configure with --without-openmp])]) -# CFLAGS="${CFLAGS} ${OPENMP_CFLAGS}" -#fi +AC_ARG_WITH(openmp, [AS_HELP_STRING([--with-openmp],[activate OpenMP])], with_omp=$withval, with_omp=no) +if test "x$with_omp" = xyes; then + AC_DEFINE([HAVE_OPENMP], [1], [Define to use OpenMP threading.]) + AX_OPENMP([], + [AC_MSG_ERROR([Could not find OpenMP flags; configure with --without-openmp])]) + CFLAGS="${CFLAGS} ${OPENMP_CFLAGS}" + FCFLAGS="${CFLAGS} ${OPENMP_FCFLAGS}" +fi # CHAMELEON AC_ARG_WITH(chameleon, diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 8783884..df988c8 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -2466,11 +2466,11 @@ for (int64_t i=0 ; i < ao_num ; ++i) { |----------------------+-----------------------------------+----------------------------------------------------------------------------------------------| | Variable | Type | Description | |----------------------+-----------------------------------+----------------------------------------------------------------------------------------------| - | ~primitive_vgl~ | ~double[point_num][5][prim_num]~ | Value, gradients, Laplacian of the primitives at current positions | + | ~primitive_vgl~ | ~double[point_num][5][prim_num]~ | Value, gradients, Laplacian of the primitives at current positions | | ~primitive_vgl_date~ | ~uint64_t~ | Last modification date of Value, gradients, Laplacian of the primitives at current positions | | ~shell_vgl~ | ~double[point_num][5][shell_num]~ | Value, gradients, Laplacian of the primitives at current positions | | ~shell_vgl_date~ | ~uint64_t~ | Last modification date of Value, gradients, Laplacian of the AOs at current positions | - | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | Value, gradients, Laplacian of the primitives at current positions | + | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | Value, gradients, Laplacian of the primitives at current positions | | ~ao_vgl_date~ | ~uint64_t~ | Last modification date of Value, gradients, Laplacian of the AOs at current positions | @@ -4763,7 +4763,7 @@ integer function qmckl_compute_ao_vgl_hpc_f(context, & integer :: lstart(0:20) double precision :: x, y, z, r2, s1, s2, s3, s4, s5, s6 double precision :: cutoff, v, two_a - integer*8 :: iprim_start , iprim_end, iprim + integer*8 :: iprim_start , iprim_end, iprim, size_max integer, external :: qmckl_ao_polynomial_transp_vgl_f double precision, allocatable :: poly_vgl(:,:) @@ -4772,8 +4772,7 @@ integer function qmckl_compute_ao_vgl_hpc_f(context, & integer :: nidx, idx, n double precision, allocatable :: ar2(:), expo_(:), c_(:) - allocate(poly_vgl(ao_num,5), powers(3,ao_num), ao_index(ao_num)) - allocate(c_(prim_num), expo_(prim_num), ar2(prim_num)) + allocate(ao_index(ao_num+1)) ! Pre-computed data do l=0,20 @@ -4781,6 +4780,7 @@ integer function qmckl_compute_ao_vgl_hpc_f(context, & end do k=1 + size_max = 0 do inucl=1,nucl_num ishell_start = nucleus_index(inucl) + 1 ishell_end = nucleus_index(inucl) + nucleus_shell_num(inucl) @@ -4789,13 +4789,30 @@ integer function qmckl_compute_ao_vgl_hpc_f(context, & ao_index(ishell) = k k = k + lstart(l+1) - lstart(l) end do + size_max = max(size_max, lstart(l+1)) end do + ao_index(ishell_end+1) = ao_num+1 + info = QMCKL_SUCCESS ! Don't compute polynomials when the radial part is zero. cutoff = -dlog(1.d-12) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED (point_num, coord, nucl_coord, nucl_num, cutoff, & + !$OMP nucleus_range, context, nucleus_max_ang_mom, ao_num, & + !$OMP nucleus_index, nucleus_shell_num, shell_prim_index, & + !$OMP shell_prim_num, expo, coef_normalized, size_max, prim_num, & + !$OMP shell_ang_mom, ao_index, lstart, ao_vgl, ao_factor) & + !$OMP PRIVATE (ipoint, inucl, x, y, z, e_coord, r2, info, & + !$OMP n_coord, n_poly, powers, poly_vgl, ishell_start, k, & + !$OMP ishell_end, ishell, iprim_end, iprim_start, nidx, l, & + !$OMP iprim, v, expo_, c_, s1, s2, s3, s4, s5, s6, n, il, ar2) + + allocate(c_(prim_num), expo_(prim_num), ar2(prim_num), & + powers(3,size_max), poly_vgl(size_max,5)) + !$OMP DO do ipoint = 1, point_num e_coord(1) = coord(ipoint,1) e_coord(2) = coord(ipoint,2) @@ -4819,7 +4836,7 @@ integer function qmckl_compute_ao_vgl_hpc_f(context, & ! Compute polynomials info = qmckl_ao_polynomial_transp_vgl_f(context, e_coord, n_coord, & nucleus_max_ang_mom(inucl), n_poly, powers, 3_8, & - poly_vgl, int(ao_num,8)) + poly_vgl, size_max) ! Loop over shells ishell_start = nucleus_index(inucl) + 1 @@ -4887,7 +4904,10 @@ integer function qmckl_compute_ao_vgl_hpc_f(context, & end do end do - deallocate(poly_vgl, powers) + deallocate(poly_vgl, powers, c_, expo_, ar2) + !$OMP END PARALLEL + + deallocate(ao_index) end function qmckl_compute_ao_vgl_hpc_f #+end_src From 1c681d4d7e7b4b6133da60eac04e3f3c79bf020c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 16 Feb 2022 00:21:37 +0100 Subject: [PATCH 05/19] Rewrote AOs HPC in C --- README.md | 42 ++++- configure.ac | 8 +- org/qmckl_ao.org | 401 +++++++++++++++++++---------------------------- 3 files changed, 200 insertions(+), 251 deletions(-) diff --git a/README.md b/README.md index fb1281b..4ab5bae 100644 --- a/README.md +++ b/README.md @@ -36,12 +36,39 @@ make check ## For users -Obtain a source distribution and run +Obtain a source distribution. + +To build the documentation version: ``` -./configure -make -make check +./configure +``` + +To build an optimized version with Intel compilers: +``` +./configure \ + --with-icc \ + --with-ifort \ + --enable-hpc \ + --with-openmp +``` + +To build an optimized version with GCC: +``` +./configure \ + CC=gcc \ + CFLAGS="-g -O2 -march=native -flto -fno-trapping-math -fno-math-errno -ftree-vectorize" \ + FC=gfortran \ + FCFLAGS="-g -O2 -march=native -flto -ftree-vectorize" \ + --enable-hpc \ + --with-openmp +``` + + +Then, compile with: +``` +make -j +make -j check sudo make install sudo make installcheck ``` @@ -54,7 +81,12 @@ by the preprocessor otherwise). To enable vfc_ci support, the library should be configured with the following command : ``` -./configure --prefix=$PWD/_install \ --enable-vfc_ci --host=x86_64 \ CC="verificarlo-f" FC="verificarlo-f" +./configure \ + CC="verificarlo-f" \ + FC="verificarlo-f" \ + --prefix=$PWD/_install \ + --enable-vfc_ci \ + --host=x86_64 \ ``` where CC and FC are set to verificarlo-f, and support is explicitely enabled diff --git a/configure.ac b/configure.ac index 857367a..d99f4a0 100644 --- a/configure.ac +++ b/configure.ac @@ -204,10 +204,10 @@ case $FC in FCFLAGS="$FCFLAGS -nofor-main" ;; - gfortran*) - # Order is important here - FCFLAGS="-cpp $FCFLAGS" - ;; +#gfortran*) +# # Order is important here +# FCFLAGS="-cpp $FCFLAGS" +# ;; esac diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index df988c8..7aca29b 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -96,6 +96,8 @@ int main() { #include #include #include +#include +#include #include "qmckl.h" #include "qmckl_context_private_type.h" @@ -4725,195 +4727,183 @@ end function qmckl_compute_ao_vgl_doc_f | ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | out | Value, gradients and Laplacian of the AOs | - #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_ao_vgl_hpc_f(context, & - ao_num, shell_num, prim_num, point_num, nucl_num, & - coord, nucl_coord, nucleus_index, nucleus_shell_num, & - nucleus_range, nucleus_max_ang_mom, shell_ang_mom, & - shell_prim_index, shell_prim_num, ao_factor, expo, & - coef_normalized, ao_vgl) & - result(info) - use qmckl - implicit none - integer(qmckl_context), intent(in) :: context - integer*8 , intent(in) :: ao_num - integer*8 , intent(in) :: shell_num - integer*8 , intent(in) :: prim_num - integer*8 , intent(in) :: point_num - integer*8 , intent(in) :: nucl_num - double precision , intent(in) :: coord(point_num,3) - double precision , intent(in) :: nucl_coord(nucl_num,3) - integer*8 , intent(in) :: nucleus_index(nucl_num) - integer*8 , intent(in) :: nucleus_shell_num(nucl_num) - double precision , intent(in) :: nucleus_range(nucl_num) - integer , intent(in) :: nucleus_max_ang_mom(nucl_num) - integer , intent(in) :: shell_ang_mom(shell_num) - integer*8 , intent(in) :: shell_prim_index(shell_num) - integer*8 , intent(in) :: shell_prim_num(shell_num) - double precision , intent(in) :: ao_factor(ao_num) - double precision , intent(in) :: expo(prim_num) - double precision , intent(in) :: coef_normalized(prim_num) - double precision , intent(out) :: ao_vgl(ao_num,5,point_num) + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none +qmckl_exit_code +qmckl_compute_ao_vgl_hpc ( + const qmckl_context context, + const int64_t ao_num, + const int64_t shell_num, + const int64_t prim_num, + const int64_t point_num, + const int64_t nucl_num, + const double* coord, + const double* nucl_coord, + const int64_t* nucleus_index, + const int64_t* nucleus_shell_num, + const double* nucleus_range, + const int32_t* nucleus_max_ang_mom, + const int32_t* shell_ang_mom, + const int64_t* shell_prim_index, + const int64_t* shell_prim_num, + const double* ao_factor, + const double* expo, + const double* coef_normalized, + double* const ao_vgl ) +{ + int32_t lstart[24]; + for (int32_t l=0 ; l<24 ; ++l) { + lstart[l] = l*(l+1)*(l+2)/6; + } - double precision :: e_coord(3), n_coord(3) - integer*8 :: n_poly - integer :: l, il, k - integer*8 :: ipoint, inucl, ishell - integer*8 :: ishell_start, ishell_end - integer :: lstart(0:20) - double precision :: x, y, z, r2, s1, s2, s3, s4, s5, s6 - double precision :: cutoff, v, two_a - integer*8 :: iprim_start , iprim_end, iprim, size_max - integer, external :: qmckl_ao_polynomial_transp_vgl_f + int64_t ao_index[shell_num+1]; + int64_t size_max = 0; + { + int64_t k=0; + for (int inucl=0 ; inucl < nucl_num ; ++inucl) { + const int64_t ishell_start = nucleus_index[inucl]; + const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; + for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { + const int l = shell_ang_mom[ishell]; + ao_index[ishell] = k; + k += lstart[l+1] - lstart[l]; + size_max = size_max < lstart[l+1] ? lstart[l+1] : size_max; + } + } + ao_index[shell_num] = ao_num+1; + } - double precision, allocatable :: poly_vgl(:,:) - integer , allocatable :: powers(:,:), ao_index(:) + /* Don't compute polynomials when the radial part is zero. */ + double cutoff = -log(1.e-12); - integer :: nidx, idx, n - double precision, allocatable :: ar2(:), expo_(:), c_(:) +#ifdef HAVE_OPENMP + #pragma omp parallel +#endif + { + double c_[prim_num]; + double expo_[prim_num]; + double ar2[prim_num]; + int32_t powers[prim_num]; + double poly_vgl[5][size_max]; - allocate(ao_index(ao_num+1)) +#ifdef HAVE_OPENMP + #pragma omp for +#endif + for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) { + const double e_coord[3] = { coord[ipoint], + coord[ipoint + point_num], + coord[ipoint + 2*point_num] }; - ! Pre-computed data - do l=0,20 - lstart(l) = l*(l+1)*(l+2)/6 +1 - end do + for (int64_t inucl=0 ; inucl < nucl_num ; ++inucl) { + const double n_coord[3] = { nucl_coord[inucl], + nucl_coord[inucl + nucl_num], + nucl_coord[inucl + 2*nucl_num] }; - k=1 - size_max = 0 - do inucl=1,nucl_num - ishell_start = nucleus_index(inucl) + 1 - ishell_end = nucleus_index(inucl) + nucleus_shell_num(inucl) - do ishell = ishell_start, ishell_end - l = shell_ang_mom(ishell) - ao_index(ishell) = k - k = k + lstart(l+1) - lstart(l) - end do - size_max = max(size_max, lstart(l+1)) - end do - ao_index(ishell_end+1) = ao_num+1 - + /* Test if the point is in the range of the nucleus */ + const double x = e_coord[0] - n_coord[0]; + const double y = e_coord[1] - n_coord[1]; + const double z = e_coord[2] - n_coord[2]; - info = QMCKL_SUCCESS + const double r2 = x*x + y*y + z*z; - ! Don't compute polynomials when the radial part is zero. - cutoff = -dlog(1.d-12) + if (r2 > cutoff * nucleus_range[inucl]) { + continue; + } - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP SHARED (point_num, coord, nucl_coord, nucl_num, cutoff, & - !$OMP nucleus_range, context, nucleus_max_ang_mom, ao_num, & - !$OMP nucleus_index, nucleus_shell_num, shell_prim_index, & - !$OMP shell_prim_num, expo, coef_normalized, size_max, prim_num, & - !$OMP shell_ang_mom, ao_index, lstart, ao_vgl, ao_factor) & - !$OMP PRIVATE (ipoint, inucl, x, y, z, e_coord, r2, info, & - !$OMP n_coord, n_poly, powers, poly_vgl, ishell_start, k, & - !$OMP ishell_end, ishell, iprim_end, iprim_start, nidx, l, & - !$OMP iprim, v, expo_, c_, s1, s2, s3, s4, s5, s6, n, il, ar2) + int64_t n_poly; + const qmckl_exit_code rc = + qmckl_ao_polynomial_transp_vgl(context, e_coord, n_coord, + nucleus_max_ang_mom[inucl], &n_poly, powers, (int64_t) 3, + &(poly_vgl[0][0]), size_max); - allocate(c_(prim_num), expo_(prim_num), ar2(prim_num), & - powers(3,size_max), poly_vgl(size_max,5)) - !$OMP DO - do ipoint = 1, point_num - e_coord(1) = coord(ipoint,1) - e_coord(2) = coord(ipoint,2) - e_coord(3) = coord(ipoint,3) - do inucl=1,nucl_num - n_coord(1) = nucl_coord(inucl,1) - n_coord(2) = nucl_coord(inucl,2) - n_coord(3) = nucl_coord(inucl,3) + assert (rc == QMCKL_SUCCESS); + + const int64_t ishell_start = nucleus_index[inucl]; + const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; + for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { - ! Test if the point is in the range of the nucleus - x = e_coord(1) - n_coord(1) - y = e_coord(2) - n_coord(2) - z = e_coord(3) - n_coord(3) + /* /!\ Gaussian fuctions */ + int64_t nidx = 0; + const int64_t iprim_start = shell_prim_index[ishell]; + const int64_t iprim_end = shell_prim_index[ishell] + shell_prim_num[ishell]; + for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) { + const double v = expo[iprim]*r2; + if (v <= cutoff) { + ar2[nidx] = v; + c_[nidx] = coef_normalized[iprim]; + expo_[nidx] = expo[iprim]; + ++nidx; + } + } - r2 = x*x + y*y + z*z + double s1_ = 0.; + double s5_ = 0.; + double s6_ = 0.; + for (int idx=0 ; idx cutoff*nucleus_range(inucl)) then - cycle - end if + const int32_t l = shell_ang_mom[ishell]; + const int32_t n = lstart[l+1]-lstart[l]; + const int64_t k = ao_index[ishell]; - ! Compute polynomials - info = qmckl_ao_polynomial_transp_vgl_f(context, e_coord, n_coord, & - nucleus_max_ang_mom(inucl), n_poly, powers, 3_8, & - poly_vgl, size_max) + double* const ao_vgl_1 = &(ao_vgl[ipoint*5*ao_num+k]); + double* const ao_vgl_2 = &(ao_vgl_1[ ao_num]); + double* const ao_vgl_3 = &(ao_vgl_1[2*ao_num]); + double* const ao_vgl_4 = &(ao_vgl_1[3*ao_num]); + double* const ao_vgl_5 = &(ao_vgl_1[4*ao_num]); - ! Loop over shells - ishell_start = nucleus_index(inucl) + 1 - ishell_end = nucleus_index(inucl) + nucleus_shell_num(inucl) + if (nidx > 0) { + const int64_t idx = lstart[l]; + const double* poly_vgl_1 = &(poly_vgl[0][idx]); + const double* poly_vgl_2 = &(poly_vgl[1][idx]); + const double* poly_vgl_3 = &(poly_vgl[2][idx]); + const double* poly_vgl_4 = &(poly_vgl[3][idx]); + const double* poly_vgl_5 = &(poly_vgl[4][idx]); + const double* f = &(ao_factor[k]); - do ishell = ishell_start, ishell_end - iprim_start = shell_prim_index(ishell) + 1 - iprim_end = shell_prim_index(ishell) + shell_prim_num(ishell) + for (int64_t il=0 ; il cutoff) then - cycle - end if - nidx = nidx+1 - ar2(nidx) = v - c_(nidx) = coef_normalized(iprim) - expo_(nidx) = expo(iprim) - enddo - - s1 = 0.d0 - s5 = 0.d0 - s6 = 0.d0 - do idx = 1, nidx - v = c_(idx) * dexp(-ar2(idx)) - s1 = s1 + v - s6 = s6 - expo_(idx) * v - s5 = s5 + ar2(idx) - end do - s6 = s6 +s6 - s5 = 2.d0*s5 + s6*3.d0 - s2 = s6 * x - s3 = s6 * y - s4 = s6 * z - - l = shell_ang_mom(ishell) - k = ao_index(ishell) - n = lstart(l+1)-lstart(l) - if (nidx > 0) then - idx = lstart(l) - do il = 0,n-1 - ao_vgl(k+il,1,ipoint) = poly_vgl(idx+il,1) * s1 * ao_factor(k+il) - ao_vgl(k+il,2,ipoint) = (poly_vgl(idx+il,2) * s1 + poly_vgl(idx+il,1) * s2) * ao_factor(k+il) - ao_vgl(k+il,3,ipoint) = (poly_vgl(idx+il,3) * s1 + poly_vgl(idx+il,1) * s3) * ao_factor(k+il) - ao_vgl(k+il,4,ipoint) = (poly_vgl(idx+il,4) * s1 + poly_vgl(idx+il,1) * s4) * ao_factor(k+il) - ao_vgl(k+il,5,ipoint) = (poly_vgl(idx+il,5) * s1 + & - poly_vgl(idx+il,1) * s5 + 2.d0*( & - poly_vgl(idx+il,2) * s2 + & - poly_vgl(idx+il,3) * s3 + & - poly_vgl(idx+il,4) * s4 )) * ao_factor(k+il) - end do - else - do il = 0, n-1 - ao_vgl(k+il,1,ipoint) = 0.d0 - ao_vgl(k+il,2,ipoint) = 0.d0 - ao_vgl(k+il,3,ipoint) = 0.d0 - ao_vgl(k+il,4,ipoint) = 0.d0 - ao_vgl(k+il,5,ipoint) = 0.d0 - end do - endif - - end do - end do - end do - - deallocate(poly_vgl, powers, c_, expo_, ar2) - !$OMP END PARALLEL - - deallocate(ao_index) -end function qmckl_compute_ao_vgl_hpc_f + } + } + } + return QMCKL_SUCCESS; +} #+end_src ** Interfaces # #+CALL: generate_c_header(table=qmckl_ao_vgl_args_doc,rettyp=get_value("CRetType"),fname="qmckl_compute_ao_vgl")) -# (Commented because the header needs to go into h_private_func +# (Commented because the header needs to go into h_private_func) #+RESULTS: #+begin_src c :tangle (eval h_private_func) :comments org @@ -5018,80 +5008,7 @@ end function qmckl_compute_ao_vgl_hpc_f end function qmckl_compute_ao_vgl_doc #+end_src - - #+CALL: generate_c_interface(table=qmckl_ao_vgl_args_hpc,rettyp=get_value("CRetType"),fname="qmckl_compute_ao_vgl_hpc")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_ao_vgl_hpc & - (context, & - ao_num, & - shell_num, & - prim_num, & - point_num, & - nucl_num, & - coord, & - nucl_coord, & - nucleus_index, & - nucleus_shell_num, & - nucleus_range, & - nucleus_max_ang_mom, & - shell_ang_mom, & - shell_prim_index, & - shell_prim_num, & - ao_factor, & - ao_expo, & - coef_normalized, & - ao_vgl) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: ao_num - integer (c_int64_t) , intent(in) , value :: shell_num - integer (c_int64_t) , intent(in) , value :: prim_num - integer (c_int64_t) , intent(in) , value :: point_num - integer (c_int64_t) , intent(in) , value :: nucl_num - real (c_double ) , intent(in) :: coord(point_num,3) - real (c_double ) , intent(in) :: nucl_coord(nucl_num,3) - integer (c_int64_t) , intent(in) :: nucleus_index(nucl_num) - integer (c_int64_t) , intent(in) :: nucleus_shell_num(nucl_num) - real (c_double ) , intent(in) :: nucleus_range(nucl_num) - integer (c_int32_t) , intent(in) :: nucleus_max_ang_mom(nucl_num) - integer (c_int32_t) , intent(in) :: shell_ang_mom(shell_num) - integer (c_int64_t) , intent(in) :: shell_prim_index(shell_num) - integer (c_int64_t) , intent(in) :: shell_prim_num(shell_num) - real (c_double ) , intent(in) :: ao_factor(ao_num) - real (c_double ) , intent(in) :: ao_expo(prim_num) - real (c_double ) , intent(in) :: coef_normalized(prim_num) - real (c_double ) , intent(out) :: ao_vgl(ao_num,5,point_num) - - integer(c_int32_t), external :: qmckl_compute_ao_vgl_hpc_f - info = qmckl_compute_ao_vgl_hpc_f & - (context, & - ao_num, & - shell_num, & - prim_num, & - point_num, & - nucl_num, & - coord, & - nucl_coord, & - nucleus_index, & - nucleus_shell_num, & - nucleus_range, & - nucleus_max_ang_mom, & - shell_ang_mom, & - shell_prim_index, & - shell_prim_num, & - ao_factor, & - ao_expo, & - coef_normalized, & - ao_vgl) - - end function qmckl_compute_ao_vgl_hpc - #+end_src + *** Provide :noexport: From 7ab099f4f579f402e40f680c44ce05d17c8c7ea2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 16 Feb 2022 01:12:42 +0100 Subject: [PATCH 06/19] Prepare polynomials for HPC --- org/qmckl_ao.org | 172 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 150 insertions(+), 22 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 7aca29b..07d27ba 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -4060,8 +4060,45 @@ assert(0 == test_qmckl_ao_power(context)); const int64_t ldv ); #+end_src + #+CALL: generate_c_header(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_ao_polynomial_vgl_doc") + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_ao_polynomial_vgl_doc ( + const qmckl_context context, + const double* X, + const double* R, + const int32_t lmax, + int64_t* n, + int32_t* const L, + const int64_t ldl, + double* const VGL, + const int64_t ldv ); + #+end_src + + #+begin_src c :tangle (eval c) :comments org +qmckl_exit_code +qmckl_ao_polynomial_vgl (const qmckl_context context, + const double* X, + const double* R, + const int32_t lmax, + int64_t* n, + int32_t* const L, + const int64_t ldl, + double* const VGL, + const int64_t ldv ) +{ +#ifdef HAVE_HPC + //return qmckl_ao_polynomial_vgl_hpc (context, X, R, lmax, n, L, ldl, VGL, ldv); + return qmckl_ao_polynomial_vgl_doc (context, X, R, lmax, n, L, ldl, VGL, ldv); +#else + return qmckl_ao_polynomial_vgl_doc (context, X, R, lmax, n, L, ldl, VGL, ldv); +#endif +} + #+end_src + #+begin_src f90 :tangle (eval f) -integer function qmckl_ao_polynomial_vgl_f (context, & +integer function qmckl_ao_polynomial_vgl_doc_f (context, & X, R, lmax, n, L, ldl, VGL, ldv) result(info) use qmckl implicit none @@ -4187,14 +4224,14 @@ integer function qmckl_ao_polynomial_vgl_f (context, & info = QMCKL_SUCCESS -end function qmckl_ao_polynomial_vgl_f +end function qmckl_ao_polynomial_vgl_doc_f #+end_src - #+CALL: generate_c_interface(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + #+CALL: generate_c_interface(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_ao_polynomial_vgl_doc" ) #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_ao_polynomial_vgl & + integer(c_int32_t) function qmckl_ao_polynomial_vgl_doc & (context, X, R, lmax, n, L, ldl, VGL, ldv) & bind(C) result(info) @@ -4211,14 +4248,40 @@ end function qmckl_ao_polynomial_vgl_f real (c_double ) , intent(out) :: VGL(ldv,n) integer (c_int64_t) , intent(in) , value :: ldv - integer(c_int32_t), external :: qmckl_ao_polynomial_vgl_f - info = qmckl_ao_polynomial_vgl_f & + integer(c_int32_t), external :: qmckl_ao_polynomial_vgl_doc_f + info = qmckl_ao_polynomial_vgl_doc_f & (context, X, R, lmax, n, L, ldl, VGL, ldv) - end function qmckl_ao_polynomial_vgl + end function qmckl_ao_polynomial_vgl_doc #+end_src - #+CALL: generate_f_interface(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("FRetType"),fname=get_value("Name")) + #+CALL: generate_f_interface(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("FRetType"),fname="qmckl_ao_polynomial_vgl_doc" ) + + #+RESULTS: + #+begin_src f90 :tangle (eval fh_func) :comments org :exports none + interface + integer(c_int32_t) function qmckl_ao_polynomial_vgl_doc & + (context, X, R, lmax, n, L, ldl, VGL, ldv) & + bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + + integer (c_int64_t) , intent(in) , value :: context + real (c_double ) , intent(in) :: X(3) + real (c_double ) , intent(in) :: R(3) + integer (c_int32_t) , intent(in) , value :: lmax + integer (c_int64_t) , intent(inout) :: n + integer (c_int32_t) , intent(out) :: L(ldl,n) + integer (c_int64_t) , intent(in) , value :: ldl + real (c_double ) , intent(out) :: VGL(ldv,n) + integer (c_int64_t) , intent(in) , value :: ldv + + end function qmckl_ao_polynomial_vgl_doc + end interface + #+end_src + + #+CALL: generate_f_interface(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("FRetType"),fname="qmckl_ao_polynomial_vgl" ) #+RESULTS: #+begin_src f90 :tangle (eval fh_func) :comments org :exports none @@ -4244,7 +4307,9 @@ end function qmckl_ao_polynomial_vgl_f end interface #+end_src + #+CALL: generate_c_header(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_ao_polynomial_transp_vgl") + #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_ao_polynomial_transp_vgl ( const qmckl_context context, @@ -4255,11 +4320,48 @@ end function qmckl_ao_polynomial_vgl_f int32_t* const L, const int64_t ldl, double* const VGL, - const int64_t ldv ); + const int64_t ldv ); + #+end_src + + #+CALL: generate_c_header(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_ao_polynomial_transp_vgl_doc") + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_ao_polynomial_transp_vgl_doc ( + const qmckl_context context, + const double* X, + const double* R, + const int32_t lmax, + int64_t* n, + int32_t* const L, + const int64_t ldl, + double* const VGL, + const int64_t ldv ); + #+end_src + + #+begin_src c :tangle (eval c) :comments org +qmckl_exit_code +qmckl_ao_polynomial_transp_vgl (const qmckl_context context, + const double* X, + const double* R, + const int32_t lmax, + int64_t* n, + int32_t* const L, + const int64_t ldl, + double* const VGL, + const int64_t ldv ) +{ +#ifdef HAVE_HPC + //return qmckl_ao_polynomial_transp_vgl_hpc (context, X, R, lmax, n, L, ldl, VGL, ldv); + return qmckl_ao_polynomial_transp_vgl_doc (context, X, R, lmax, n, L, ldl, VGL, ldv); +#else + return qmckl_ao_polynomial_transp_vgl_doc (context, X, R, lmax, n, L, ldl, VGL, ldv); +#endif +} #+end_src #+begin_src f90 :tangle (eval f) -integer function qmckl_ao_polynomial_transp_vgl_f (context, & +integer function qmckl_ao_polynomial_transp_vgl_doc_f (context, & X, R, lmax, n, L, ldl, VGL, ldv) result(info) use qmckl implicit none @@ -4386,14 +4488,14 @@ integer function qmckl_ao_polynomial_transp_vgl_f (context, & info = QMCKL_SUCCESS -end function qmckl_ao_polynomial_transp_vgl_f +end function qmckl_ao_polynomial_transp_vgl_doc_f #+end_src - #+CALL: generate_c_interface(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + #+CALL: generate_c_interface(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_ao_polynomial_transp_vgl_doc") #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_ao_polynomial_transp_vgl & + integer(c_int32_t) function qmckl_ao_polynomial_transp_vgl_doc & (context, X, R, lmax, n, L, ldl, VGL, ldv) & bind(C) result(info) @@ -4407,17 +4509,43 @@ end function qmckl_ao_polynomial_transp_vgl_f integer (c_int64_t) , intent(inout) :: n integer (c_int32_t) , intent(out) :: L(ldl,n) integer (c_int64_t) , intent(in) , value :: ldl - real (c_double ) , intent(out) :: VGL(ldv,5) + real (c_double ) , intent(out) :: VGL(ldv,n) integer (c_int64_t) , intent(in) , value :: ldv - integer(c_int32_t), external :: qmckl_ao_polynomial_transp_vgl_f - info = qmckl_ao_polynomial_transp_vgl_f & + integer(c_int32_t), external :: qmckl_ao_polynomial_transp_vgl_doc_f + info = qmckl_ao_polynomial_transp_vgl_doc_f & (context, X, R, lmax, n, L, ldl, VGL, ldv) - end function qmckl_ao_polynomial_transp_vgl + end function qmckl_ao_polynomial_transp_vgl_doc #+end_src - #+CALL: generate_f_interface(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("FRetType"),fname=get_value("Name")) + #+CALL: generate_f_interface(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("FRetType"),fname="qmckl_ao_polynomial_transp_vgl_doc") + + #+RESULTS: + #+begin_src f90 :tangle (eval fh_func) :comments org :exports none + interface + integer(c_int32_t) function qmckl_ao_polynomial_transp_vgl_doc & + (context, X, R, lmax, n, L, ldl, VGL, ldv) & + bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + + integer (c_int64_t) , intent(in) , value :: context + real (c_double ) , intent(in) :: X(3) + real (c_double ) , intent(in) :: R(3) + integer (c_int32_t) , intent(in) , value :: lmax + integer (c_int64_t) , intent(inout) :: n + integer (c_int32_t) , intent(out) :: L(ldl,n) + integer (c_int64_t) , intent(in) , value :: ldl + real (c_double ) , intent(out) :: VGL(ldv,n) + integer (c_int64_t) , intent(in) , value :: ldv + + end function qmckl_ao_polynomial_transp_vgl_doc + end interface + #+end_src + + #+CALL: generate_f_interface(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("FRetType"),fname="qmckl_ao_polynomial_transp_vgl") #+RESULTS: #+begin_src f90 :tangle (eval fh_func) :comments org :exports none @@ -4436,11 +4564,11 @@ end function qmckl_ao_polynomial_transp_vgl_f integer (c_int64_t) , intent(inout) :: n integer (c_int32_t) , intent(out) :: L(ldl,n) integer (c_int64_t) , intent(in) , value :: ldl - real (c_double ) , intent(out) :: VGL(ldv,5) + real (c_double ) , intent(out) :: VGL(ldv,n) integer (c_int64_t) , intent(in) , value :: ldv end function qmckl_ao_polynomial_transp_vgl - end interface + end interface #+end_src *** Test :noexport: @@ -4602,7 +4730,7 @@ integer function qmckl_compute_ao_vgl_doc_f(context, & integer :: lstart(0:20) double precision :: x, y, z, r2 double precision :: cutoff - integer, external :: qmckl_ao_polynomial_vgl_f + integer, external :: qmckl_ao_polynomial_vgl_doc_f double precision, allocatable :: poly_vgl(:,:) integer , allocatable :: powers(:,:), ao_index(:) @@ -4650,7 +4778,7 @@ integer function qmckl_compute_ao_vgl_doc_f(context, & end if ! Compute polynomials - info = qmckl_ao_polynomial_vgl_f(context, e_coord, n_coord, & + info = qmckl_ao_polynomial_vgl_doc_f(context, e_coord, n_coord, & nucleus_max_ang_mom(inucl), n_poly, powers, 3_8, & poly_vgl, 5_8) From e90e9a531cfc8a000a06d399c643c845960e5620 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 16 Feb 2022 15:14:41 +0100 Subject: [PATCH 07/19] Added HPC version of polynomials --- configure.ac | 5 +- org/qmckl_ao.org | 345 ++++++++++++++++++++++++++++++++++++-- org/qmckl_blas.org | 42 ++--- org/qmckl_determinant.org | 2 +- org/qmckl_electron.org | 15 +- org/qmckl_jastrow.org | 16 +- org/qmckl_mo.org | 2 +- org/qmckl_nucleus.org | 10 +- org/qmckl_point.org | 4 +- org/qmckl_trexio.org | 5 + 10 files changed, 382 insertions(+), 64 deletions(-) diff --git a/configure.ac b/configure.ac index d99f4a0..f6839d8 100644 --- a/configure.ac +++ b/configure.ac @@ -222,8 +222,9 @@ AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], if test "$ok" = "yes"; then if test "$GCC" = "yes"; then CFLAGS="$CFLAGS \ --Wall -W -Wbad-function-cast -Wcast-qual \ --Wpointer-arith -Wcast-align -Wpedantic -Wextra -fmax-errors=3" +-Wall -W -Wbad-function-cast -Wcast-qual -Warray-bounds -Wdisabled-optimization \ +-Wpointer-arith -Wcast-align -Wpedantic -Wextra -fmax-errors=5 -Walloc-zero -Werror \ +" fi if test "$GFC" = "yes"; then FCFLAGS="$FCFLAGS \ diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 07d27ba..5e5a99a 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -73,6 +73,8 @@ gradients and Laplacian of the atomic basis functions. #include #include +#include + #include "chbrclf.h" #include "qmckl_ao_private_func.h" @@ -2399,6 +2401,7 @@ assert (rc == QMCKL_SUCCESS); for (int64_t i=0 ; i < shell_num ; ++i) { assert(shell_ang_mom_test[i] == shell_ang_mom[i]); } +free(shell_ang_mom_test); shell_factor_test = (double*) malloc ( shell_num * sizeof(double)); rc = qmckl_get_ao_basis_shell_factor (context, shell_factor_test, shell_num); @@ -2407,6 +2410,7 @@ assert (rc == QMCKL_SUCCESS); for (int64_t i=0 ; i < shell_num ; ++i) { assert(shell_factor_test[i] == shell_factor[i]); } +free(shell_factor_test); shell_prim_num_test = (int64_t*) malloc ( shell_num * sizeof(int64_t)); rc = qmckl_get_ao_basis_shell_prim_num (context, shell_prim_num_test, shell_num); @@ -2415,6 +2419,7 @@ assert (rc == QMCKL_SUCCESS); for (int64_t i=0 ; i < shell_num ; ++i) { assert(shell_prim_num_test[i] == shell_prim_num[i]); } +free(shell_prim_num_test); shell_prim_index_test = (int64_t*) malloc ( shell_num * sizeof(int64_t)); rc = qmckl_get_ao_basis_shell_prim_index (context, shell_prim_index_test, shell_num); @@ -2423,6 +2428,7 @@ assert (rc == QMCKL_SUCCESS); for (int64_t i=0 ; i < shell_num ; ++i) { assert(shell_prim_index_test[i] == shell_prim_index[i]); } +free(shell_prim_index_test); exponent_test = (double*) malloc ( prim_num * sizeof(double)); rc = qmckl_get_ao_basis_exponent(context, exponent_test, prim_num); @@ -2431,6 +2437,7 @@ assert (rc == QMCKL_SUCCESS); for (int64_t i=0 ; i < prim_num ; ++i) { assert(exponent_test[i] == exponent[i]); } +free(exponent_test); coefficient_test = (double*) malloc ( prim_num * sizeof(double)); rc = qmckl_get_ao_basis_coefficient(context, coefficient_test, prim_num); @@ -2439,6 +2446,7 @@ assert (rc == QMCKL_SUCCESS); for (int64_t i=0 ; i < prim_num ; ++i) { assert(coefficient_test[i] == coefficient[i]); } +free(coefficient_test); prim_factor_test = (double*) malloc ( prim_num * sizeof(double)); rc = qmckl_get_ao_basis_prim_factor (context, prim_factor_test, prim_num); @@ -2447,6 +2455,7 @@ assert (rc == QMCKL_SUCCESS); for (int64_t i=0 ; i < prim_num ; ++i) { assert(prim_factor_test[i] == prim_factor[i]); } +free(prim_factor_test); rc = qmckl_get_ao_basis_ao_num(context, &ao_num_test); assert(ao_num == ao_num_test); @@ -2458,6 +2467,7 @@ assert (rc == QMCKL_SUCCESS); for (int64_t i=0 ; i < ao_num ; ++i) { assert(ao_factor_test[i] == ao_factor[i]); } +free(ao_factor_test); #+end_src @@ -4114,7 +4124,6 @@ integer function qmckl_ao_polynomial_vgl_doc_f (context, & integer*8 :: i,j integer :: a,b,c,d real*8 :: Y(3) - integer :: lmax_array(3) real*8 :: pows(-2:lmax,3) double precision :: xy, yz, xz double precision :: da, db, dc, dd @@ -4146,7 +4155,6 @@ integer function qmckl_ao_polynomial_vgl_doc_f (context, & Y(i) = X(i) - R(i) end do - lmax_array(1:3) = lmax if (lmax == 0) then VGL(1,1) = 1.d0 VGL(2:5,1) = 0.d0 @@ -4339,6 +4347,22 @@ end function qmckl_ao_polynomial_vgl_doc_f const int64_t ldv ); #+end_src + #+CALL: generate_c_header(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_ao_polynomial_transp_vgl_hpc") + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_ao_polynomial_transp_vgl_hpc ( + const qmckl_context context, + const double* X, + const double* R, + const int32_t lmax, + int64_t* n, + int32_t* const L, + const int64_t ldl, + double* const VGL, + const int64_t ldv ); + #+end_src + #+begin_src c :tangle (eval c) :comments org qmckl_exit_code qmckl_ao_polynomial_transp_vgl (const qmckl_context context, @@ -4352,8 +4376,7 @@ qmckl_ao_polynomial_transp_vgl (const qmckl_context context, const int64_t ldv ) { #ifdef HAVE_HPC - //return qmckl_ao_polynomial_transp_vgl_hpc (context, X, R, lmax, n, L, ldl, VGL, ldv); - return qmckl_ao_polynomial_transp_vgl_doc (context, X, R, lmax, n, L, ldl, VGL, ldv); + return qmckl_ao_polynomial_transp_vgl_hpc (context, X, R, lmax, n, L, ldl, VGL, ldv); #else return qmckl_ao_polynomial_transp_vgl_doc (context, X, R, lmax, n, L, ldl, VGL, ldv); #endif @@ -4377,7 +4400,6 @@ integer function qmckl_ao_polynomial_transp_vgl_doc_f (context, & integer*8 :: i,j integer :: a,b,c,d real*8 :: Y(3) - integer :: lmax_array(3) real*8 :: pows(-2:21,3) ! lmax < 22 double precision :: xy, yz, xz double precision :: da, db, dc, dd @@ -4405,7 +4427,6 @@ integer function qmckl_ao_polynomial_transp_vgl_doc_f (context, & endif - lmax_array(1:3) = lmax if (lmax > 0) then do i=1,3 @@ -4422,18 +4443,17 @@ integer function qmckl_ao_polynomial_transp_vgl_doc_f (context, & VGL(1:4,1:5) = 0.d0 VGL(1 ,1 ) = 1.d0 - VGL(2:4,1:5) = 0.d0 l (1,2) = 1 - VGL(2,1) = pows(1,1) + VGL(2,1) = Y(1) VGL(2,2) = 1.d0 l (2,3) = 1 - VGL(3,1) = pows(1,2) + VGL(3,1) = Y(2) VGL(3,3) = 1.d0 l (3,4) = 1 - VGL(4,1) = pows(1,3) + VGL(4,1) = Y(3) VGL(4,4) = 1.d0 n=4 @@ -4456,14 +4476,14 @@ integer function qmckl_ao_polynomial_transp_vgl_doc_f (context, & dc = dd - da - db n = n+1 - l(1,n) = a - l(2,n) = b - l(3,n) = c - xy = pows(a,1) * pows(b,2) yz = pows(b,2) * pows(c,3) xz = pows(a,1) * pows(c,3) + l(1,n) = a + l(2,n) = b + l(3,n) = c + VGL(n,1) = xy * pows(c,3) xy = dc * xy @@ -4491,6 +4511,264 @@ integer function qmckl_ao_polynomial_transp_vgl_doc_f (context, & end function qmckl_ao_polynomial_transp_vgl_doc_f #+end_src + #+begin_src c :tangle (eval c) :comments org +qmckl_exit_code +qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, + const double* X, + const double* R, + const int32_t lmax, + int64_t* n, + int32_t* const L, + const int64_t ldl, + double* const VGL, + const int64_t ldv ) +{ + const qmckl_context_struct* ctx = (qmckl_context_struct* const) context; + assert (ctx != NULL && X != NULL && R != NULL && n != NULL && L != NULL && VGL != NULL); + if (lmax < 0) return QMCKL_INVALID_ARG_4; + if (ldl < 3) return QMCKL_INVALID_ARG_7; + + int32_t m; + + double* const vgl1 = VGL; + double* const vgl2 = VGL + ldv; + double* const vgl3 = VGL + (ldv << 1); + double* const vgl4 = VGL + 3*ldv; + double* const vgl5 = VGL + (ldv << 2); + + switch (lmax) { + case 0: + { + if (ldv < 1) return QMCKL_INVALID_ARG_9; + L[0] = 0; + L[ldl] = 0; + L[ldl << 1] = 0; + m=1; + + vgl1[0] = 1.0; + vgl2[0] = 0.0; + vgl3[0] = 0.0; + vgl4[0] = 0.0; + vgl5[0] = 0.0; + break; + } + case 1: + { + if (ldv < 4) return QMCKL_INVALID_ARG_9; + const double Y[3] = { X[0]-R[0], + X[1]-R[1], + X[2]-R[2] }; + + int32_t* const l[4] = {L, L+ldl, L+(ldl<<1), L+3*ldl}; + for (int32_t i=0 ; i<4 ; ++i) { + l[i][0] = 0; + l[i][1] = 0; + l[i][2] = 0; + } + l[1][0] = 1; + l[2][1] = 1; + l[3][2] = 1; + + for (int32_t k=0 ; k<4 ; ++k) { + vgl2[k] = 0.0; + vgl3[k] = 0.0; + vgl4[k] = 0.0; + vgl5[k] = 0.0; + } + vgl1[0] = 1.0; + vgl1[1] = Y[0]; + vgl1[2] = Y[1]; + vgl1[3] = Y[2]; + vgl2[1] = 1.0; + vgl3[2] = 1.0; + vgl4[3] = 1.0; + m=4; + break; + } + case 2: + { + if (ldv < 10) return QMCKL_INVALID_ARG_9; + const double Y[3] = { X[0]-R[0], + X[1]-R[1], + X[2]-R[2] }; + + double pows[3][5] = { {1.0, 1.0, 1.0, Y[0], Y[0]*Y[0]}, + {1.0, 1.0, 1.0, Y[1], Y[1]*Y[1]}, + {1.0, 1.0, 1.0, Y[2], Y[2]*Y[2]} }; + + int32_t* l[10]; + for (int32_t i=0 ; i<10 ; ++i) { + l[i] = L + i*ldl; + } + + for (int32_t i=0 ; i<4 ; ++i) { + l[i][0] = 0; + l[i][1] = 0; + l[i][2] = 0; + } + l[1][0] = 1; + l[2][1] = 1; + l[3][2] = 1; + + for (int32_t k=0 ; k<4 ; ++k) { + vgl2[k] = 0.0; + vgl3[k] = 0.0; + vgl4[k] = 0.0; + vgl5[k] = 0.0; + } + vgl1[0] = 1.0; + vgl1[1] = Y[0]; + vgl1[2] = Y[1]; + vgl1[3] = Y[2]; + + vgl2[1] = 1.0; + vgl3[2] = 1.0; + vgl4[3] = 1.0; + m=4; + + double da = 2.0; + + for (int32_t a=2 ; a>=0 ; --a) { + double db = 2.0-da; + + for (int32_t b=2-a ; b>=0 ; --b) { + const int32_t c = 2 - a - b; + const double dc = 2.0 - da - db; + + double xy = pows[0][a+2] * pows[1][b+2]; + double yz = pows[1][b+2] * pows[2][c+2]; + double xz = pows[0][a+2] * pows[2][c+2]; + + l[m][0] = a; + l[m][1] = b; + l[m][2] = c; + + vgl1[m] = xy * pows[2][c+2]; + + xy *= dc; + xz *= db; + yz *= da; + + vgl2[m] = pows[0][a+1] * yz; + vgl3[m] = pows[1][b+1] * xz; + vgl4[m] = pows[2][c+1] * xy; + + vgl5[m] = (da-1.) * pows[0][a] * yz + + (db-1.) * pows[1][b] * xz + + (dc-1.) * pows[2][c] * xy; + + db -= 1.0; + ++m; + } + da -= 1.0; + } + break; + } + /* + case 3: + { + if (ldv < 20) return QMCKL_INVALID_ARG_9; + } + ,*/ + default: + { + const int32_t size_max = (lmax+1)*(lmax+2)*(lmax+3)/6; + if (ldv < size_max) return QMCKL_INVALID_ARG_9; + const double Y[3] = { X[0]-R[0], + X[1]-R[1], + X[2]-R[2] }; + double pows[3][size_max]; + + for (int32_t i=0 ; i<=2 ; ++i) { + pows[0][i] = 1.0; + pows[1][i] = 1.0; + pows[2][i] = 1.0; + } + + for (int32_t i=3 ; i<=lmax+2 ; ++i) { + pows[0][i] = pows[0][i-1] * Y[0]; + pows[1][i] = pows[1][i-1] * Y[1]; + pows[2][i] = pows[2][i-1] * Y[2]; + } + + int32_t* l[24]; + for (int32_t i=0 ; i=0 ; --a) { + double db = dd-da; + + for (int32_t b=d-a ; b>=0 ; --b) { + const int32_t c = d - a - b; + const double dc = dd - da - db; + + double xy = pows[0][a+2] * pows[1][b+2]; + double yz = pows[1][b+2] * pows[2][c+2]; + double xz = pows[0][a+2] * pows[2][c+2]; + + l[m][0] = a; + l[m][1] = b; + l[m][2] = c; + + vgl1[m] = xy * pows[2][c+2]; + + xy *= dc; + xz *= db; + yz *= da; + + vgl2[m] = pows[0][a+1] * yz; + vgl3[m] = pows[1][b+1] * xz; + vgl4[m] = pows[2][c+1] * xy; + + vgl5[m] = (da-1.) * pows[0][a] * yz + + (db-1.) * pows[1][b] * xz + + (dc-1.) * pows[2][c] * xy; + + db -= 1.0; + ++m; + } + da -= 1.0; + } + dd += 1.0; + } + } + } + ,*n = m; + return QMCKL_SUCCESS; +} + #+end_src + #+CALL: generate_c_interface(table=qmckl_ao_polynomial_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_ao_polynomial_transp_vgl_doc") #+RESULTS: @@ -4666,8 +4944,42 @@ end function test_qmckl_ao_polynomial_vgl #+end_src #+begin_src c :tangle (eval c_test) - int test_qmckl_ao_polynomial_vgl(qmckl_context context); - assert(0 == test_qmckl_ao_polynomial_vgl(context)); +int test_qmckl_ao_polynomial_vgl(qmckl_context context); +assert(0 == test_qmckl_ao_polynomial_vgl(context)); + +double X[3] = { 1.1, 2.2, 3.3 }; +double R[3] = { 0.2, 1.1, 3.0 }; +int64_t n; +int64_t ldl = 4; +int64_t ldv = 24; +int32_t L0[24][ldl]; +int32_t L1[24][ldl]; +double VGL0[5][ldv]; +double VGL1[5][ldv]; +memset(&L0[0][0], 0, sizeof(L0)); +memset(&L1[0][0], 0, sizeof(L1)); +memset(&VGL0[0][0], 0, sizeof(VGL0)); +memset(&VGL1[0][0], 0, sizeof(VGL1)); +for (int32_t lmax=0 ; lmax<=3 ; lmax++) { +rc = qmckl_ao_polynomial_transp_vgl_doc (context, X, R, lmax, &n, &(L0[0][0]), ldl, &(VGL0[0][0]), ldv); +assert (rc == QMCKL_SUCCESS); +rc = qmckl_ao_polynomial_transp_vgl_hpc (context, X, R, lmax, &n, &(L1[0][0]), ldl, &(VGL1[0][0]), ldv); +assert (rc == QMCKL_SUCCESS); +printf("lmax=%d\n", lmax); +for (int32_t l=0 ; ldata != NULL); qmckl_exit_code rc; - rc = qmckl_free(context, vector.data); + rc = qmckl_free(context, vector->data); if (rc != QMCKL_SUCCESS) { return rc; } - vector.size = (int64_t) 0; - vector.data = NULL; + vector->size = (int64_t) 0; + vector->data = NULL; return QMCKL_SUCCESS; } #+end_src @@ -192,26 +192,26 @@ qmckl_matrix_alloc( qmckl_context context, #+begin_src c :comments org :tangle (eval h_private_func) qmckl_exit_code qmckl_matrix_free( qmckl_context context, - qmckl_matrix matrix); + qmckl_matrix* matrix); #+end_src #+begin_src c :comments org :tangle (eval c) qmckl_exit_code qmckl_matrix_free( qmckl_context context, - qmckl_matrix matrix) + qmckl_matrix* matrix) { /* Always true */ - assert (matrix.data != NULL); + assert (matrix->data != NULL); qmckl_exit_code rc; - rc = qmckl_free(context, matrix.data); + rc = qmckl_free(context, matrix->data); if (rc != QMCKL_SUCCESS) { return rc; } - matrix.data = NULL; - matrix.size[0] = (int64_t) 0; - matrix.size[1] = (int64_t) 0; + matrix->data = NULL; + matrix->size[0] = (int64_t) 0; + matrix->size[1] = (int64_t) 0; return QMCKL_SUCCESS; } @@ -286,25 +286,25 @@ qmckl_tensor_alloc( qmckl_context context, #+begin_src c :comments org :tangle (eval h_private_func) qmckl_exit_code qmckl_tensor_free( qmckl_context context, - qmckl_tensor tensor); + qmckl_tensor* tensor); #+end_src #+begin_src c :comments org :tangle (eval c) qmckl_exit_code qmckl_tensor_free( qmckl_context context, - qmckl_tensor tensor) + qmckl_tensor* tensor) { /* Always true */ - assert (tensor.data != NULL); + assert (tensor->data != NULL); qmckl_exit_code rc; - rc = qmckl_free(context, tensor.data); + rc = qmckl_free(context, tensor->data); if (rc != QMCKL_SUCCESS) { return rc; } - memset(&tensor, 0, sizeof(qmckl_tensor)); + memset(tensor, 0, sizeof(qmckl_tensor)); return QMCKL_SUCCESS; } @@ -712,7 +712,7 @@ qmckl_tensor_of_double(const qmckl_context context, for (int64_t i=0 ; i

det.uninitialized & mask) != 0) { - return (int64_t) 0; + return NULL; } assert (ctx->det.mo_index_alpha != NULL); diff --git a/org/qmckl_electron.org b/org/qmckl_electron.org index 5e0d5dd..33ac366 100644 --- a/org/qmckl_electron.org +++ b/org/qmckl_electron.org @@ -499,18 +499,15 @@ ctx->electron.provided = (ctx->electron.uninitialized == 0); if (ctx->electron.provided) { if (ctx->electron.coord_new.data != NULL) { - const qmckl_exit_code rc = qmckl_matrix_free(context, ctx->electron.coord_new); + const qmckl_exit_code rc = qmckl_matrix_free(context, &(ctx->electron.coord_new)); assert (rc == QMCKL_SUCCESS); } if (ctx->electron.coord_old.data != NULL) { - const qmckl_exit_code rc = qmckl_matrix_free(context, ctx->electron.coord_old); + const qmckl_exit_code rc = qmckl_matrix_free(context, &(ctx->electron.coord_old)); assert (rc == QMCKL_SUCCESS); } - const int64_t walk_num = ctx->electron.walk_num; - const int64_t elec_num = ctx->electron.num; - - const int64_t point_num = walk_num * elec_num; + const int64_t point_num = ctx->electron.walk_num * ctx->electron.num; qmckl_matrix coord_new = qmckl_matrix_alloc(context, point_num, 3); @@ -1667,8 +1664,7 @@ qmckl_exit_code qmckl_provide_ee_potential(qmckl_context context) ctx->electron.ee_pot = ee_pot; } - qmckl_exit_code rc = - qmckl_compute_ee_potential(context, + rc = qmckl_compute_ee_potential(context, ctx->electron.num, ctx->electron.walk_num, ctx->electron.ee_distance, @@ -2712,8 +2708,7 @@ qmckl_exit_code qmckl_provide_en_potential(qmckl_context context) ctx->electron.en_pot = en_pot; } - qmckl_exit_code rc = - qmckl_compute_en_potential(context, + rc = qmckl_compute_en_potential(context, ctx->electron.num, ctx->nucleus.num, ctx->electron.walk_num, diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index d745dcf..61062af 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -907,7 +907,7 @@ qmckl_set_jastrow_aord_vector(qmckl_context context, qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; mem_info.size = (aord_num + 1) * type_nucl_num * sizeof(double); - if (size_max < mem_info.size/sizeof(double)) { + if ((size_t) size_max < mem_info.size/sizeof(double)) { return qmckl_failwith( context, QMCKL_INVALID_ARG_3, "qmckl_set_jastrow_aord_vector", @@ -970,7 +970,7 @@ qmckl_set_jastrow_bord_vector(qmckl_context context, qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; mem_info.size = (bord_num + 1) * sizeof(double); - if (size_max < mem_info.size/sizeof(double)) { + if ((size_t) size_max < mem_info.size/sizeof(double)) { return qmckl_failwith( context, QMCKL_INVALID_ARG_3, "qmckl_set_jastrow_bord_vector", @@ -1040,7 +1040,7 @@ qmckl_set_jastrow_cord_vector(qmckl_context context, qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; mem_info.size = dim_cord_vect * type_nucl_num * sizeof(double); - if (size_max < mem_info.size/sizeof(double)) { + if ((size_t) size_max < mem_info.size/sizeof(double)) { return qmckl_failwith( context, QMCKL_INVALID_ARG_3, "qmckl_set_jastrow_cord_vector", @@ -1333,7 +1333,7 @@ qmckl_get_jastrow_asymp_jasb(qmckl_context context, qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; assert (ctx != NULL); - size_t sze = 2; + int64_t sze = 2; if (size_max < sze) { return qmckl_failwith( context, QMCKL_INVALID_ARG_3, @@ -3084,7 +3084,7 @@ qmckl_get_jastrow_een_rescaled_e(qmckl_context context, qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; assert (ctx != NULL); - size_t sze = ctx->electron.num * ctx->electron.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1); + int64_t sze = ctx->electron.num * ctx->electron.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1); if (size_max < sze) { return qmckl_failwith( context, QMCKL_INVALID_ARG_3, @@ -3416,7 +3416,7 @@ qmckl_get_jastrow_een_rescaled_e_deriv_e(qmckl_context context, qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; assert (ctx != NULL); - size_t sze = ctx->electron.num * 4 * ctx->electron.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1); + int64_t sze = ctx->electron.num * 4 * ctx->electron.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1); if (size_max < sze) { return qmckl_failwith( context, QMCKL_INVALID_ARG_3, @@ -3795,7 +3795,7 @@ qmckl_get_jastrow_een_rescaled_n(qmckl_context context, qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; assert (ctx != NULL); - size_t sze = ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1); + int64_t sze = ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1); if (size_max < sze) { return qmckl_failwith( context, QMCKL_INVALID_ARG_3, @@ -4107,7 +4107,7 @@ qmckl_get_jastrow_een_rescaled_n_deriv_e(qmckl_context context, qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; assert (ctx != NULL); - size_t sze = ctx->electron.num * 4 * ctx->nucleus.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1); + int64_t sze = ctx->electron.num * 4 * ctx->nucleus.num * ctx->electron.walk_num * (ctx->jastrow.cord_num + 1); if (size_max < sze) { return qmckl_failwith( context, QMCKL_INVALID_ARG_3, diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 709fb31..d4efb41 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -104,7 +104,7 @@ typedef struct qmckl_mo_basis_struct { double * coefficient; double * mo_vgl; - int64_t mo_vgl_date; + uint64_t mo_vgl_date; int32_t uninitialized; bool provided; diff --git a/org/qmckl_nucleus.org b/org/qmckl_nucleus.org index a562618..da003b5 100644 --- a/org/qmckl_nucleus.org +++ b/org/qmckl_nucleus.org @@ -333,7 +333,7 @@ qmckl_get_nucleus_coord (const qmckl_context context, rc = qmckl_transpose(context, ctx->nucleus.coord, At); if (rc != QMCKL_SUCCESS) return rc; rc = qmckl_double_of_matrix(context, At, coord, size_max); - qmckl_matrix_free(context, At); + qmckl_matrix_free(context, &At); } else { rc = qmckl_double_of_matrix(context, ctx->nucleus.coord, coord, size_max); } @@ -470,6 +470,12 @@ qmckl_set_nucleus_charge(qmckl_context context, ctx->nucleus.charge = qmckl_vector_alloc(context, num); rc = qmckl_vector_of_double(context, charge, num, &(ctx->nucleus.charge)); + if (rc != QMCKL_SUCCESS) { + return qmckl_failwith( context, + QMCKL_FAILURE, + "qmckl_set_nucleus_charge", + "Error in vector->double* conversion"); + } <> } @@ -518,7 +524,7 @@ qmckl_set_nucleus_coord(qmckl_context context, const int64_t nucl_num = (int64_t) ctx->nucleus.num; if (ctx->nucleus.coord.data != NULL) { - rc = qmckl_matrix_free(context, ctx->nucleus.coord); + rc = qmckl_matrix_free(context, &(ctx->nucleus.coord)); if (rc != QMCKL_SUCCESS) return rc; } diff --git a/org/qmckl_point.org b/org/qmckl_point.org index d0f4614..0672d1c 100644 --- a/org/qmckl_point.org +++ b/org/qmckl_point.org @@ -224,7 +224,7 @@ qmckl_get_point(const qmckl_context context, if (rc != QMCKL_SUCCESS) return rc; rc = qmckl_double_of_matrix( context, At, coord, size_max); if (rc != QMCKL_SUCCESS) return rc; - rc = qmckl_matrix_free(context, At); + rc = qmckl_matrix_free(context, &At); } else { rc = qmckl_double_of_matrix( context, ctx->point.coord, coord, size_max); } @@ -301,7 +301,7 @@ qmckl_set_point (qmckl_context context, if (ctx->point.num < num) { if (ctx->point.coord.data != NULL) { - rc = qmckl_matrix_free(context, ctx->point.coord); + rc = qmckl_matrix_free(context, &(ctx->point.coord)); assert (rc == QMCKL_SUCCESS); } diff --git a/org/qmckl_trexio.org b/org/qmckl_trexio.org index e08f044..5afeadd 100644 --- a/org/qmckl_trexio.org +++ b/org/qmckl_trexio.org @@ -429,6 +429,11 @@ qmckl_trexio_read_ao_X(qmckl_context context, trexio_t* const file) /* Reformat data */ rc = qmckl_set_ao_basis_nucleus_index(context, nucleus_index, nucleus_num); + if (rc != QMCKL_SUCCESS) { + qmckl_free(context, nucleus_index); + nucleus_index = NULL; + return rc; + } for (int i=shell_num-1 ; i>=0 ; --i) { const int k = tmp_array[i]; From 733d941c308f67b6726630c2c8ce05fbb5a5b3a4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 16 Feb 2022 19:40:14 +0100 Subject: [PATCH 08/19] Optimized polynomials --- org/qmckl_ao.org | 313 +++++++++++++++++++++++++---------------------- 1 file changed, 169 insertions(+), 144 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 5e5a99a..fbbb178 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -4530,153 +4530,176 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, int32_t m; - double* const vgl1 = VGL; - double* const vgl2 = VGL + ldv; - double* const vgl3 = VGL + (ldv << 1); - double* const vgl4 = VGL + 3*ldv; - double* const vgl5 = VGL + (ldv << 2); - switch (lmax) { case 0: { if (ldv < 1) return QMCKL_INVALID_ARG_9; - L[0] = 0; - L[ldl] = 0; - L[ldl << 1] = 0; - m=1; + L[0] = 0; L[1] = 0; L[2] = 0; - vgl1[0] = 1.0; - vgl2[0] = 0.0; - vgl3[0] = 0.0; - vgl4[0] = 0.0; - vgl5[0] = 0.0; + VGL[0 ] = 1.0; + VGL[ldv ] = 0.0; + VGL[ldv<<1 ] = 0.0; + VGL[ldv<<2-1] = 0.0; + VGL[ldv<<2 ] = 0.0; + m=1; break; } case 1: { if (ldv < 4) return QMCKL_INVALID_ARG_9; - const double Y[3] = { X[0]-R[0], - X[1]-R[1], - X[2]-R[2] }; - int32_t* const l[4] = {L, L+ldl, L+(ldl<<1), L+3*ldl}; - for (int32_t i=0 ; i<4 ; ++i) { - l[i][0] = 0; - l[i][1] = 0; - l[i][2] = 0; - } - l[1][0] = 1; - l[2][1] = 1; - l[3][2] = 1; + if (ldl == 3) { - for (int32_t k=0 ; k<4 ; ++k) { - vgl2[k] = 0.0; - vgl3[k] = 0.0; - vgl4[k] = 0.0; - vgl5[k] = 0.0; + const int32_t ltmp[12] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1}; + for (int i=0 ; i<12 ; ++i) + L[i] = ltmp[i]; + + } else { + + int32_t* const l[4] = {L, L+ldl, L+(ldl<<1), L+3*ldl}; + l[0][0] = 0; l[0][1] = 0; l[0][2] = 0; + l[1][0] = 1; l[1][1] = 0; l[1][2] = 0; + l[2][0] = 0; l[2][1] = 1; l[2][2] = 0; + l[3][0] = 0; l[3][1] = 0; l[3][2] = 1; + } + + + if (ldv == 4) { + + const double tmp[20] = {1.0, X[0]-R[0], X[1]-R[1], X[2]-R[2], 0.0, + 1.0, 0.0, 0.0, 0.0, 0.0, + 1.0, 0.0, 0.0, 0.0, 0.0, + 1.0, 0.0, 0.0, 0.0, 0.0}; + for (int i=0 ; i<20 ; ++i) + VGL[i] = tmp[i]; + + } else { + + double* const vgl1 = VGL; + double* const vgl2 = VGL + ldv; + double* const vgl3 = VGL + (ldv << 1); + double* const vgl4 = VGL + 3*ldv; + double* const vgl5 = VGL + (ldv << 2); + + for (int32_t k=0 ; k<4 ; ++k) { + vgl2[k] = 0.0; + vgl3[k] = 0.0; + vgl4[k] = 0.0; + vgl5[k] = 0.0; + } + vgl1[0] = 1.0; + vgl1[1] = X[0]-R[0]; + vgl1[2] = X[1]-R[1]; + vgl1[3] = X[2]-R[2]; + vgl2[1] = 1.0; + vgl3[2] = 1.0; + vgl4[3] = 1.0; } - vgl1[0] = 1.0; - vgl1[1] = Y[0]; - vgl1[2] = Y[1]; - vgl1[3] = Y[2]; - vgl2[1] = 1.0; - vgl3[2] = 1.0; - vgl4[3] = 1.0; m=4; break; } case 2: { if (ldv < 10) return QMCKL_INVALID_ARG_9; + if (ldl == 3) { + const int32_t ltmp[30] = {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, + 2, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, + 0, 1, 1, 0, 0, 2}; + for (int i=0 ; i<30 ; ++i) + L[i] = ltmp[i]; + + } else { + int32_t* l[10]; + for (int32_t i=0 ; i<10 ; ++i) { + l[i] = L + i*ldl; + } + + l[0][0] = 0; l[0][1] = 0; l[0][2] = 0; + l[1][0] = 1; l[1][1] = 0; l[1][2] = 0; + l[2][0] = 0; l[2][1] = 1; l[2][2] = 0; + l[3][0] = 0; l[3][1] = 0; l[3][2] = 1; + l[4][0] = 2; l[4][1] = 0; l[4][2] = 0; + l[5][0] = 1; l[5][1] = 1; l[5][2] = 0; + l[6][0] = 1; l[6][1] = 0; l[6][2] = 1; + l[7][0] = 0; l[7][1] = 2; l[7][2] = 0; + l[8][0] = 0; l[8][1] = 1; l[8][2] = 1; + l[9][0] = 0; l[9][1] = 0; l[9][2] = 2; + } + const double Y[3] = { X[0]-R[0], X[1]-R[1], X[2]-R[2] }; - double pows[3][5] = { {1.0, 1.0, 1.0, Y[0], Y[0]*Y[0]}, - {1.0, 1.0, 1.0, Y[1], Y[1]*Y[1]}, - {1.0, 1.0, 1.0, Y[2], Y[2]*Y[2]} }; - - int32_t* l[10]; - for (int32_t i=0 ; i<10 ; ++i) { - l[i] = L + i*ldl; - } - - for (int32_t i=0 ; i<4 ; ++i) { - l[i][0] = 0; - l[i][1] = 0; - l[i][2] = 0; - } - l[1][0] = 1; - l[2][1] = 1; - l[3][2] = 1; - - for (int32_t k=0 ; k<4 ; ++k) { - vgl2[k] = 0.0; - vgl3[k] = 0.0; - vgl4[k] = 0.0; - vgl5[k] = 0.0; - } - vgl1[0] = 1.0; - vgl1[1] = Y[0]; - vgl1[2] = Y[1]; - vgl1[3] = Y[2]; + if (ldv == 50) { + const double tmp[50] = { + 1.0, Y[0], Y[1], Y[2], Y[0] * Y[0], + Y[0] * Y[1], Y[0] * Y[2], Y[1] * Y[1], Y[1] * Y[2], Y[2] * Y[2], + 0.0, 1.0, 0.0, 0.0, Y[0] + Y[0], + Y[1], Y[2], 0.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, 0.0, + Y[0], 0.0, Y[1] + Y[1], Y[2], 0.0, + 0.0, 0.0, 0.0, 1.0, 0.0, + 0.0, Y[0], 0.0, Y[1], Y[2] + Y[2], + 0.0, 0.0, 0.0, 0.0, 2.0, + 0.0, 0.0, 2.0, 0., 2.0 }; + for (int i=0 ; i<50 ; ++i) + VGL[i] = tmp[i]; + } else { + double* const vgl1 = VGL; + double* const vgl2 = VGL + ldv; + double* const vgl3 = VGL + (ldv << 1); + double* const vgl4 = VGL + 3*ldv; + double* const vgl5 = VGL + (ldv << 2); - vgl2[1] = 1.0; - vgl3[2] = 1.0; - vgl4[3] = 1.0; - m=4; - - double da = 2.0; - - for (int32_t a=2 ; a>=0 ; --a) { - double db = 2.0-da; - - for (int32_t b=2-a ; b>=0 ; --b) { - const int32_t c = 2 - a - b; - const double dc = 2.0 - da - db; - - double xy = pows[0][a+2] * pows[1][b+2]; - double yz = pows[1][b+2] * pows[2][c+2]; - double xz = pows[0][a+2] * pows[2][c+2]; - - l[m][0] = a; - l[m][1] = b; - l[m][2] = c; - - vgl1[m] = xy * pows[2][c+2]; - - xy *= dc; - xz *= db; - yz *= da; - - vgl2[m] = pows[0][a+1] * yz; - vgl3[m] = pows[1][b+1] * xz; - vgl4[m] = pows[2][c+1] * xy; - - vgl5[m] = (da-1.) * pows[0][a] * yz + - (db-1.) * pows[1][b] * xz + - (dc-1.) * pows[2][c] * xy; - - db -= 1.0; - ++m; - } - da -= 1.0; - } + vgl1[0] = 1.0 ; vgl1[1] = Y[0] ; vgl1[2] = Y[1]; + vgl1[3] = Y[2] ; vgl1[4] = Y[0]*Y[0]; vgl1[5] = Y[0]*Y[1]; + vgl1[6] = Y[0]*Y[2]; vgl1[7] = Y[1]*Y[1]; vgl1[8] = Y[1]*Y[2]; + vgl1[9] = Y[2]*Y[2]; + + vgl2[0] = 0.0 ; vgl2[1] = 1.0 ; vgl2[2] = 0.0 ; + vgl2[3] = 0.0 ; vgl2[4] = Y[0]+Y[0]; vgl2[5] = Y[1]; + vgl2[6] = Y[2]; vgl2[7] = 0.0 ; vgl2[8] = 0.0 ; + vgl2[9] = 0.0 ; + + vgl3[0] = 0.0; vgl3[1] = 0.0 ; vgl3[2] = 1.0 ; + vgl3[3] = 0.0; vgl3[4] = 0.0 ; vgl3[5] = Y[0]; + vgl3[6] = 0.0; vgl3[7] = Y[1]+Y[1]; vgl3[8] = Y[2]; + vgl3[9] = 0.0; + + vgl4[0] = 0.0 ; vgl4[1] = 0.0; vgl4[2] = 0.0 ; + vgl4[3] = 1.0 ; vgl4[4] = 0.0; vgl4[5] = 0.0 ; + vgl4[6] = Y[0] ; vgl4[7] = 0.0; vgl4[8] = Y[1]; + vgl4[9] = Y[2]+Y[2]; + + vgl5[0] = 0.0; vgl5[1] = 0.0; vgl5[2] = 0.0; + vgl5[3] = 0.0; vgl5[4] = 2.0; vgl5[5] = 0.0; + vgl5[6] = 0.0; vgl5[7] = 2.0; vgl5[8] = 0.0; + vgl5[9] = 2.0; + } + m=10; break; } /* - case 3: - { - if (ldv < 20) return QMCKL_INVALID_ARG_9; - } - ,*/ + case 3: + { + if (ldv < 20) return QMCKL_INVALID_ARG_9; + } + ,*/ default: { const int32_t size_max = (lmax+1)*(lmax+2)*(lmax+3)/6; if (ldv < size_max) return QMCKL_INVALID_ARG_9; + + double* const vgl1 = VGL; + double* const vgl2 = VGL + ldv; + double* const vgl3 = VGL + (ldv << 1); + double* const vgl4 = VGL + 3*ldv; + double* const vgl5 = VGL + (ldv << 2); + const double Y[3] = { X[0]-R[0], X[1]-R[1], X[2]-R[2] }; + double pows[3][size_max]; for (int32_t i=0 ; i<=2 ; ++i) { @@ -4949,36 +4972,38 @@ assert(0 == test_qmckl_ao_polynomial_vgl(context)); double X[3] = { 1.1, 2.2, 3.3 }; double R[3] = { 0.2, 1.1, 3.0 }; -int64_t n; -int64_t ldl = 4; -int64_t ldv = 24; -int32_t L0[24][ldl]; -int32_t L1[24][ldl]; -double VGL0[5][ldv]; -double VGL1[5][ldv]; -memset(&L0[0][0], 0, sizeof(L0)); -memset(&L1[0][0], 0, sizeof(L1)); -memset(&VGL0[0][0], 0, sizeof(VGL0)); -memset(&VGL1[0][0], 0, sizeof(VGL1)); -for (int32_t lmax=0 ; lmax<=3 ; lmax++) { -rc = qmckl_ao_polynomial_transp_vgl_doc (context, X, R, lmax, &n, &(L0[0][0]), ldl, &(VGL0[0][0]), ldv); -assert (rc == QMCKL_SUCCESS); -rc = qmckl_ao_polynomial_transp_vgl_hpc (context, X, R, lmax, &n, &(L1[0][0]), ldl, &(VGL1[0][0]), ldv); -assert (rc == QMCKL_SUCCESS); -printf("lmax=%d\n", lmax); -for (int32_t l=0 ; l Date: Wed, 16 Feb 2022 19:49:05 +0100 Subject: [PATCH 09/19] Fix CI build --- org/qmckl_ao.org | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index fbbb178..80acb5e 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -4536,11 +4536,11 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, if (ldv < 1) return QMCKL_INVALID_ARG_9; L[0] = 0; L[1] = 0; L[2] = 0; - VGL[0 ] = 1.0; - VGL[ldv ] = 0.0; - VGL[ldv<<1 ] = 0.0; - VGL[ldv<<2-1] = 0.0; - VGL[ldv<<2 ] = 0.0; + VGL[0 ] = 1.0; + VGL[ldv ] = 0.0; + VGL[ldv<<1 ] = 0.0; + VGL[(ldv<<2)-1] = 0.0; + VGL[ldv<<2 ] = 0.0; m=1; break; } From 7fe73e01041c42c5979ecaf658cb71ae4d18ac93 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 17 Feb 2022 01:36:45 +0100 Subject: [PATCH 10/19] Fix bug in fast AOs --- org/qmckl_ao.org | 6 +++--- org/qmckl_trexio.org | 20 +++++++++++++++++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 80acb5e..b546ef0 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -5305,12 +5305,12 @@ qmckl_compute_ao_vgl_hpc ( double s6_ = 0.; for (int idx=0 ; idx Date: Thu, 17 Feb 2022 12:36:16 +0100 Subject: [PATCH 11/19] Accelerated AOs in HPC --- org/qmckl_ao.org | 146 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 122 insertions(+), 24 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index b546ef0..4007e5b 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -4566,10 +4566,12 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, if (ldv == 4) { - const double tmp[20] = {1.0, X[0]-R[0], X[1]-R[1], X[2]-R[2], 0.0, - 1.0, 0.0, 0.0, 0.0, 0.0, - 1.0, 0.0, 0.0, 0.0, 0.0, - 1.0, 0.0, 0.0, 0.0, 0.0}; + const double tmp[20] = {1.0, X[0]-R[0], X[1]-R[1], X[2]-R[2], + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, + 0.0, 0.0, 0.0, 0.0}; + for (int i=0 ; i<20 ; ++i) VGL[i] = tmp[i]; @@ -5248,6 +5250,16 @@ qmckl_compute_ao_vgl_hpc ( double expo_[prim_num]; double ar2[prim_num]; int32_t powers[prim_num]; + double poly_vgl_l1[4][4] = {{1.0, 0.0, 0.0, 0.0}, + {0.0, 1.0, 0.0, 0.0}, + {0.0, 0.0, 1.0, 0.0}, + {0.0, 0.0, 0.0, 1.0}, + {0.0, 0.0, 0.0, 0.0}}; + double poly_vgl_l2[5][10] = {{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, + {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, + {0., 0., 1., 0., 0., 0., 0., 0., 0., 0.}, + {0., 0., 0., 1., 0., 0., 0., 0., 0., 0.}, + {0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}}; double poly_vgl[5][size_max]; #ifdef HAVE_OPENMP @@ -5275,12 +5287,47 @@ qmckl_compute_ao_vgl_hpc ( } int64_t n_poly; - const qmckl_exit_code rc = - qmckl_ao_polynomial_transp_vgl_hpc(context, e_coord, n_coord, - nucleus_max_ang_mom[inucl], &n_poly, powers, (int64_t) 3, - &(poly_vgl[0][0]), size_max); + switch (nucleus_max_ang_mom[inucl]) { + case 0: + break; - assert (rc == QMCKL_SUCCESS); + case 1: + poly_vgl_l1[0][1] = x; + poly_vgl_l1[0][2] = y; + poly_vgl_l1[0][3] = z; + break; + + case 2: + poly_vgl_l2[0][1] = x; + poly_vgl_l2[0][2] = y; + poly_vgl_l2[0][3] = z; + poly_vgl_l2[0][4] = x*x; + poly_vgl_l2[0][5] = x*y; + poly_vgl_l2[0][6] = x*z; + poly_vgl_l2[0][7] = y*y; + poly_vgl_l2[0][8] = y*z; + poly_vgl_l2[0][9] = z*z; + poly_vgl_l2[1][4] = x+x; + poly_vgl_l2[1][5] = y; + poly_vgl_l2[1][6] = z; + poly_vgl_l2[2][5] = x; + poly_vgl_l2[2][7] = y+y; + poly_vgl_l2[2][8] = z; + poly_vgl_l2[3][6] = x; + poly_vgl_l2[3][8] = y; + poly_vgl_l2[3][9] = z+z; + break; + + default: + const qmckl_exit_code rc = + qmckl_ao_polynomial_transp_vgl_hpc(context, e_coord, n_coord, + nucleus_max_ang_mom[inucl], + &n_poly, powers, (int64_t) 3, + &(poly_vgl[0][0]), size_max); + + assert (rc == QMCKL_SUCCESS); + break; + } const int64_t ishell_start = nucleus_index[inucl]; const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; @@ -5305,10 +5352,10 @@ qmckl_compute_ao_vgl_hpc ( double s6_ = 0.; for (int idx=0 ; idx 0) { - const int64_t idx = lstart[l]; - const double* poly_vgl_1 = &(poly_vgl[0][idx]); - const double* poly_vgl_2 = &(poly_vgl[1][idx]); - const double* poly_vgl_3 = &(poly_vgl[2][idx]); - const double* poly_vgl_4 = &(poly_vgl[3][idx]); - const double* poly_vgl_5 = &(poly_vgl[4][idx]); const double* f = &(ao_factor[k]); + const int64_t idx = lstart[l]; - for (int64_t il=0 ; il Date: Thu, 17 Feb 2022 15:37:57 +0100 Subject: [PATCH 12/19] Accelerate AOs in HPC --- org/qmckl_ao.org | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 4007e5b..91dfe05 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -5246,6 +5246,7 @@ qmckl_compute_ao_vgl_hpc ( #pragma omp parallel #endif { + qmckl_exit_code rc; double c_[prim_num]; double expo_[prim_num]; double ar2[prim_num]; @@ -5253,8 +5254,7 @@ qmckl_compute_ao_vgl_hpc ( double poly_vgl_l1[4][4] = {{1.0, 0.0, 0.0, 0.0}, {0.0, 1.0, 0.0, 0.0}, {0.0, 0.0, 1.0, 0.0}, - {0.0, 0.0, 0.0, 1.0}, - {0.0, 0.0, 0.0, 0.0}}; + {0.0, 0.0, 0.0, 1.0}}; double poly_vgl_l2[5][10] = {{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, {0., 0., 1., 0., 0., 0., 0., 0., 0., 0.}, @@ -5319,8 +5319,7 @@ qmckl_compute_ao_vgl_hpc ( break; default: - const qmckl_exit_code rc = - qmckl_ao_polynomial_transp_vgl_hpc(context, e_coord, n_coord, + rc = qmckl_ao_polynomial_transp_vgl_hpc(context, e_coord, n_coord, nucleus_max_ang_mom[inucl], &n_poly, powers, (int64_t) 3, &(poly_vgl[0][0]), size_max); From 39bc0fb2e8c3572412c604ab3260104dd069e921 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 17 Feb 2022 15:49:41 +0100 Subject: [PATCH 13/19] Fixing MacOS CI --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index f6839d8..5436779 100644 --- a/configure.ac +++ b/configure.ac @@ -223,7 +223,7 @@ if test "$ok" = "yes"; then if test "$GCC" = "yes"; then CFLAGS="$CFLAGS \ -Wall -W -Wbad-function-cast -Wcast-qual -Warray-bounds -Wdisabled-optimization \ --Wpointer-arith -Wcast-align -Wpedantic -Wextra -fmax-errors=5 -Walloc-zero -Werror \ +-Wpointer-arith -Wcast-align -Wpedantic -Wextra -fmax-errors=3 -Walloc-zero -Werror \ " fi if test "$GFC" = "yes"; then From 45e7eab963f362254449266bfe9f753b9eccd876 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 17 Feb 2022 16:08:22 +0100 Subject: [PATCH 14/19] Fixing MacOS CI --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 5436779..eb31b93 100644 --- a/configure.ac +++ b/configure.ac @@ -223,7 +223,7 @@ if test "$ok" = "yes"; then if test "$GCC" = "yes"; then CFLAGS="$CFLAGS \ -Wall -W -Wbad-function-cast -Wcast-qual -Warray-bounds -Wdisabled-optimization \ --Wpointer-arith -Wcast-align -Wpedantic -Wextra -fmax-errors=3 -Walloc-zero -Werror \ +-Wpointer-arith -Wcast-align -Wpedantic -Wextra -Walloc-zero -Werror \ " fi if test "$GFC" = "yes"; then From c93e7828c5640e675189e807d0437ceac47d1733 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 17 Feb 2022 22:29:53 +0100 Subject: [PATCH 15/19] Added qmckl_context_touch for benchmarking --- org/qmckl_ao.org | 12 +++++------- org/qmckl_context.org | 38 +++++++++++++++++++++++++++++--------- org/qmckl_point.org | 15 ++++++++++++--- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 91dfe05..1202117 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -5384,13 +5384,11 @@ qmckl_compute_ao_vgl_hpc ( switch (nucleus_max_ang_mom[inucl]) { case 0: - for (int64_t il=0 ; ildate += 1UL; + ctx->point.date += 1UL; + return QMCKL_SUCCESS; +} + #+end_src + ** Creation To create a new context, ~qmckl_context_create()~ should be used. diff --git a/org/qmckl_point.org b/org/qmckl_point.org index 0672d1c..08bda8c 100644 --- a/org/qmckl_point.org +++ b/org/qmckl_point.org @@ -318,8 +318,17 @@ qmckl_set_point (qmckl_context context, ctx->point.num = num; if (transp == 'T') { - memcpy(ctx->point.coord.data, coord, 3*num*sizeof(double)); + double *a = ctx->point.coord.data; +#ifdef HAVE_OPENMP + #pragma omp for +#endif + for (int64_t i=0 ; i<3*num ; ++i) { + a[i] = coord[i]; + } } else { +#ifdef HAVE_OPENMP + #pragma omp for +#endif for (int64_t i=0 ; ipoint.coord, i, 0) = coord[3*i ]; qmckl_mat(ctx->point.coord, i, 1) = coord[3*i+1]; @@ -328,8 +337,8 @@ qmckl_set_point (qmckl_context context, } /* Increment the date of the context */ - ctx->date += 1UL; - ctx->point.date = ctx->date; + rc = qmckl_context_touch(context); + assert (rc == QMCKL_SUCCESS); return QMCKL_SUCCESS; From 2ffcf25492f93178e602cdd53ee42773da2d9588 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 17 Feb 2022 23:30:16 +0100 Subject: [PATCH 16/19] Out of source build --- Makefile.am | 18 ++++++++++-------- tools/build_doc.sh | 9 ++------- tools/build_makefile.py | 6 +++--- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/Makefile.am b/Makefile.am index 1137766..d658fa5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -82,17 +82,20 @@ htmldir_loc = share/doc/qmckl/html textdir_loc = share/doc/qmckl/text htmlize_el = $(htmldir_loc)/htmlize.el dist_html_DATA = $(HTML_FILES) \ - share/doc/qmckl/html/qmckl.css \ - share/doc/qmckl/html/index.html + $(htmldir_loc)/qmckl.css \ + $(htmldir_loc)/index.html dist_text_DATA = $(TEXT_FILES) -html-local: $(dist_html_DATA) -text: $(dist_text_DATA) +html-local: $(htmlize_el) $(dist_html_DATA) +text: $(htmlize_el) $(dist_text_DATA) doc: html text +$(htmldir_loc)/qmckl.css: $(htmlize_el) + $(MKDIR_P) $(htmldir_loc) $(textdir_loc) + cp -f $(top_srcdir)/$(htmldir_loc)/qmckl.css $(builddir)/$(htmldir_loc)/qmckl.css if QMCKL_DEVEL @@ -102,9 +105,9 @@ endif EXTRA_DIST += $(ORG_FILES) $(TANGLED_FILES) $(EXPORTED_FILES) $(test_qmckl_f) -BUILT_SOURCES = $(C_FILES) $(F_FILES) $(FH_FUNC_FILES) $(FH_TYPE_FILES) $(H_FUNC_FILES) $(H_TYPE_FILES) $(H_PRIVATE_FUNC_FILES) $(H_PRIVATE_TYPE_FILES) $(src_qmckl_f) $(test_qmckl_f) $(qmckl_h) $(header_tests) +BUILT_SOURCES = $(C_FILES) $(F_FILES) $(FH_FUNC_FILES) $(FH_TYPE_FILES) $(H_FUNC_FILES) $(H_TYPE_FILES) $(H_PRIVATE_FUNC_FILES) $(H_PRIVATE_TYPE_FILES) $(src_qmckl_f) $(test_qmckl_f) $(qmckl_h) $(header_tests) $(htmlize_el) -CLEANFILES += $(BUILT_SOURCES) $(C_TEST_FILES) $(F_TEST_FILES) $(TANGLED_FILES) $(C_TEST_FILES) $(F_TEST_FILES) $(src_qmckl_f) $(test_qmckl_f) $(qmckl_h) $(HTML_FILES) $(TEXT_FILES) $(EXPORTED_FILES) $(header_tests) +CLEANFILES += $(BUILT_SOURCES) $(C_TEST_FILES) $(F_TEST_FILES) $(TANGLED_FILES) $(C_TEST_FILES) $(F_TEST_FILES) $(src_qmckl_f) $(test_qmckl_f) $(qmckl_h) $(HTML_FILES) $(TEXT_FILES) $(EXPORTED_FILES) $(header_tests) $(htmlize_el) EXTRA_DIST += \ tools/build_doc.sh \ @@ -152,8 +155,7 @@ $(src_qmckl_f): $(FH_FUNC_FILES) $(FH_TYPE_FILES) $(cat_h_verbose)top_builddir=$(abs_top_builddir) srcdir=$(abs_srcdir) src_qmckl_f=$(src_qmckl_f) $(srcdir)/tools/build_qmckl_f.sh $(htmlize_el): - $(MKDIR_P) $(htmldir_loc) - $(MKDIR_P) $(textdir_loc) + $(MKDIR_P) $(htmldir_loc) $(textdir_loc) abs_srcdir=$(abs_srcdir) $(srcdir)/tools/install_htmlize.sh $(htmlize_el) tests/chbrclf.h: $(qmckl_h) diff --git a/tools/build_doc.sh b/tools/build_doc.sh index 519b417..de7f21b 100755 --- a/tools/build_doc.sh +++ b/tools/build_doc.sh @@ -9,12 +9,7 @@ if [[ -z ${srcdir} ]] ; then exit 1 fi -if [[ -z ${top_builddir} ]] ; then - echo "Error: srcdir environment variable is not defined" - exit 1 -fi - -readonly DOCS=${top_builddir}/share/doc/qmckl/ +readonly DOCS=share/doc/qmckl/ readonly ORG=${srcdir}/org/ readonly HTMLIZE=${DOCS}/html/htmlize.el readonly CONFIG_DOC=${srcdir}/tools/config_doc.el @@ -47,7 +42,7 @@ function extract_doc() for i in $@ do exported=${i%.org}.exported - exported=${top_builddir}/src/$(basename $exported) + exported=${srcdir}/src/$(basename $exported) NOW=$(date +"%m%d%H%M.%S") extract_doc ${i} &> $exported rc=$? diff --git a/tools/build_makefile.py b/tools/build_makefile.py index f85bcdb..cb4b33e 100755 --- a/tools/build_makefile.py +++ b/tools/build_makefile.py @@ -49,8 +49,8 @@ def main(): f_test_o = "tests/test_"+i+"_f.$(OBJEXT)" c_test = "tests/test_"+i+".c" f_test = "tests/test_"+i+"_f.F90" - html = "share/doc/qmckl/html/"+i+".html" - text = "share/doc/qmckl/text/"+i+".txt" + html = "$(htmldir_loc)/"+i+".html" + text = "$(textdir_loc)/"+i+".txt" i="src/"+i @@ -267,7 +267,7 @@ def main(): for f in sorted(DEPS_DOC.keys()): output += [ DEPS_DOC[f] + ": $(srcdir)/" + f + " $(htmlize_el)", - "\t$(export_verbose)top_builddir=$(abs_top_builddir) srcdir=$(abs_srcdir) $(srcdir)/tools/missing bash $(srcdir)/tools/build_doc.sh $(srcdir)/"+f, + "\tsrcdir=$(abs_srcdir) $(srcdir)/tools/missing bash $(srcdir)/tools/build_doc.sh $(srcdir)/"+f, "" ] output += ["endif"] From 73399e24ec4a09059fe090abc4b2c39cbe5c5a6a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 18 Feb 2022 01:24:37 +0100 Subject: [PATCH 17/19] Fix fortran strings in trexio interface --- .github/workflows/test-build.yml | 28 +++++++++++++- org/qmckl_ao.org | 66 ++++++++++++++++++++++++++++++++ org/qmckl_nucleus.org | 6 +-- org/qmckl_trexio.org | 17 +++++--- 4 files changed, 106 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index a4dd421..41b59e0 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -40,7 +40,9 @@ jobs: - name: Build QMCkl run: | ./autogen.sh - ./configure --enable-silent-rules --enable-debug + mkdir _build + cd _build + ../configure --enable-silent-rules --enable-debug make -j 4 - name: Run test @@ -72,7 +74,7 @@ jobs: - uses: actions/checkout@v2 - name: install dependencies run: brew install emacs hdf5 automake pkg-config - + - name: Symlink gfortran (macOS) if: runner.os == 'macOS' run: | @@ -89,6 +91,28 @@ jobs: git clone https://github.com/TREX-CoE/trexio.git cd trexio ./autogen.sh + mkdir _build + cd _build + ../configure --enable-silent-rules + make -j 4 + + - name: Run test + run: make -j 4 check + + - name: Archive test log file + if: failure() + uses: actions/upload-artifact@v2 + with: + name: test-report-ubuntu + path: test-suite.log + + - name: Dist test + run: make distcheck + + - name: Archive test log file + if: failure() + uses: actions/upload-artifact@v2 + with: ./configure --prefix=${PWD}/_install --enable-silent-rules make -j 4 make install diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 1202117..a982379 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -2822,6 +2822,72 @@ qmckl_get_ao_basis_ao_vgl (qmckl_context context, end interface #+end_src + Uses the give array to compute the VGL. + + #+begin_src c :comments org :tangle (eval h_func) :noweb yes +qmckl_exit_code +qmckl_get_ao_basis_ao_vgl_inplace (qmckl_context context, + double* const ao_vgl, + const int64_t size_max); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none +qmckl_exit_code +qmckl_get_ao_basis_ao_vgl_inplace (qmckl_context context, + double* const ao_vgl, + const int64_t size_max) +{ + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return qmckl_failwith( context, + QMCKL_INVALID_CONTEXT, + "qmckl_get_ao_basis_ao_vgl", + NULL); + } + + qmckl_exit_code rc; + + qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; + assert (ctx != NULL); + + int64_t sze = ctx->ao_basis.ao_num * 5 * ctx->point.num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_ao_basis_ao_vgl", + "input array too small"); + } + + rc = qmckl_context_touch(context); + if (rc != QMCKL_SUCCESS) return rc; + + double* old_array = ctx->ao_basis.ao_vgl; + + ctx->ao_basis.ao_vgl = ao_vgl; + + rc = qmckl_provide_ao_vgl(context); + if (rc != QMCKL_SUCCESS) return rc; + + ctx->ao_basis.ao_vgl = old_array; + + return QMCKL_SUCCESS; +} + #+end_src + + #+begin_src f90 :tangle (eval fh_func) :comments org :exports none + interface + integer(c_int32_t) function qmckl_get_ao_basis_ao_vgl_inplace (context, & + ao_vgl, size_max) bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + integer (c_int64_t) , intent(in) , value :: context + double precision, intent(out) :: ao_vgl(*) + integer (c_int64_t) , intent(in) , value :: size_max + end function qmckl_get_ao_basis_ao_vgl_inplace + end interface + #+end_src + * Radial part ** General functions for Gaussian basis functions diff --git a/org/qmckl_nucleus.org b/org/qmckl_nucleus.org index da003b5..0191afe 100644 --- a/org/qmckl_nucleus.org +++ b/org/qmckl_nucleus.org @@ -668,14 +668,14 @@ assert(!qmckl_nucleus_provided(context)); rc = qmckl_get_nucleus_coord (context, 'N', nucl_coord2, 3*nucl_num); assert(rc == QMCKL_SUCCESS); for (size_t k=0 ; k<3 ; ++k) { - for (size_t i=0 ; i Date: Fri, 18 Feb 2022 01:54:41 +0100 Subject: [PATCH 18/19] Small fix in Makefile --- Makefile.am | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Makefile.am b/Makefile.am index d658fa5..8eff2f1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -93,10 +93,6 @@ text: $(htmlize_el) $(dist_text_DATA) doc: html text -$(htmldir_loc)/qmckl.css: $(htmlize_el) - $(MKDIR_P) $(htmldir_loc) $(textdir_loc) - cp -f $(top_srcdir)/$(htmldir_loc)/qmckl.css $(builddir)/$(htmldir_loc)/qmckl.css - if QMCKL_DEVEL if VFC_CI From d919c53c42a53d0e5f8f400f9075fd2575b10638 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 19 Feb 2022 19:24:18 +0100 Subject: [PATCH 19/19] Fix bug in HPC AOs --- configure.ac | 4 +- org/qmckl_ao.org | 209 +++++++++++++++++++++++++++-------------------- 2 files changed, 124 insertions(+), 89 deletions(-) diff --git a/configure.ac b/configure.ac index eb31b93..fcc1dc5 100644 --- a/configure.ac +++ b/configure.ac @@ -222,13 +222,13 @@ AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], if test "$ok" = "yes"; then if test "$GCC" = "yes"; then CFLAGS="$CFLAGS \ --Wall -W -Wbad-function-cast -Wcast-qual -Warray-bounds -Wdisabled-optimization \ +-g -Wall -W -Wbad-function-cast -Wcast-qual -Warray-bounds -Wdisabled-optimization \ -Wpointer-arith -Wcast-align -Wpedantic -Wextra -Walloc-zero -Werror \ " fi if test "$GFC" = "yes"; then FCFLAGS="$FCFLAGS \ --fcheck=all -Waliasing -Wampersand -Wconversion \ +-g -fcheck=all -Waliasing -Wampersand -Wconversion \ -Wsurprising -ffpe-trap=zero,overflow,underflow \ -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation \ -Wreal-q-constant -Wuninitialized -fbacktrace -finit-real=nan" diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index a982379..55057bd 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -3154,7 +3154,7 @@ assert(0 == test_qmckl_ao_gaussian_vgl(context)); const double* coord, const double* nucl_coord, const double* expo, - double* const primitive_vgl ); + double* const primitive_vgl ); #+end_src @@ -3493,7 +3493,7 @@ for (j=0 ; j lmax+3); double pows[3][size_max]; - - for (int32_t i=0 ; i<=2 ; ++i) { + + for (int32_t i=0 ; i<3 ; ++i) { pows[0][i] = 1.0; pows[1][i] = 1.0; pows[2][i] = 1.0; } - + for (int32_t i=3 ; i<=lmax+2 ; ++i) { pows[0][i] = pows[0][i-1] * Y[0]; pows[1][i] = pows[1][i-1] * Y[1]; pows[2][i] = pows[2][i-1] * Y[2]; } - - int32_t* l[24]; + + int32_t* l[size_max]; for (int32_t i=0 ; i=0 ; --a) { double db = dd-da; - + for (int32_t b=d-a ; b>=0 ; --b) { - const int32_t c = d - a - b; + const int32_t c = d - a - b; const double dc = dd - da - db; - + double xy = pows[0][a+2] * pows[1][b+2]; double yz = pows[1][b+2] * pows[2][c+2]; double xz = pows[0][a+2] * pows[2][c+2]; - + l[m][0] = a; l[m][1] = b; l[m][2] = c; - + vgl1[m] = xy * pows[2][c+2]; - + xy *= dc; xz *= db; yz *= da; - + vgl2[m] = pows[0][a+1] * yz; vgl3[m] = pows[1][b+1] * xz; vgl4[m] = pows[2][c+1] * xy; - + vgl5[m] = (da-1.) * pows[0][a] * yz + (db-1.) * pows[1][b] * xz + (dc-1.) * pows[2][c] * xy; - + db -= 1.0; ++m; } @@ -5040,13 +5035,13 @@ assert(0 == test_qmckl_ao_polynomial_vgl(context)); double X[3] = { 1.1, 2.2, 3.3 }; double R[3] = { 0.2, 1.1, 3.0 }; -int32_t ldv[4] = {1, 4, 10, 20}; -for (int32_t ldl=3 ; ldl<5 ; ++ldl) { +int32_t ldv[8] = {1, 4, 10, 20, 35, 56, 84, 120}; +for (int32_t ldl=3 ; ldl<=5 ; ++ldl) { int64_t n; - int32_t L0[24][ldl]; - int32_t L1[24][ldl]; + int32_t L0[200][ldl]; + int32_t L1[200][ldl]; printf("ldl=%d\n", ldl); - for (int32_t lmax=0 ; lmax<=3 ; lmax++) { + for (int32_t lmax=0 ; lmax<=7 ; lmax++) { double VGL0[5][ldv[lmax]]; double VGL1[5][ldv[lmax]]; memset(&L0[0][0], 0, sizeof(L0)); @@ -5064,7 +5059,7 @@ for (int32_t ldl=3 ; ldl<5 ; ++ldl) { assert( L0[l][k] == L1[l][k] ); } } - + for (int32_t k=0 ; k<5 ; ++k) { for (int32_t l=0 ; lao_basis.ao_vgl = ao_vgl; } #ifdef HAVE_HPC - rc = qmckl_compute_ao_vgl_hpc(context, + if (ctx->ao_basis.type == 'G') { + rc = qmckl_compute_ao_vgl_hpc_gaussian(context, ctx->ao_basis.ao_num, ctx->ao_basis.shell_num, ctx->ao_basis.prim_num, @@ -5710,6 +5706,45 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context) ctx->ao_basis.exponent, ctx->ao_basis.coefficient_normalized, ctx->ao_basis.ao_vgl); + /* + } else if (ctx->ao_basis.type == 'S') { + rc = qmck_compute_ao_vgl_hpc_slater(context, + ctx->ao_basis.ao_num, + ctx->ao_basis.shell_num, + ctx->ao_basis.prim_num, + ctx->point.num, + ctx->nucleus.num, + ctx->point.coord.data, + ctx->nucleus.coord.data, + ctx->ao_basis.nucleus_index, + ctx->ao_basis.nucleus_shell_num, + ctx->ao_basis.nucleus_range, + ctx->ao_basis.nucleus_max_ang_mom, + ctx->ao_basis.shell_ang_mom, + ctx->ao_basis.shell_prim_index, + ctx->ao_basis.shell_prim_num, + ctx->ao_basis.ao_factor, + ctx->ao_basis.exponent, + ctx->ao_basis.coefficient_normalized, + ctx->ao_basis.ao_vgl); + ,*/ + } else { + rc = qmckl_compute_ao_vgl_doc(context, + ctx->ao_basis.ao_num, + ctx->ao_basis.shell_num, + ctx->point.num, + ctx->nucleus.num, + ctx->point.coord.data, + ctx->nucleus.coord.data, + ctx->ao_basis.nucleus_index, + ctx->ao_basis.nucleus_shell_num, + ctx->ao_basis.nucleus_range, + ctx->ao_basis.nucleus_max_ang_mom, + ctx->ao_basis.shell_ang_mom, + ctx->ao_basis.ao_factor, + ctx->ao_basis.shell_vgl, + ctx->ao_basis.ao_vgl); + } #else rc = qmckl_compute_ao_vgl_doc(context, ctx->ao_basis.ao_num,