From 79d4cf130bcb53ea21d461098cf7ab17a415a6ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Thu, 24 Mar 2022 10:06:25 +0100 Subject: [PATCH 01/27] Add detection of configure arguments to enable GPU offloading As of now, only OpenMP offload will be implemented as a test. --- configure.ac | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/configure.ac b/configure.ac index 56f0ed1..668fe2a 100644 --- a/configure.ac +++ b/configure.ac @@ -218,6 +218,29 @@ AS_IF([test "$HAVE_HPC" = "yes"], [ AC_DEFINE([HAVE_HPC], [1], [If defined, activate HPC routines]) ]) +# Enable Verificarlo tests +AC_ARG_ENABLE([vfc_ci], +[ --enable-vfc_ci Build the library with vfc_ci support], +[case "${enableval}" in + yes) vfc_ci=true && FCFLAGS="-D VFC_CI $FCFLAGS" && CFLAGS="-D VFC_CI $CFLAGS";; + no) vfc_ci=false ;; + *) AC_MSG_ERROR([bad value ${enableval} for --enable_vfc_ci]) ;; +esac],[vfc_ci=false]) +AM_CONDITIONAL([VFC_CI], [test x$vfc_ci = xtrue]) + +if test "$FC" = "verificarlo-f"; then + AC_MSG_NOTICE(verificarlo-f detected) + # Arguments order is important here + FCFLAGS="-Mpreprocess $FCFLAGS" +fi + +# Enable GPU offloading +# OpenMP offloading +AC_ARG_ENABLE(openmp-offload, [AS_HELP_STRING([--openmp-offload],[Use OpenMP-offloaded functions])], HAVE_OPENMP_OFFLOAD=$enableval, HAVE_OPENMP_OFFLOAD=no) +AS_IF([test "$HAVE_OPENMP_OFFLOAD" = "yes"], [ + AC_DEFINE([HAVE_OPENMP_OFFLOAD], [1], [If defined, activate OpenMP-offloaded routines]) +]) + AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], ok=$enableval, ok=no) if test "$ok" = "yes"; then if test "$GCC" = "yes"; then @@ -313,21 +336,6 @@ if test "x${QMCKL_DEVEL}" != "x"; then fi -# Enable Verificarlo tests -AC_ARG_ENABLE([vfc_ci], -[ --enable-vfc_ci Build the library with vfc_ci support], -[case "${enableval}" in - yes) vfc_ci=true && FCFLAGS="-D VFC_CI $FCFLAGS" && CFLAGS="-D VFC_CI $CFLAGS";; - no) vfc_ci=false ;; - *) AC_MSG_ERROR([bad value ${enableval} for --enable_vfc_ci]) ;; -esac],[vfc_ci=false]) -AM_CONDITIONAL([VFC_CI], [test x$vfc_ci = xtrue]) - -if test "$FC" = "verificarlo-f"; then - AC_MSG_NOTICE(verificarlo-f detected) - # Arguments order is important here - FCFLAGS="-Mpreprocess $FCFLAGS" -fi #PKG-CONFIG #mkl-dynamic-lp64-seq @@ -363,6 +371,7 @@ LDFLAGS:........: ${LDFLAGS} LIBS............: ${LIBS} USE CHAMELEON...: ${with_chameleon} HPC version.....: ${HAVE_HPC} +OpenMP offload .: ${HAVE_OPENMP_OFFLOAD} Package features: ${ARGS} From 5e3231e7e39fd3f07bc68e7e9d8ad7875aa8dd47 Mon Sep 17 00:00:00 2001 From: Aurelien Delval Date: Thu, 24 Mar 2022 16:35:29 +0100 Subject: [PATCH 02/27] Add selection mechanism for offload mode in Jastrow This system adds an additional field to the QMCkl context to store the offload mode currently in use for each kernel (in this commit, this has been implemented for Jastrow as an example). This will be useful to test different offloading versions that can be easily toggled on/off at compilation and at runtime. --- org/ao_grid.f90 | 114 ++++++++++++++++++ org/qmckl_jastrow.org | 269 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 382 insertions(+), 1 deletion(-) create mode 100644 org/ao_grid.f90 diff --git a/org/ao_grid.f90 b/org/ao_grid.f90 new file mode 100644 index 0000000..685313f --- /dev/null +++ b/org/ao_grid.f90 @@ -0,0 +1,114 @@ +subroutine qmckl_check_error(rc, message) + use qmckl + implicit none + integer(qmckl_exit_code), intent(in) :: rc + character(len=*) , intent(in) :: message + character(len=128) :: str_buffer + if (rc /= QMCKL_SUCCESS) then + print *, message + call qmckl_string_of_error(rc, str_buffer) + print *, str_buffer + call exit(rc) + end if +end subroutine qmckl_check_error + +program ao_grid + use qmckl + implicit none + + integer(qmckl_context) :: qmckl_ctx ! QMCkl context + integer(qmckl_exit_code) :: rc ! Exit code of QMCkl functions + + character(len=128) :: trexio_filename + character(len=128) :: str_buffer + integer :: ao_id + integer :: point_num_x + + integer(c_int64_t) :: nucl_num + double precision, allocatable :: nucl_coord(:,:) + + integer(c_int64_t) :: point_num + integer(c_int64_t) :: ao_num + integer(c_int64_t) :: ipoint, i, j, k + double precision :: x, y, z, dr(3) + double precision :: rmin(3), rmax(3) + double precision, allocatable :: points(:,:) + double precision, allocatable :: ao_vgl(:,:,:) + +if (iargc() /= 3) then + print *, 'Syntax: ao_grid ' + call exit(-1) +end if +call getarg(1, trexio_filename) +call getarg(2, str_buffer) +read(str_buffer, *) ao_id +call getarg(3, str_buffer) +read(str_buffer, *) point_num_x + +if (point_num_x < 0 .or. point_num_x > 300) then + print *, 'Error: 0 < point_num < 300' + call exit(-1) +end if + +qmckl_ctx = qmckl_context_create() +rc = qmckl_trexio_read(qmckl_ctx, trexio_filename, 1_8*len(trim(trexio_filename))) +call qmckl_check_error(rc, 'Read TREXIO') + +rc = qmckl_get_ao_basis_ao_num(qmckl_ctx, ao_num) +call qmckl_check_error(rc, 'Getting ao_num') + +if (ao_id < 0 .or. ao_id > ao_num) then + print *, 'Error: 0 < ao_id < ', ao_num + call exit(-1) +end if + +rc = qmckl_get_nucleus_num(qmckl_ctx, nucl_num) +call qmckl_check_error(rc, 'Get nucleus num') + +allocate( nucl_coord(3, nucl_num) ) +rc = qmckl_get_nucleus_coord(qmckl_ctx, 'N', nucl_coord, 3_8*nucl_num) +call qmckl_check_error(rc, 'Get nucleus coord') + +rmin(1) = minval( nucl_coord(1,:) ) - 5.d0 +rmin(2) = minval( nucl_coord(2,:) ) - 5.d0 +rmin(3) = minval( nucl_coord(3,:) ) - 5.d0 + +rmax(1) = maxval( nucl_coord(1,:) ) + 5.d0 +rmax(2) = maxval( nucl_coord(2,:) ) + 5.d0 +rmax(3) = maxval( nucl_coord(3,:) ) + 5.d0 + +dr(1:3) = (rmax(1:3) - rmin(1:3)) / dble(point_num_x-1) + +point_num = point_num_x**3 +allocate( points(point_num, 3) ) +ipoint=0 +z = rmin(3) +do k=1,point_num_x + y = rmin(2) + do j=1,point_num_x + x = rmin(1) + do i=1,point_num_x + ipoint = ipoint+1 + points(ipoint,1) = x + points(ipoint,2) = y + points(ipoint,3) = z + x = x + dr(1) + end do + y = y + dr(2) + end do + z = z + dr(3) +end do + +rc = qmckl_set_point(qmckl_ctx, 'T', points, point_num) +call qmckl_check_error(rc, 'Setting points') + +allocate( ao_vgl(ao_num, 5, point_num) ) +rc = qmckl_get_ao_basis_ao_vgl(qmckl_ctx, ao_vgl, ao_num*5_8*point_num) +call qmckl_check_error(rc, 'Setting points') + +do ipoint=1, point_num + print '(3(F16.10,X),E20.10)', points(ipoint, 1:3), ao_vgl(ao_id,1,ipoint) +end do + +deallocate( nucl_coord, points, ao_vgl ) +end program ao_grid diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 61062af..6a2c2a2 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -327,7 +327,14 @@ kappa_inv = 1.0/kappa ** Data structure - #+begin_src c :comments org :tangle (eval h_private_type) +#+begin_src c :comments org :tangle (eval h_type) +typedef enum qmckl_jastrow_offload_type{ + OFFLOAD_NONE, + OFFLOAD_OPENMP +} qmckl_jastrow_offload_type; +#+end_src + +#+begin_src c :comments org :tangle (eval h_private_type) typedef struct qmckl_jastrow_struct{ int32_t uninitialized; int64_t aord_num; @@ -372,6 +379,7 @@ typedef struct qmckl_jastrow_struct{ uint64_t een_rescaled_n_deriv_e_date; bool provided; char * type; + qmckl_jastrow_offload_type offload_type; } qmckl_jastrow_struct; #+end_src @@ -416,6 +424,7 @@ qmckl_exit_code qmckl_get_jastrow_type_nucl_vector (qmckl_context context, int qmckl_exit_code qmckl_get_jastrow_aord_vector (qmckl_context context, double * const aord_vector, const int64_t size_max); qmckl_exit_code qmckl_get_jastrow_bord_vector (qmckl_context context, double * const bord_vector, const int64_t size_max); qmckl_exit_code qmckl_get_jastrow_cord_vector (qmckl_context context, double * const cord_vector, const int64_t size_max); +qmckl_exit_code qmckl_get_jastrow_offload_type (qmckl_context context, qmckl_jastrow_offload_type * const offload_type); #+end_src Along with these core functions, calculation of the jastrow factor @@ -713,6 +722,32 @@ qmckl_get_jastrow_cord_vector (const qmckl_context context, return QMCKL_SUCCESS; } +qmckl_exit_code qmckl_get_jastrow_offload_type (const qmckl_context context, qmckl_jastrow_offload_type* const offload_type) { + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return (char) 0; + } + + if (offload_type == NULL) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_2, + "qmckl_get_jastrow_offload_type", + "offload_type is a null pointer"); + } + + qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; + assert (ctx != NULL); + + int32_t mask = 1 << 0; + + if ( (ctx->jastrow.uninitialized & mask) != 0) { + return QMCKL_NOT_PROVIDED; + } + + *offload_type = ctx->jastrow.offload_type; + return QMCKL_SUCCESS; +} + #+end_src ** Initialization functions @@ -727,6 +762,7 @@ qmckl_exit_code qmckl_set_jastrow_type_nucl_vector (qmckl_context context, con qmckl_exit_code qmckl_set_jastrow_aord_vector (qmckl_context context, const double * aord_vector, const int64_t size_max); qmckl_exit_code qmckl_set_jastrow_bord_vector (qmckl_context context, const double * bord_vector, const int64_t size_max); qmckl_exit_code qmckl_set_jastrow_cord_vector (qmckl_context context, const double * cord_vector, const int64_t size_max); +qmckl_exit_code qmckl_set_jastrow_offload_type (qmckl_context context, const qmckl_jastrow_offload_type offload_type); #+end_src #+NAME:pre2 @@ -1063,6 +1099,14 @@ qmckl_set_jastrow_cord_vector(qmckl_context context, <> } +qmckl_exit_code +qmckl_set_jastrow_offload_type(qmckl_context context, const qmckl_jastrow_offload_type offload_type) +{ +<> + ctx->jastrow.offload_type = offload_type; + return QMCKL_SUCCESS; +} + #+end_src When the required information is completely entered, other data structures are @@ -6093,6 +6137,30 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.factor_een_deriv_e = factor_een_deriv_e; } + /* Choose the correct compute function (depending on offload type) */ + bool default_compute = true; + +#ifdef HAVE_OPENMP_OFFLOAD + if(ctx->jastrow.offload_type == OFFLOAD_OPENMP) { + qmckl_exit_code rc = + qmckl_compute_factor_een_deriv_e_omp_offload(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.cord_vect_full, + ctx->jastrow.lkpm_combined_index, + ctx->jastrow.tmp_c, + ctx->jastrow.dtmp_c, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.een_rescaled_n_deriv_e, + ctx->jastrow.factor_een_deriv_e); + default_compute = false; + } +#endif + + if(default_compute) { qmckl_exit_code rc = qmckl_compute_factor_een_deriv_e(context, ctx->electron.walk_num, @@ -6107,6 +6175,8 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.een_rescaled_n_deriv_e, ctx->jastrow.factor_een_deriv_e); + } + if (rc != QMCKL_SUCCESS) { return rc; } @@ -6507,6 +6577,203 @@ end function qmckl_compute_factor_een_deriv_e_f end function qmckl_compute_factor_een_deriv_e #+end_src +*** Compute (OpenMP offload)... + :PROPERTIES: + :Name: qmckl_compute_factor_een_deriv_e + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: + + #+NAME: qmckl_factor_een_deriv_e_omp_offload_args + | Variable | Type | In/Out | Description | + |--------------------------+---------------------------------------------------------------------+--------+------------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~walk_num~ | ~int64_t~ | in | Number of walkers | + | ~elec_num~ | ~int64_t~ | in | Number of electrons | + | ~nucl_num~ | ~int64_t~ | in | Number of nucleii | + | ~cord_num~ | ~int64_t~ | in | order of polynomials | + | ~dim_cord_vect~ | ~int64_t~ | in | dimension of full coefficient vector | + | ~cord_vect_full~ | ~double[dim_cord_vect][nucl_num]~ | in | full coefficient vector | + | ~lkpm_combined_index~ | ~int64_t[4][dim_cord_vect]~ | in | combined indices | + | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | in | Temporary intermediate tensor | + | ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][4][elec_num]~ | in | vector of non-zero coefficients | + | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | + | ~een_rescaled_n_deriv_e~ | ~double[walk_num][0:cord_num][nucl_num][4][elec_num]~ | in | Derivative of Electron-nucleus rescaled factor | + | ~factor_een_deriv_e~ | ~double[walk_num][4][elec_num]~ | out | Derivative of Electron-nucleus jastrow | + + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes +#ifdef HAVE_OPENMP_OFFLOAD +integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & + cord_vect_full, lkpm_combined_index, & + tmp_c, dtmp_c, een_rescaled_n, een_rescaled_n_deriv_e, factor_een_deriv_e) & + result(info) + use qmckl + implicit none + integer(qmckl_context), intent(in) :: context + integer*8 , intent(in) :: walk_num, elec_num, cord_num, nucl_num, dim_cord_vect + integer*8 , intent(in) :: lkpm_combined_index(dim_cord_vect,4) + double precision , intent(in) :: cord_vect_full(nucl_num, dim_cord_vect) + double precision , intent(in) :: tmp_c(elec_num, nucl_num,0:cord_num, 0:cord_num-1, walk_num) + double precision , intent(in) :: dtmp_c(elec_num, 4, nucl_num,0:cord_num, 0:cord_num-1, walk_num) + double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) + double precision , intent(in) :: een_rescaled_n_deriv_e(elec_num, 4, nucl_num, 0:cord_num, walk_num) + double precision , intent(out) :: factor_een_deriv_e(elec_num,4,walk_num) + + integer*8 :: i, a, j, l, k, p, m, n, nw, ii + double precision :: accu, accu2, cn + + info = QMCKL_SUCCESS + + if (context == QMCKL_NULL_CONTEXT) then + info = QMCKL_INVALID_CONTEXT + return + endif + + if (walk_num <= 0) then + info = QMCKL_INVALID_ARG_2 + return + endif + + if (elec_num <= 0) then + info = QMCKL_INVALID_ARG_3 + return + endif + + if (nucl_num <= 0) then + info = QMCKL_INVALID_ARG_4 + return + endif + + if (cord_num <= 0) then + info = QMCKL_INVALID_ARG_5 + return + endif + + factor_een_deriv_e = 0.0d0 + + do nw =1, walk_num + do n = 1, dim_cord_vect + l = lkpm_combined_index(n, 1) + k = lkpm_combined_index(n, 2) + p = lkpm_combined_index(n, 3) + m = lkpm_combined_index(n, 4) + + do a = 1, nucl_num + cn = cord_vect_full(a, n) + if(cn == 0.d0) cycle + + do ii = 1, 4 + do j = 1, elec_num + factor_een_deriv_e(j,ii,nw) = factor_een_deriv_e(j,ii,nw) + (& + tmp_c(j,a,m,k,nw) * een_rescaled_n_deriv_e(j,ii,a,m+l,nw) + & + (dtmp_c(j,ii,a,m,k,nw)) * een_rescaled_n(j,a,m+l,nw) + & + (dtmp_c(j,ii,a,m+l,k,nw)) * een_rescaled_n(j,a,m ,nw) + & + tmp_c(j,a,m+l,k,nw) * een_rescaled_n_deriv_e(j,ii,a,m,nw) & + ) * cn + end do + end do + + cn = cn + cn + do j = 1, elec_num + factor_een_deriv_e(j,4,nw) = factor_een_deriv_e(j,4,nw) + (& + (dtmp_c(j,1,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,1,a,m+l,nw) + & + (dtmp_c(j,2,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,2,a,m+l,nw) + & + (dtmp_c(j,3,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,3,a,m+l,nw) + & + (dtmp_c(j,1,a,m+l,k,nw)) * een_rescaled_n_deriv_e(j,1,a,m ,nw) + & + (dtmp_c(j,2,a,m+l,k,nw)) * een_rescaled_n_deriv_e(j,2,a,m ,nw) + & + (dtmp_c(j,3,a,m+l,k,nw)) * een_rescaled_n_deriv_e(j,3,a,m ,nw) & + ) * cn + end do + end do + end do + end do + +end function qmckl_compute_factor_een_deriv_e_omp_offload_f +#endif + #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_een_deriv_e_omp_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org +#ifdef HAVE_OPENMP_OFFLOAD + qmckl_exit_code qmckl_compute_factor_een_deriv_e_omp_offload ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const int64_t dim_cord_vect, + const double* cord_vect_full, + const int64_t* lkpm_combined_index, + const double* tmp_c, + const double* dtmp_c, + const double* een_rescaled_n, + const double* een_rescaled_n_deriv_e, + double* const factor_een_deriv_e ); +#endif + #+end_src + + + #+CALL: generate_c_interface(table=qmckl_factor_een_deriv_e_omp_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src f90 :tangle (eval f) :comments org :exports none +#ifdef HAVE_OPENMP_OFFLOAD + integer(c_int32_t) function qmckl_compute_factor_een_deriv_e_omp_offload & + (context, & + walk_num, & + elec_num, & + nucl_num, & + cord_num, & + dim_cord_vect, & + cord_vect_full, & + lkpm_combined_index, & + tmp_c, & + dtmp_c, & + een_rescaled_n, & + een_rescaled_n_deriv_e, & + factor_een_deriv_e) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: walk_num + integer (c_int64_t) , intent(in) , value :: elec_num + integer (c_int64_t) , intent(in) , value :: nucl_num + integer (c_int64_t) , intent(in) , value :: cord_num + integer (c_int64_t) , intent(in) , value :: dim_cord_vect + real (c_double ) , intent(in) :: cord_vect_full(nucl_num,dim_cord_vect) + integer (c_int64_t) , intent(in) :: lkpm_combined_index(dim_cord_vect,4) + real (c_double ) , intent(in) :: tmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) + real (c_double ) , intent(in) :: dtmp_c(elec_num,4,nucl_num,0:cord_num,0:cord_num-1,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n_deriv_e(elec_num,4,nucl_num,0:cord_num,walk_num) + real (c_double ) , intent(out) :: factor_een_deriv_e(elec_num,4,walk_num) + + integer(c_int32_t), external :: qmckl_compute_factor_een_deriv_e_omp_offload_f + info = qmckl_compute_factor_een_deriv_e_omp_offload_f & + (context, & + walk_num, & + elec_num, & + nucl_num, & + cord_num, & + dim_cord_vect, & + cord_vect_full, & + lkpm_combined_index, & + tmp_c, & + dtmp_c, & + een_rescaled_n, & + een_rescaled_n_deriv_e, & + factor_een_deriv_e) + + end function qmckl_compute_factor_een_deriv_e_omp_offload +#endif + #+end_src + *** Test #+begin_src python :results output :exports none :noweb yes import numpy as np From bcc49ca31215e6bc6456bf30153395e2e5b6d697 Mon Sep 17 00:00:00 2001 From: Aurelien Delval Date: Fri, 25 Mar 2022 13:03:35 +0100 Subject: [PATCH 03/27] Minor fixes to previous commit TODO Start modifying dedicated function to implement offloading Also, as of now, Fortran preprocessor flags should be passed manually, we need to manage this in the configure.ac in the future. For now, when using gfortran, you should pass FCFLAGS="-cpp -DWITH_OPENMP_OFFLOAD" to enable offloading. --- org/qmckl_jastrow.org | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 6a2c2a2..f97464b 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -6604,7 +6604,8 @@ end function qmckl_compute_factor_een_deriv_e_f #+begin_src f90 :comments org :tangle (eval f) :noweb yes #ifdef HAVE_OPENMP_OFFLOAD -integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & +! TODO Add some offload statements +integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & cord_vect_full, lkpm_combined_index, & tmp_c, dtmp_c, een_rescaled_n, een_rescaled_n_deriv_e, factor_een_deriv_e) & result(info) @@ -6715,8 +6716,7 @@ end function qmckl_compute_factor_een_deriv_e_omp_offload_f #endif #+end_src - - #+CALL: generate_c_interface(table=qmckl_factor_een_deriv_e_omp_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +#+CALL: generate_c_interface(table=qmckl_factor_een_deriv_e_omp_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none From 383c6ac78af4eaa0b4dc869aba9f4b80c9aad5d7 Mon Sep 17 00:00:00 2001 From: Aurelien Delval Date: Mon, 28 Mar 2022 07:58:01 +0200 Subject: [PATCH 04/27] Add OFFLOAD_FLAGS, OFFLOAD_CFLAGS and OFFLOAD_FCFLAGS vars to configure --- configure.ac | 3 +++ org/qmckl_jastrow.org | 2 ++ 2 files changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 668fe2a..2993873 100644 --- a/configure.ac +++ b/configure.ac @@ -235,10 +235,13 @@ if test "$FC" = "verificarlo-f"; then fi # Enable GPU offloading + # OpenMP offloading AC_ARG_ENABLE(openmp-offload, [AS_HELP_STRING([--openmp-offload],[Use OpenMP-offloaded functions])], HAVE_OPENMP_OFFLOAD=$enableval, HAVE_OPENMP_OFFLOAD=no) AS_IF([test "$HAVE_OPENMP_OFFLOAD" = "yes"], [ AC_DEFINE([HAVE_OPENMP_OFFLOAD], [1], [If defined, activate OpenMP-offloaded routines]) + CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS" + FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENMP_OFFLOAD $FCFLAGS" ]) AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], ok=$enableval, ok=no) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index f97464b..cf0903c 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -6654,6 +6654,7 @@ integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_nu factor_een_deriv_e = 0.0d0 do nw =1, walk_num + !$omp target do n = 1, dim_cord_vect l = lkpm_combined_index(n, 1) k = lkpm_combined_index(n, 2) @@ -6688,6 +6689,7 @@ integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_nu end do end do end do + !$omp end target end do end function qmckl_compute_factor_een_deriv_e_omp_offload_f From 99306473a4cb418c81faf9656ccf8ec40c34bbea Mon Sep 17 00:00:00 2001 From: Aurelien Delval Date: Wed, 30 Mar 2022 09:01:32 +0200 Subject: [PATCH 05/27] Start OpenACC implementation in Jastro, including compute_dtmp_c --- configure.ac | 12 +-- org/qmckl_jastrow.org | 240 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 214 insertions(+), 38 deletions(-) diff --git a/configure.ac b/configure.ac index 2993873..8bd6747 100644 --- a/configure.ac +++ b/configure.ac @@ -236,12 +236,12 @@ fi # Enable GPU offloading -# OpenMP offloading -AC_ARG_ENABLE(openmp-offload, [AS_HELP_STRING([--openmp-offload],[Use OpenMP-offloaded functions])], HAVE_OPENMP_OFFLOAD=$enableval, HAVE_OPENMP_OFFLOAD=no) -AS_IF([test "$HAVE_OPENMP_OFFLOAD" = "yes"], [ - AC_DEFINE([HAVE_OPENMP_OFFLOAD], [1], [If defined, activate OpenMP-offloaded routines]) +# OpenACC offloading +AC_ARG_ENABLE(openacc-offload, [AS_HELP_STRING([--openacc-offload],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) +AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ + AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines]) CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS" - FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENMP_OFFLOAD $FCFLAGS" + FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENACC_OFFLOAD $FCFLAGS" ]) AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], ok=$enableval, ok=no) @@ -374,7 +374,7 @@ LDFLAGS:........: ${LDFLAGS} LIBS............: ${LIBS} USE CHAMELEON...: ${with_chameleon} HPC version.....: ${HAVE_HPC} -OpenMP offload .: ${HAVE_OPENMP_OFFLOAD} +OpenACC offload : ${HAVE_OPENACC_OFFLOAD} Package features: ${ARGS} diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index cf0903c..70e1a8b 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -330,7 +330,7 @@ kappa_inv = 1.0/kappa #+begin_src c :comments org :tangle (eval h_type) typedef enum qmckl_jastrow_offload_type{ OFFLOAD_NONE, - OFFLOAD_OPENMP + OFFLOAD_OPENACC } qmckl_jastrow_offload_type; #+end_src @@ -4851,7 +4851,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; mem_info.size = (ctx->jastrow.cord_num) * (ctx->jastrow.cord_num + 1) - * 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); + ,* 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); double* dtmp_c = (double*) qmckl_malloc(context, mem_info); if (dtmp_c == NULL) { @@ -4863,8 +4863,13 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } - qmckl_exit_code rc = - qmckl_compute_dtmp_c(context, + /* Choose the correct compute function (depending on offload type) */ + bool default_compute = true; + + #ifdef HAVE_OPENACC_OFFLOAD + if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) { + qmckl_exit_code rc = + qmckl_compute_dtmp_c_acc_offload(context, ctx->jastrow.cord_num, ctx->electron.num, ctx->nucleus.num, @@ -4872,8 +4877,26 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); - if (rc != QMCKL_SUCCESS) { - return rc; + default_compute = false; + if (rc != QMCKL_SUCCESS) { + return rc; + } + } + #endif + + if(default_compute) { + qmckl_exit_code rc = + qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + if (rc != QMCKL_SUCCESS) { + return rc; + } } ctx->jastrow.dtmp_c_date = ctx->date; @@ -5439,6 +5462,156 @@ end function qmckl_compute_dtmp_c_f end function qmckl_compute_dtmp_c #+end_src +*** Compute dtmp_c (OpenACC offload) + :PROPERTIES: + :Name: qmckl_compute_dtmp_c_acc_offload + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: + + #+NAME: qmckl_factor_dtmp_c_acc_offload_args + | Variable | Type | In/Out | Description | + |--------------------------+------------------------------------------------------------------+--------+-----------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~cord_num~ | ~int64_t~ | in | Order of polynomials | + | ~elec_num~ | ~int64_t~ | in | Number of electrons | + | ~nucl_num~ | ~int64_t~ | in | Number of nucleii | + | ~walk_num~ | ~int64_t~ | in | Number of walkers | + | ~een_rescaled_e_deriv_e~ | ~double[walk_num][0:cord_num][elec_num][4][elec_num]~ | in | Electron-electron rescaled factor derivatives | + | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | + | ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes +integer function qmckl_compute_dtmp_c_acc_offload_f(context, cord_num, elec_num, nucl_num, & + walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) & + result(info) + use qmckl + implicit none + integer(qmckl_context), intent(in) :: context + integer*8 , intent(in) :: cord_num + integer*8 , intent(in) :: elec_num + integer*8 , intent(in) :: nucl_num + integer*8 , intent(in) :: walk_num + double precision , intent(in) :: een_rescaled_e_deriv_e(elec_num, 4, elec_num, 0:cord_num, walk_num) + double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) + double precision , intent(out) :: dtmp_c(elec_num, 4, nucl_num,0:cord_num, 0:cord_num-1, walk_num) + double precision :: x, tmp + integer*8 :: i, j, jj, k2, a, l, kk, p, lmax, nw, ii + character :: TransA, TransB + double precision :: alpha, beta + integer*8 :: M, N, K, LDA, LDB, LDC + + TransA = 'N' + TransB = 'N' + alpha = 1.0d0 + beta = 0.0d0 + + info = QMCKL_SUCCESS + + if (context == QMCKL_NULL_CONTEXT) then + info = QMCKL_INVALID_CONTEXT + return + endif + + if (cord_num <= 0) then + info = QMCKL_INVALID_ARG_2 + return + endif + + if (elec_num <= 0) then + info = QMCKL_INVALID_ARG_3 + return + endif + + if (nucl_num <= 0) then + info = QMCKL_INVALID_ARG_4 + return + endif + + M = 4*elec_num + N = nucl_num*(cord_num + 1) + K = elec_num + LDA = 4*size(een_rescaled_e_deriv_e,1) + LDB = size(een_rescaled_n,1) + LDC = 4*size(dtmp_c,1) + + do nw=1, walk_num + do i=0, cord_num-1 + + ! Single DGEMM + do j=0,cord_num + do jj=1,nucl_num + do k2=1,4 + do kk=1,elec_num + + tmp = 0.0 + do l=1,K + tmp = tmp + & + een_rescaled_e_deriv_e(kk, k2, l, i, nw) * een_rescaled_n(l, jj, j, nw) + enddo + ! affect tmp + dtmp_c(kk, k2, jj, j, i, nw ) = tmp + + enddo + enddo + enddo + enddo + + + !info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, & + ! een_rescaled_e_deriv_e(1,1,1,i,nw),LDA*1_8, & + ! een_rescaled_n(1,1,0,nw),LDB*1_8, & + ! beta, & + ! dtmp_c(1,1,1,0,i,nw),LDC) + end do + end do + +end function qmckl_compute_dtmp_c_acc_offload_f + #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_dtmp_c_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_dtmp_c_acc_offload ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e_deriv_e, + const double* een_rescaled_n, + double* const dtmp_c ); + #+end_src + + + #+CALL: generate_c_interface(table=qmckl_factor_dtmp_c_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src f90 :tangle (eval f) :comments org :exports none + integer(c_int32_t) function qmckl_compute_dtmp_c_acc_offload & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: cord_num + integer (c_int64_t) , intent(in) , value :: elec_num + integer (c_int64_t) , intent(in) , value :: nucl_num + integer (c_int64_t) , intent(in) , value :: walk_num + real (c_double ) , intent(in) :: een_rescaled_e_deriv_e(elec_num,4,elec_num,0:cord_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) + real (c_double ) , intent(out) :: dtmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) + + integer(c_int32_t), external :: qmckl_compute_dtmp_c_f + info = qmckl_compute_dtmp_c_f & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) + + end function qmckl_compute_dtmp_c_acc_offload + #+end_src + *** Test #+name: helper_funcs @@ -6140,10 +6313,10 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) /* Choose the correct compute function (depending on offload type) */ bool default_compute = true; -#ifdef HAVE_OPENMP_OFFLOAD - if(ctx->jastrow.offload_type == OFFLOAD_OPENMP) { + #ifdef HAVE_OPENACC_OFFLOAD + if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) { qmckl_exit_code rc = - qmckl_compute_factor_een_deriv_e_omp_offload(context, + qmckl_compute_factor_een_deriv_e_acc_offload(context, ctx->electron.walk_num, ctx->electron.num, ctx->nucleus.num, @@ -6157,8 +6330,11 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_n_deriv_e, ctx->jastrow.factor_een_deriv_e); default_compute = false; + if (rc != QMCKL_SUCCESS) { + return rc; + } } -#endif + #endif if(default_compute) { qmckl_exit_code rc = @@ -6175,10 +6351,9 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.een_rescaled_n_deriv_e, ctx->jastrow.factor_een_deriv_e); - } - - if (rc != QMCKL_SUCCESS) { - return rc; + if (rc != QMCKL_SUCCESS) { + return rc; + } } ctx->jastrow.factor_een_deriv_e_date = ctx->date; @@ -6577,14 +6752,14 @@ end function qmckl_compute_factor_een_deriv_e_f end function qmckl_compute_factor_een_deriv_e #+end_src -*** Compute (OpenMP offload)... +*** Compute (OpenACC offload) :PROPERTIES: :Name: qmckl_compute_factor_een_deriv_e :CRetType: qmckl_exit_code :FRetType: qmckl_exit_code :END: - #+NAME: qmckl_factor_een_deriv_e_omp_offload_args + #+NAME: qmckl_factor_een_deriv_e_acc_offload_args | Variable | Type | In/Out | Description | |--------------------------+---------------------------------------------------------------------+--------+------------------------------------------------| | ~context~ | ~qmckl_context~ | in | Global state | @@ -6603,9 +6778,8 @@ end function qmckl_compute_factor_een_deriv_e_f #+begin_src f90 :comments org :tangle (eval f) :noweb yes -#ifdef HAVE_OPENMP_OFFLOAD -! TODO Add some offload statements -integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & +#ifdef HAVE_OPENACC_OFFLOAD +integer function qmckl_compute_factor_een_deriv_e_acc_offload_f(context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & cord_vect_full, lkpm_combined_index, & tmp_c, dtmp_c, een_rescaled_n, een_rescaled_n_deriv_e, factor_een_deriv_e) & result(info) @@ -6653,8 +6827,8 @@ integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_nu factor_een_deriv_e = 0.0d0 + !$acc parallel do nw =1, walk_num - !$omp target do n = 1, dim_cord_vect l = lkpm_combined_index(n, 1) k = lkpm_combined_index(n, 2) @@ -6665,6 +6839,7 @@ integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_nu cn = cord_vect_full(a, n) if(cn == 0.d0) cycle + !$acc loop collapse(2) do ii = 1, 4 do j = 1, elec_num factor_een_deriv_e(j,ii,nw) = factor_een_deriv_e(j,ii,nw) + (& @@ -6677,6 +6852,8 @@ integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_nu end do cn = cn + cn + + !$acc loop do j = 1, elec_num factor_een_deriv_e(j,4,nw) = factor_een_deriv_e(j,4,nw) + (& (dtmp_c(j,1,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,1,a,m+l,nw) + & @@ -6689,19 +6866,18 @@ integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_nu end do end do end do - !$omp end target end do - -end function qmckl_compute_factor_een_deriv_e_omp_offload_f + !$acc end parallel +end function qmckl_compute_factor_een_deriv_e_acc_offload_f #endif #+end_src - #+CALL: generate_c_header(table=qmckl_factor_een_deriv_e_omp_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + #+CALL: generate_c_header(table=qmckl_factor_een_deriv_e_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org -#ifdef HAVE_OPENMP_OFFLOAD - qmckl_exit_code qmckl_compute_factor_een_deriv_e_omp_offload ( +#ifdef HAVE_OPENACC_OFFLOAD + qmckl_exit_code qmckl_compute_factor_een_deriv_e_acc_offload ( const qmckl_context context, const int64_t walk_num, const int64_t elec_num, @@ -6718,12 +6894,12 @@ end function qmckl_compute_factor_een_deriv_e_omp_offload_f #endif #+end_src -#+CALL: generate_c_interface(table=qmckl_factor_een_deriv_e_omp_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +#+CALL: generate_c_interface(table=qmckl_factor_een_deriv_e_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none -#ifdef HAVE_OPENMP_OFFLOAD - integer(c_int32_t) function qmckl_compute_factor_een_deriv_e_omp_offload & +#ifdef HAVE_OPENACC_OFFLOAD + integer(c_int32_t) function qmckl_compute_factor_een_deriv_e_acc_offload & (context, & walk_num, & elec_num, & @@ -6756,8 +6932,8 @@ end function qmckl_compute_factor_een_deriv_e_omp_offload_f real (c_double ) , intent(in) :: een_rescaled_n_deriv_e(elec_num,4,nucl_num,0:cord_num,walk_num) real (c_double ) , intent(out) :: factor_een_deriv_e(elec_num,4,walk_num) - integer(c_int32_t), external :: qmckl_compute_factor_een_deriv_e_omp_offload_f - info = qmckl_compute_factor_een_deriv_e_omp_offload_f & + integer(c_int32_t), external :: qmckl_compute_factor_een_deriv_e_acc_offload_f + info = qmckl_compute_factor_een_deriv_e_acc_offload_f & (context, & walk_num, & elec_num, & @@ -6772,7 +6948,7 @@ end function qmckl_compute_factor_een_deriv_e_omp_offload_f een_rescaled_n_deriv_e, & factor_een_deriv_e) - end function qmckl_compute_factor_een_deriv_e_omp_offload + end function qmckl_compute_factor_een_deriv_e_acc_offload #endif #+end_src From 9428eaa19e628906ae456fc57680e41652210b41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Wed, 30 Mar 2022 16:16:06 +0200 Subject: [PATCH 06/27] Implement computation of tmp_c and dtmp_c in OpenACC These 2 kernels seem to give good speedup compared to the CPU BLAS versions. However, the current GPU implementation of factor_een_deriv seems to be slightly slower (on the tested machine). TODO: - Try to improve factor_een_deriv GPU implem - Try out a cuBLAS implementation of tmp_c and dtmp_c --- org/qmckl_jastrow.org | 235 +++++++++++++++++++++++++++++++++--------- 1 file changed, 184 insertions(+), 51 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 70e1a8b..ccf0c4e 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -4809,19 +4809,41 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) } ctx->jastrow.tmp_c = tmp_c; } + /* Choose the correct compute function (depending on offload type) */ + bool default_compute = true; - qmckl_exit_code rc = - qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - if (rc != QMCKL_SUCCESS) { - return rc; + #ifdef HAVE_OPENACC_OFFLOAD + if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) { + qmckl_exit_code rc = + qmckl_compute_tmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + if (rc != QMCKL_SUCCESS) { + return rc; + } } + #endif + + if(default_compute) { + qmckl_exit_code rc = + qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + if (rc != QMCKL_SUCCESS) { + return rc; + } + } + ctx->jastrow.tmp_c_date = ctx->date; } @@ -5332,6 +5354,134 @@ end function qmckl_compute_tmp_c_f end function qmckl_compute_tmp_c #+end_src +*** Compute tmp_c (OpenACC offload) + :PROPERTIES: + :Name: qmckl_compute_tmp_c + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: + + #+NAME: qmckl_factor_tmp_c_acc_offload_args + | Variable | Type | In/Out | Description | + |------------------+------------------------------------------------------------------+--------+-----------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~cord_num~ | ~int64_t~ | in | Order of polynomials | + | ~elec_num~ | ~int64_t~ | in | Number of electrons | + | ~nucl_num~ | ~int64_t~ | in | Number of nucleii | + | ~walk_num~ | ~int64_t~ | in | Number of walkers | + | ~een_rescaled_e~ | ~double[walk_num][0:cord_num][elec_num][elec_num]~ | in | Electron-electron rescaled factor | + | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | + | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes +integer function qmckl_compute_tmp_c_acc_offload_f(context, cord_num, elec_num, nucl_num, & + walk_num, een_rescaled_e, een_rescaled_n, tmp_c) & + result(info) + use qmckl + implicit none + integer(qmckl_context), intent(in) :: context + integer*8 , intent(in) :: cord_num + integer*8 , intent(in) :: elec_num + integer*8 , intent(in) :: nucl_num + integer*8 , intent(in) :: walk_num + double precision , intent(in) :: een_rescaled_e(elec_num, elec_num, 0:cord_num, walk_num) + double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) + double precision , intent(out) :: tmp_c(elec_num, nucl_num,0:cord_num, 0:cord_num-1, walk_num) + double precision :: tmp + integer*8 :: i, j, jj, k, l, p, lmax, nw + + + info = QMCKL_SUCCESS + if (context == QMCKL_NULL_CONTEXT) then + info = QMCKL_INVALID_CONTEXT + return + endif + + if (cord_num <= 0) then + info = QMCKL_INVALID_ARG_2 + return + endif + + if (elec_num <= 0) then + info = QMCKL_INVALID_ARG_3 + return + endif + + if (nucl_num <= 0) then + info = QMCKL_INVALID_ARG_4 + return + endif + + !$acc parallel + !$acc loop independent gang worker vector collapse(5) + do nw=1, walk_num + do i=0, cord_num-1 + + do j=0,cord_num + do jj=1,nucl_num + do k=1,elec_num + + tmp = 0.0 + do l=1,elec_num + tmp = tmp + & + een_rescaled_e(k, l, i, nw) * een_rescaled_n(l, jj, j, nw) + end do + tmp_c(k, jj, j, i, nw) = tmp + + end do + end do + end do + + end do + end do + !$acc end parallel + + +end function qmckl_compute_tmp_c_acc_offload_f + #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_tmp_c_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + + + #+CALL: generate_c_interface(table=qmckl_factor_tmp_c_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src f90 :tangle (eval f) :comments org :exports none + integer(c_int32_t) function qmckl_compute_tmp_c_acc_offload & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: cord_num + integer (c_int64_t) , intent(in) , value :: elec_num + integer (c_int64_t) , intent(in) , value :: nucl_num + integer (c_int64_t) , intent(in) , value :: walk_num + real (c_double ) , intent(in) :: een_rescaled_e(elec_num,elec_num,0:cord_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) + real (c_double ) , intent(out) :: tmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) + + integer(c_int32_t), external :: qmckl_compute_tmp_c_acc_offload_f + info = qmckl_compute_tmp_c_acc_offload_f & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c) + + end function qmckl_compute_tmp_c_acc_offload + #+end_src *** Compute dtmp_c :PROPERTIES: @@ -5495,19 +5645,10 @@ integer function qmckl_compute_dtmp_c_acc_offload_f(context, cord_num, elec_num, double precision , intent(in) :: een_rescaled_e_deriv_e(elec_num, 4, elec_num, 0:cord_num, walk_num) double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) double precision , intent(out) :: dtmp_c(elec_num, 4, nucl_num,0:cord_num, 0:cord_num-1, walk_num) - double precision :: x, tmp - integer*8 :: i, j, jj, k2, a, l, kk, p, lmax, nw, ii - character :: TransA, TransB - double precision :: alpha, beta - integer*8 :: M, N, K, LDA, LDB, LDC - - TransA = 'N' - TransB = 'N' - alpha = 1.0d0 - beta = 0.0d0 + double precision :: tmp + integer*8 :: nw, i, j, jj, k, kk, l info = QMCKL_SUCCESS - if (context == QMCKL_NULL_CONTEXT) then info = QMCKL_INVALID_CONTEXT return @@ -5528,44 +5669,32 @@ integer function qmckl_compute_dtmp_c_acc_offload_f(context, cord_num, elec_num, return endif - M = 4*elec_num - N = nucl_num*(cord_num + 1) - K = elec_num - LDA = 4*size(een_rescaled_e_deriv_e,1) - LDB = size(een_rescaled_n,1) - LDC = 4*size(dtmp_c,1) - + !$acc parallel + !$acc loop independent gang worker vector collapse(6) do nw=1, walk_num do i=0, cord_num-1 - ! Single DGEMM - do j=0,cord_num + do j=0,cord_num do jj=1,nucl_num - do k2=1,4 + do k=1,4 do kk=1,elec_num tmp = 0.0 - do l=1,K + do l=1,elec_num tmp = tmp + & - een_rescaled_e_deriv_e(kk, k2, l, i, nw) * een_rescaled_n(l, jj, j, nw) - enddo - ! affect tmp - dtmp_c(kk, k2, jj, j, i, nw ) = tmp + een_rescaled_e_deriv_e(kk, k, l, i, nw) * een_rescaled_n(l, jj, j, nw) + end do + dtmp_c(kk, k, jj, j, i, nw ) = tmp - enddo - enddo - enddo - enddo - - - !info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, & - ! een_rescaled_e_deriv_e(1,1,1,i,nw),LDA*1_8, & - ! een_rescaled_n(1,1,0,nw),LDB*1_8, & - ! beta, & - ! dtmp_c(1,1,1,0,i,nw),LDC) + end do + end do end do end do + end do + end do + !$acc end parallel + end function qmckl_compute_dtmp_c_acc_offload_f #+end_src @@ -5605,8 +5734,8 @@ end function qmckl_compute_dtmp_c_acc_offload_f real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) real (c_double ) , intent(out) :: dtmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) - integer(c_int32_t), external :: qmckl_compute_dtmp_c_f - info = qmckl_compute_dtmp_c_f & + integer(c_int32_t), external :: qmckl_compute_dtmp_c_acc_offload_f + info = qmckl_compute_dtmp_c_acc_offload_f & (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) end function qmckl_compute_dtmp_c_acc_offload @@ -6316,7 +6445,11 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) #ifdef HAVE_OPENACC_OFFLOAD if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) { qmckl_exit_code rc = - qmckl_compute_factor_een_deriv_e_acc_offload(context, + // CPU version + qmckl_compute_factor_een_deriv_e(context, + + // GPU version : No speedup on this kernel yet + // qmckl_compute_factor_een_deriv_e_acc_offload(context, ctx->electron.walk_num, ctx->electron.num, ctx->nucleus.num, From 26bbd6f3412aedac48c55d1a9fbc7325df74d568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Fri, 1 Apr 2022 09:19:56 +0200 Subject: [PATCH 07/27] Start work on cuBLAS implementation TODO Replace CPU BLAS calls by cuBLAS calls (will probably require to write a Fortran to the functions we're interested in, at least DGEMMs) --- org/qmckl_jastrow.org | 297 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 296 insertions(+), 1 deletion(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index ccf0c4e..8e2a00c 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -330,7 +330,8 @@ kappa_inv = 1.0/kappa #+begin_src c :comments org :tangle (eval h_type) typedef enum qmckl_jastrow_offload_type{ OFFLOAD_NONE, - OFFLOAD_OPENACC + OFFLOAD_OPENACC, + OFFLOAD_CUBLAS } qmckl_jastrow_offload_type; #+end_src @@ -4829,6 +4830,23 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) } #endif + #ifdef HAVE_CUBLAS_OFFLOAD + if(ctx->jastrow.offload_type == OFFLOAD_CUBLAS) { + qmckl_exit_code rc = + qmckl_compute_tmp_c_cublas_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + if (rc != QMCKL_SUCCESS) { + return rc; + } + } + #endif + if(default_compute) { qmckl_exit_code rc = qmckl_compute_tmp_c(context, @@ -4906,6 +4924,24 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) } #endif + #ifdef HAVE_CUBLAS_OFFLOAD + if(ctx->jastrow.offload_type == OFFLOAD_CUBLAS) { + qmckl_exit_code rc = + qmckl_compute_dtmp_c_cublas_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + default_compute = false; + if (rc != QMCKL_SUCCESS) { + return rc; + } + } + #endif + if(default_compute) { qmckl_exit_code rc = qmckl_compute_dtmp_c(context, @@ -5483,6 +5519,137 @@ end function qmckl_compute_tmp_c_acc_offload_f end function qmckl_compute_tmp_c_acc_offload #+end_src +*** Compute tmp_c (cuBLAS offload) + :PROPERTIES: + :Name: qmckl_compute_tmp_c_cublas_offload + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: + + #+NAME: qmckl_factor_tmp_c_cublas_offload_args + | Variable | Type | In/Out | Description | + |------------------+------------------------------------------------------------------+--------+-----------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~cord_num~ | ~int64_t~ | in | Order of polynomials | + | ~elec_num~ | ~int64_t~ | in | Number of electrons | + | ~nucl_num~ | ~int64_t~ | in | Number of nucleii | + | ~walk_num~ | ~int64_t~ | in | Number of walkers | + | ~een_rescaled_e~ | ~double[walk_num][0:cord_num][elec_num][elec_num]~ | in | Electron-electron rescaled factor | + | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | + | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes +integer function qmckl_compute_tmp_c_cublas_offload_f(context, cord_num, elec_num, nucl_num, & + walk_num, een_rescaled_e, een_rescaled_n, tmp_c) & + result(info) + use qmckl + implicit none + integer(qmckl_context), intent(in) :: context + integer*8 , intent(in) :: cord_num + integer*8 , intent(in) :: elec_num + integer*8 , intent(in) :: nucl_num + integer*8 , intent(in) :: walk_num + double precision , intent(in) :: een_rescaled_e(elec_num, elec_num, 0:cord_num, walk_num) + double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) + double precision , intent(out) :: tmp_c(elec_num, nucl_num,0:cord_num, 0:cord_num-1, walk_num) + double precision :: x + integer*8 :: i, j, a, l, kk, p, lmax, nw + character :: TransA, TransB + double precision :: alpha, beta + integer*8 :: M, N, K, LDA, LDB, LDC + + TransA = 'N' + TransB = 'N' + alpha = 1.0d0 + beta = 0.0d0 + + info = QMCKL_SUCCESS + + if (context == QMCKL_NULL_CONTEXT) then + info = QMCKL_INVALID_CONTEXT + return + endif + + if (cord_num <= 0) then + info = QMCKL_INVALID_ARG_2 + return + endif + + if (elec_num <= 0) then + info = QMCKL_INVALID_ARG_3 + return + endif + + if (nucl_num <= 0) then + info = QMCKL_INVALID_ARG_4 + return + endif + + M = elec_num + N = nucl_num*(cord_num + 1) + K = elec_num + LDA = size(een_rescaled_e,1) + LDB = size(een_rescaled_n,1) + LDC = size(tmp_c,1) + + ! Alloc and copy memory on device + + do nw=1, walk_num + do i=0, cord_num-1 + info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, & + een_rescaled_e(1,1,i,nw),LDA*1_8, & + een_rescaled_n(1,1,0,nw),LDB*1_8, & + beta, & + tmp_c(1,1,0,i,nw),LDC) + end do + end do + +end function qmckl_compute_tmp_c_cublas_offload_f + #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_tmp_c_cublas_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_tmp_c_cublas_offload ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + + + #+CALL: generate_c_interface(table=qmckl_factor_tmp_c_cublas_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src f90 :tangle (eval f) :comments org :exports none + integer(c_int32_t) function qmckl_compute_tmp_c_cublas_offload & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: cord_num + integer (c_int64_t) , intent(in) , value :: elec_num + integer (c_int64_t) , intent(in) , value :: nucl_num + integer (c_int64_t) , intent(in) , value :: walk_num + real (c_double ) , intent(in) :: een_rescaled_e(elec_num,elec_num,0:cord_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) + real (c_double ) , intent(out) :: tmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) + + integer(c_int32_t), external :: qmckl_compute_tmp_c_cublas_offload_f + info = qmckl_compute_tmp_c_cublas_offload_f & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c) + + end function qmckl_compute_tmp_c_cublas_offload + #+end_src + *** Compute dtmp_c :PROPERTIES: :Name: qmckl_compute_dtmp_c @@ -5740,6 +5907,134 @@ end function qmckl_compute_dtmp_c_acc_offload_f end function qmckl_compute_dtmp_c_acc_offload #+end_src +*** Compute dtmp_c (cuBLAS offload) + :PROPERTIES: + :Name: qmckl_compute_dtmp_c + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: + + #+NAME: qmckl_factor_dtmp_c_cublas_offload_args + | Variable | Type | In/Out | Description | + |--------------------------+------------------------------------------------------------------+--------+-----------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~cord_num~ | ~int64_t~ | in | Order of polynomials | + | ~elec_num~ | ~int64_t~ | in | Number of electrons | + | ~nucl_num~ | ~int64_t~ | in | Number of nucleii | + | ~walk_num~ | ~int64_t~ | in | Number of walkers | + | ~een_rescaled_e_deriv_e~ | ~double[walk_num][0:cord_num][elec_num][4][elec_num]~ | in | Electron-electron rescaled factor derivatives | + | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | + | ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes +integer function qmckl_compute_dtmp_c_cublas_offload_f(context, cord_num, elec_num, nucl_num, & + walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) & + result(info) + use qmckl + implicit none + integer(qmckl_context), intent(in) :: context + integer*8 , intent(in) :: cord_num + integer*8 , intent(in) :: elec_num + integer*8 , intent(in) :: nucl_num + integer*8 , intent(in) :: walk_num + double precision , intent(in) :: een_rescaled_e_deriv_e(elec_num, 4, elec_num, 0:cord_num, walk_num) + double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) + double precision , intent(out) :: dtmp_c(elec_num, 4, nucl_num,0:cord_num, 0:cord_num-1, walk_num) + double precision :: x + integer*8 :: i, j, a, l, kk, p, lmax, nw, ii + character :: TransA, TransB + double precision :: alpha, beta + integer*8 :: M, N, K, LDA, LDB, LDC + + TransA = 'N' + TransB = 'N' + alpha = 1.0d0 + beta = 0.0d0 + + info = QMCKL_SUCCESS + + if (context == QMCKL_NULL_CONTEXT) then + info = QMCKL_INVALID_CONTEXT + return + endif + + if (cord_num <= 0) then + info = QMCKL_INVALID_ARG_2 + return + endif + + if (elec_num <= 0) then + info = QMCKL_INVALID_ARG_3 + return + endif + + if (nucl_num <= 0) then + info = QMCKL_INVALID_ARG_4 + return + endif + + M = 4*elec_num + N = nucl_num*(cord_num + 1) + K = elec_num + LDA = 4*size(een_rescaled_e_deriv_e,1) + LDB = size(een_rescaled_n,1) + LDC = 4*size(dtmp_c,1) + + do nw=1, walk_num + do i=0, cord_num-1 + info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, & + een_rescaled_e_deriv_e(1,1,1,i,nw),LDA*1_8, & + een_rescaled_n(1,1,0,nw),LDB*1_8, & + beta, & + dtmp_c(1,1,1,0,i,nw),LDC) + end do + end do + +end function qmckl_compute_dtmp_c_cublas_offload_f + #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_dtmp_c_cublas_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_dtmp_c_cublas_offload ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e_deriv_e, + const double* een_rescaled_n, + double* const dtmp_c ); + #+end_src + + + #+CALL: generate_c_interface(table=qmckl_factor_dtmp_c_cublas_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src f90 :tangle (eval f) :comments org :exports none + integer(c_int32_t) function qmckl_compute_dtmp_c_cublas_offload & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: cord_num + integer (c_int64_t) , intent(in) , value :: elec_num + integer (c_int64_t) , intent(in) , value :: nucl_num + integer (c_int64_t) , intent(in) , value :: walk_num + real (c_double ) , intent(in) :: een_rescaled_e_deriv_e(elec_num,4,elec_num,0:cord_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) + real (c_double ) , intent(out) :: dtmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) + + integer(c_int32_t), external :: qmckl_compute_dtmp_c_cublas_offload_f + info = qmckl_compute_dtmp_c_cublas_offload_f & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) + + end function qmckl_compute_dtmp_c_cublas_offload + #+end_src *** Test From 1173bb2586ad8938eede917595d216b793a508a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Fri, 1 Apr 2022 17:56:27 +0200 Subject: [PATCH 08/27] Update configure.ac with cuBLAS support (forgotten in last commit) --- configure.ac | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 8bd6747..d2ac039 100644 --- a/configure.ac +++ b/configure.ac @@ -234,7 +234,7 @@ if test "$FC" = "verificarlo-f"; then FCFLAGS="-Mpreprocess $FCFLAGS" fi -# Enable GPU offloading +## Enable GPU offloading # OpenACC offloading AC_ARG_ENABLE(openacc-offload, [AS_HELP_STRING([--openacc-offload],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) @@ -244,6 +244,21 @@ AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENACC_OFFLOAD $FCFLAGS" ]) +# cuBLAS offloading +AC_ARG_ENABLE(cublas-offload, [AS_HELP_STRING([--cublas-offload],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no) +AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ + AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines]) + FCFLAGS="-DHAVE_CUBLAS_OFFLOAD" +]) + +# General offload +AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes" || test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ + CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS" + FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS $FCFLAGS" +]) + +## + AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], ok=$enableval, ok=no) if test "$ok" = "yes"; then if test "$GCC" = "yes"; then @@ -374,7 +389,8 @@ LDFLAGS:........: ${LDFLAGS} LIBS............: ${LIBS} USE CHAMELEON...: ${with_chameleon} HPC version.....: ${HAVE_HPC} -OpenACC offload : ${HAVE_OPENACC_OFFLOAD} +OpenACC offload.: ${HAVE_OPENACC_OFFLOAD} +cuBLAS offload..: ${HAVE_CUBLAS_OFFLOAD} Package features: ${ARGS} From 84013a5f760cd044e43d3144671a0a266cb65e04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Mon, 4 Apr 2022 12:12:11 +0200 Subject: [PATCH 09/27] Cleanup before merging into QMCkl's GPU branch --- org/qmckl_blas.org | 92 +++++++++++++++++++++++++++++++++++++++++++ org/qmckl_jastrow.org | 2 +- 2 files changed, 93 insertions(+), 1 deletion(-) diff --git a/org/qmckl_blas.org b/org/qmckl_blas.org index 9cd7e18..0a83b34 100644 --- a/org/qmckl_blas.org +++ b/org/qmckl_blas.org @@ -2288,6 +2288,98 @@ qmckl_transpose (qmckl_context context, #+end_src +* cuBLAS interface (optional) +We propose a cuBLAS version of some QMCkl kernels. However, because cuBLAS is written in C, we need to define a Fortran interface for it. We start by defining functions to manage the cuBLAS handle structure from Fortran, before writing interfaces for the specific cuBLAS functions we are interested in. + +TODO These are the C functions that are supposed to be called from Fortran. We still need to write the interfaces themselves. + +#+begin_src c :tangle (eval h_private_func) :comments org +#ifdef HAVE_CUBLAS_OFFLOAD +#include +#endif +#+end_src + +#+begin_src c :tangle (eval h_private_func) :comments org +#ifdef HAVE_CUBLAS_OFFLOAD +cublasHandle_t* get_cublas_handle_interfaced(); +#endif +#+end_src + +#+begin_src c :comments org :tangle (eval c) :exports none +#ifdef HAVE_CUBLAS_OFFLOAD +cublasHandle_t* get_cublas_handle_interfaced() { + cublasHandle_t* handle = malloc(sizeof(cublasHandle_t)); + + cublasStatus_t status = cublasCreate(handle); + if (status != CUBLAS_STATUS_SUCCESS){ + fprintf(stderr, "Error while initializing cuBLAS\n"); + exit(1); + } + + return handle; +} +#endif +#+end_src + +#+begin_src c :tangle (eval h_private_func) :comments org +#ifdef HAVE_CUBLAS_OFFLOAD +void destroy_cublas_handle_interfaced(cublasHandle_t* handle); +#endif +#+end_src + +#+begin_src c :comments org :tangle (eval c) :exports none +#ifdef HAVE_CUBLAS_OFFLOAD +void destroy_cublas_handle_interfaced(cublasHandle_t* handle) { + if(handle != NULL) { + free(handle); + } +} +#endif +#+end_src + +** DGEMM + +#+begin_src c :tangle (eval h_private_func) :comments org +#ifdef HAVE_CUBLAS_OFFLOAD +cublasStatus_t cublasDgemm_f( + cublasHandle_t* handle, + cublasOperation_t* transa, cublasOperation_t* transb, + int* m, int* n, int* k, + const double* alpha, + const double*A, int* lda, + const double* B, int* ldb, + const double* beta, + double*C, int* ldc +); +#endif +#+end_src + +#+begin_src c :comments org :tangle (eval c) :exports none + +#ifdef HAVE_CUBLAS_OFFLOAD +cublasStatus_t cublasDgemm_f( + cublasHandle_t* handle, + cublasOperation_t* transa, cublasOperation_t* transb, + int* m, int* n, int* k, + const double* alpha, + const double*A, int* lda, + const double* B, int* ldb, + const double* beta, + double*C, int* ldc +) { + return cublasDgemm_f( + handle, + transa, transb, + m, n, k, + alpha, A, lda, B,ldb, + beta, C, ldc + ); +} +#endif +#+end_src + + + * End of files :noexport: diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 8e2a00c..e1a7a18 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5592,7 +5592,7 @@ integer function qmckl_compute_tmp_c_cublas_offload_f(context, cord_num, elec_nu LDB = size(een_rescaled_n,1) LDC = size(tmp_c,1) - ! Alloc and copy memory on device + ! TODO Replace with calls to cuBLAS do nw=1, walk_num do i=0, cord_num-1 From 98097e8fa738011f5bc6e156968a82c28e20417e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Tue, 5 Apr 2022 11:02:08 +0200 Subject: [PATCH 10/27] Convert GPU implementations to C TODO : Fix naive implementation which seems to be incorrect (probably an issue with indexing) --- org/qmckl_jastrow.org | 1488 +++++++++++++++++++---------------------- 1 file changed, 683 insertions(+), 805 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index e1a7a18..8736c0b 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -151,6 +151,7 @@ int main() { | ~factor_en_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the en derivative | | ~factor_een_deriv_e~ | ~double[4][nelec][walk_num]~ | out | Derivative of the Jastrow factor: electron-electron-nucleus part | | ~factor_een_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the een derivative | + | ~offload_type~ | ~qmckl_jastrow_offload_type~ | in | Enum type to change offload type at runtime | computed data: @@ -335,7 +336,7 @@ typedef enum qmckl_jastrow_offload_type{ } qmckl_jastrow_offload_type; #+end_src -#+begin_src c :comments org :tangle (eval h_private_type) + #+begin_src c :comments org :tangle (eval h_private_type) typedef struct qmckl_jastrow_struct{ int32_t uninitialized; int64_t aord_num; @@ -1829,6 +1830,72 @@ integer function qmckl_compute_factor_ee_f(context, walk_num, elec_num, up_num, end function qmckl_compute_factor_ee_f #+end_src +#+begin_src c :comments org :tangle (eval c) :noweb yes + qmckl_exit_code qmckl_compute_factor_ee ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* asymp_jasb, + double* const factor_ee ) { + + int ipar; // can we use a smaller integer? + double pow_ser, x, x1, spin_fact, power_ser; + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (bord_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + for (int nw = 0; nw < walk_num; ++nw) { + factor_ee[nw] = 0.0; // put init array here. + for (int i = 0; i < elec_num; ++i ) { + for (int j = 0; j < i; ++j) { + //x = ee_distance_rescaled[j * (walk_num * elec_num) + i * (walk_num) + nw]; + x = ee_distance_rescaled[j + i * elec_num + nw*(elec_num * elec_num)]; + x1 = x; + power_ser = 0.0; + spin_fact = 1.0; + ipar = 0; // index of asymp_jasb + + for (int p = 1; p < bord_num; ++p) { + x = x * x1; + power_ser = power_ser + bord_vector[p + 1] * x; + } + + if(i < up_num || j >= up_num) { + spin_fact = 0.5; + ipar = 1; + } + + factor_ee[nw] = factor_ee[nw] + spin_fact * bord_vector[0] * \ + x1 / \ + (1.0 + bord_vector[1] * \ + x1) \ + -asymp_jasb[ipar] + power_ser; + + } + } + } + + return QMCKL_SUCCESS; +} +#+end_src + #+CALL: generate_c_header(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: @@ -1846,49 +1913,7 @@ end function qmckl_compute_factor_ee_f #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_factor_ee & - (context, & - walk_num, & - elec_num, & - up_num, & - bord_num, & - bord_vector, & - ee_distance_rescaled, & - asymp_jasb, & - factor_ee) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: walk_num - integer (c_int64_t) , intent(in) , value :: elec_num - integer (c_int64_t) , intent(in) , value :: up_num - integer (c_int64_t) , intent(in) , value :: bord_num - real (c_double ) , intent(in) :: bord_vector(bord_num + 1) - real (c_double ) , intent(in) :: ee_distance_rescaled(elec_num,elec_num,walk_num) - real (c_double ) , intent(in) :: asymp_jasb(2) - real (c_double ) , intent(out) :: factor_ee(walk_num) - - integer(c_int32_t), external :: qmckl_compute_factor_ee_f - info = qmckl_compute_factor_ee_f & - (context, & - walk_num, & - elec_num, & - up_num, & - bord_num, & - bord_vector, & - ee_distance_rescaled, & - asymp_jasb, & - factor_ee) - - end function qmckl_compute_factor_ee - #+end_src *** Test #+begin_src python :results output :exports none :noweb yes @@ -2569,6 +2594,74 @@ integer function qmckl_compute_factor_en_f(context, walk_num, elec_num, nucl_num end function qmckl_compute_factor_en_f #+end_src + + + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_factor_en ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + double* const factor_en ) { + + + int ipar; + double x, x1, spin_fact, power_ser; + + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + if (aord_num <= 0) { + return QMCKL_INVALID_ARG_7; + } + + + for (int nw = 0; nw < walk_num; ++nw ) { + // init array + factor_en[nw] = 0.0; + for (int a = 0; a < nucl_num; ++a ) { + for (int i = 0; i < elec_num; ++i ) { + // x = ee_distance_rescaled[j * (walk_num * elec_num) + i * (walk_num) + nw]; + x = en_distance_rescaled[i + a * elec_num + nw * (elec_num * nucl_num)]; + x1 = x; + power_ser = 0.0; + + for (int p = 2; p < aord_num+1; ++p) { + x = x * x1; + power_ser = power_ser + aord_vector[(p+1)-1 + (type_nucl_vector[a]-1) * aord_num] * x; + } + + factor_en[nw] = factor_en[nw] + aord_vector[0 + (type_nucl_vector[a]-1)*aord_num] * x1 / \ + (1.0 + aord_vector[1 + (type_nucl_vector[a]-1) * aord_num] * x1) + \ + power_ser; + + } + } + } + + return QMCKL_SUCCESS; +} + #+end_src + #+CALL: generate_c_header(table=qmckl_factor_en_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -2588,53 +2681,6 @@ end function qmckl_compute_factor_en_f #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_en_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_factor_en & - (context, & - walk_num, & - elec_num, & - nucl_num, & - type_nucl_num, & - type_nucl_vector, & - aord_num, & - aord_vector, & - en_distance_rescaled, & - factor_en) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: walk_num - integer (c_int64_t) , intent(in) , value :: elec_num - integer (c_int64_t) , intent(in) , value :: nucl_num - integer (c_int64_t) , intent(in) , value :: type_nucl_num - integer (c_int64_t) , intent(in) :: type_nucl_vector(nucl_num) - integer (c_int64_t) , intent(in) , value :: aord_num - real (c_double ) , intent(in) :: aord_vector(aord_num + 1, type_nucl_num) - real (c_double ) , intent(in) :: en_distance_rescaled(elec_num, nucl_num, walk_num) - real (c_double ) , intent(out) :: factor_en(walk_num) - - integer(c_int32_t), external :: qmckl_compute_factor_en_f - info = qmckl_compute_factor_en_f & - (context, & - walk_num, & - elec_num, & - nucl_num, & - type_nucl_num, & - type_nucl_vector, & - aord_num, & - aord_vector, & - en_distance_rescaled, & - factor_en) - - end function qmckl_compute_factor_en - #+end_src - *** Test #+begin_src python :results output :exports none :noweb yes import numpy as np @@ -4002,6 +4048,70 @@ integer function qmckl_compute_een_rescaled_n_f(context, walk_num, elec_num, nuc end function qmckl_compute_een_rescaled_n_f #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_een_rescaled_n ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* en_distance, + double* const een_rescaled_n ) { + + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_5; + } + + // Prepare table of exponentiated distances raised to appropriate power + for (int i = 0; i < (walk_num*(cord_num+1)*nucl_num*elec_num); ++i) { + een_rescaled_n[i] = 17.0; + } + + for (int nw = 0; nw < walk_num; ++nw) { + for (int a = 0; a < nucl_num; ++a) { + for (int i = 0; i < elec_num; ++i) { + // prepare the actual een table + //een_rescaled_n(:, :, 0, nw) = 1.0d0 + een_rescaled_n[i + a * elec_num + 0 + nw * elec_num*nucl_num*(cord_num+1)] = 1.0; + //een_rescaled_n(i, a, 1, nw) = dexp(-rescale_factor_kappa_en * en_distance(i, a, nw)) + een_rescaled_n[i + a*elec_num + elec_num*nucl_num + nw*elec_num*nucl_num*(cord_num+1)] = exp(-rescale_factor_kappa_en * \ + en_distance[i + a*elec_num + nw*elec_num*nucl_num]); + } + } + + for (int l = 2; l < (cord_num+1); ++l){ + for (int a = 0; a < nucl_num; ++a) { + for (int i = 0; i < elec_num; ++i) { + een_rescaled_n[i + a*elec_num + l*elec_num*nucl_num + nw*elec_num*nucl_num*(cord_num+1)] = een_rescaled_n[i + a*elec_num + (l-1)*elec_num*nucl_num + nw*elec_num*nucl_num*(cord_num+1)] *\ + een_rescaled_n[i + a*elec_num + elec_num*nucl_num + nw*elec_num*nucl_num*(cord_num+1)]; + } + } + } + + } + + return QMCKL_SUCCESS; +} + #+end_src + #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_n_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: @@ -4017,47 +4127,6 @@ end function qmckl_compute_een_rescaled_n_f double* const een_rescaled_n ); #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_n_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_een_rescaled_n & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - rescale_factor_kappa_en, & - en_distance, & - een_rescaled_n) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: walk_num - integer (c_int64_t) , intent(in) , value :: elec_num - integer (c_int64_t) , intent(in) , value :: nucl_num - integer (c_int64_t) , intent(in) , value :: cord_num - real (c_double ) , intent(in) , value :: rescale_factor_kappa_en - real (c_double ) , intent(in) :: en_distance(nucl_num,elec_num,walk_num) - real (c_double ) , intent(out) :: een_rescaled_n(nucl_num,elec_num,0:cord_num,walk_num) - - integer(c_int32_t), external :: qmckl_compute_een_rescaled_n_f - info = qmckl_compute_een_rescaled_n_f & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - rescale_factor_kappa_en, & - en_distance, & - een_rescaled_n) - - end function qmckl_compute_een_rescaled_n - #+end_src - *** Test #+begin_src python :results output :exports none :noweb yes @@ -4116,7 +4185,6 @@ assert(fabs(een_rescaled_n[0][1][0][4]-0.023391817607642338) < 1.e-12); assert(fabs(een_rescaled_n[0][2][1][3]-0.880957224822116) < 1.e-12); assert(fabs(een_rescaled_n[0][2][1][4]-0.027185942659395074) < 1.e-12); assert(fabs(een_rescaled_n[0][2][1][5]-0.01343938025140174) < 1.e-12); - #+end_src ** Electron-nucleus rescaled distances for each order and derivatives @@ -4779,7 +4847,6 @@ qmckl_exit_code qmckl_provide_lkpm_combined_index(qmckl_context context) qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) { - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; } @@ -4824,6 +4891,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); + default_compute = false; if (rc != QMCKL_SUCCESS) { return rc; } @@ -4841,6 +4909,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); + default_compute = false; if (rc != QMCKL_SUCCESS) { return rc; } @@ -4871,7 +4940,6 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) { - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; } @@ -5020,6 +5088,43 @@ integer function qmckl_compute_dim_cord_vect_f(context, cord_num, dim_cord_vect) end function qmckl_compute_dim_cord_vect_f #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_dim_cord_vect ( + const qmckl_context context, + const int64_t cord_num, + int64_t* const dim_cord_vect){ + + int lmax; + + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + *dim_cord_vect = 0; + + for (int p=2; p <= cord_num; ++p){ + for (int k=p-1; k >= 0; --k) { + if (k != 0) { + lmax = p - k; + } else { + lmax = p - k - 2; + } + for (int l = lmax; l >= 0; --l) { + if ( ((p - k - l) & 1)==1) continue; + *dim_cord_vect=*dim_cord_vect+1; + } + } + } + + return QMCKL_SUCCESS; +} + #+end_src + #+CALL: generate_c_header(table=qmckl_factor_dim_cord_vect_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: @@ -5031,28 +5136,6 @@ end function qmckl_compute_dim_cord_vect_f #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_dim_cord_vect_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_dim_cord_vect & - (context, cord_num, dim_cord_vect) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: cord_num - integer (c_int64_t) , intent(out) :: dim_cord_vect - - integer(c_int32_t), external :: qmckl_compute_dim_cord_vect_f - info = qmckl_compute_dim_cord_vect_f & - (context, cord_num, dim_cord_vect) - - end function qmckl_compute_dim_cord_vect - #+end_src - *** Compute cord_vect_full :PROPERTIES: :Name: qmckl_compute_cord_vect_full @@ -5171,7 +5254,7 @@ end function qmckl_compute_cord_vect_full_f | ~context~ | ~qmckl_context~ | in | Global state | | ~cord_num~ | ~int64_t~ | in | Order of polynomials | | ~dim_cord_vect~ | ~int64_t~ | in | dimension of cord full table | - | ~lpkm_combined_index~ | ~int64_t[4][dim_cord_vect]~ | out | Full list of combined indices | + | ~lkpm_combined_index~ | ~int64_t[4][dim_cord_vect]~ | out | Full list of combined indices | #+begin_src f90 :comments org :tangle (eval f) :noweb yes integer function qmckl_compute_lkpm_combined_index_f(context, cord_num, dim_cord_vect, & @@ -5227,6 +5310,53 @@ integer function qmckl_compute_lkpm_combined_index_f(context, cord_num, dim_cord end function qmckl_compute_lkpm_combined_index_f #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_lkpm_combined_index ( + const qmckl_context context, + const int64_t cord_num, + const int64_t dim_cord_vect, + int64_t* const lkpm_combined_index ) { + + int kk, lmax, m; + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (dim_cord_vect <= 0) { + return QMCKL_INVALID_ARG_3; + } + +/* +*/ + kk = 0; + for (int p = 2; p <= cord_num; ++p) { + for (int k=(p-1); k >= 0; --k) { + if (k != 0) { + lmax = p - k; + } else { + lmax = p - k - 2; + } + for (int l=lmax; l >= 0; --l) { + if (((p - k - l) & 1) == 1) continue; + m = (p - k - l)/2; + lkpm_combined_index[kk ] = l; + lkpm_combined_index[kk + dim_cord_vect] = k; + lkpm_combined_index[kk + 2*dim_cord_vect] = p; + lkpm_combined_index[kk + 3*dim_cord_vect] = m; + kk = kk + 1; + } + } + } + + return QMCKL_SUCCESS; +} + #+end_src + #+CALL: generate_c_header(table=qmckl_factor_lkpm_combined_index_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: @@ -5235,32 +5365,10 @@ end function qmckl_compute_lkpm_combined_index_f const qmckl_context context, const int64_t cord_num, const int64_t dim_cord_vect, - int64_t* const lpkm_combined_index ); + int64_t* const lkpm_combined_index ); #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_lkpm_combined_index_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_lkpm_combined_index & - (context, cord_num, dim_cord_vect, lpkm_combined_index) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: cord_num - integer (c_int64_t) , intent(in) , value :: dim_cord_vect - integer (c_int64_t) , intent(out) :: lpkm_combined_index(dim_cord_vect,4) - - integer(c_int32_t), external :: qmckl_compute_lkpm_combined_index_f - info = qmckl_compute_lkpm_combined_index_f & - (context, cord_num, dim_cord_vect, lpkm_combined_index) - - end function qmckl_compute_lkpm_combined_index - #+end_src *** Compute tmp_c :PROPERTIES: @@ -5348,6 +5456,73 @@ integer function qmckl_compute_tmp_c_f(context, cord_num, elec_num, nucl_num, & end function qmckl_compute_tmp_c_f #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_tmp_c ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) { + + qmckl_exit_code info; + int i, j, a, l, kk, p, lmax, nw; + char TransA, TransB; + double alpha, beta; + int M, N, K, LDA, LDB, LDC; + + TransA = 'N'; + TransB = 'N'; + alpha = 1.0; + beta = 0.0; + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + M = elec_num; + N = nucl_num*(cord_num + 1); + K = elec_num; + + LDA = sizeof(een_rescaled_e)/sizeof(double); + LDB = sizeof(een_rescaled_n)/sizeof(double); + LDC = sizeof(tmp_c)/sizeof(double); + + for (int nw=0; nw < walk_num; ++nw) { + for (int i=0; ijastrow.factor_een_deriv_e = factor_een_deriv_e; } - /* Choose the correct compute function (depending on offload type) */ - bool default_compute = true; - - #ifdef HAVE_OPENACC_OFFLOAD - if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) { - qmckl_exit_code rc = - // CPU version - qmckl_compute_factor_een_deriv_e(context, - - // GPU version : No speedup on this kernel yet - // qmckl_compute_factor_een_deriv_e_acc_offload(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.cord_vect_full, - ctx->jastrow.lkpm_combined_index, - ctx->jastrow.tmp_c, - ctx->jastrow.dtmp_c, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.een_rescaled_n_deriv_e, - ctx->jastrow.factor_een_deriv_e); - default_compute = false; - if (rc != QMCKL_SUCCESS) { - return rc; - } - } - #endif - - if(default_compute) { qmckl_exit_code rc = qmckl_compute_factor_een_deriv_e(context, ctx->electron.walk_num, @@ -6779,9 +6858,8 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.een_rescaled_n_deriv_e, ctx->jastrow.factor_een_deriv_e); - if (rc != QMCKL_SUCCESS) { - return rc; - } + if (rc != QMCKL_SUCCESS) { + return rc; } ctx->jastrow.factor_een_deriv_e_date = ctx->date; @@ -7180,206 +7258,6 @@ end function qmckl_compute_factor_een_deriv_e_f end function qmckl_compute_factor_een_deriv_e #+end_src -*** Compute (OpenACC offload) - :PROPERTIES: - :Name: qmckl_compute_factor_een_deriv_e - :CRetType: qmckl_exit_code - :FRetType: qmckl_exit_code - :END: - - #+NAME: qmckl_factor_een_deriv_e_acc_offload_args - | Variable | Type | In/Out | Description | - |--------------------------+---------------------------------------------------------------------+--------+------------------------------------------------| - | ~context~ | ~qmckl_context~ | in | Global state | - | ~walk_num~ | ~int64_t~ | in | Number of walkers | - | ~elec_num~ | ~int64_t~ | in | Number of electrons | - | ~nucl_num~ | ~int64_t~ | in | Number of nucleii | - | ~cord_num~ | ~int64_t~ | in | order of polynomials | - | ~dim_cord_vect~ | ~int64_t~ | in | dimension of full coefficient vector | - | ~cord_vect_full~ | ~double[dim_cord_vect][nucl_num]~ | in | full coefficient vector | - | ~lkpm_combined_index~ | ~int64_t[4][dim_cord_vect]~ | in | combined indices | - | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | in | Temporary intermediate tensor | - | ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][4][elec_num]~ | in | vector of non-zero coefficients | - | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | - | ~een_rescaled_n_deriv_e~ | ~double[walk_num][0:cord_num][nucl_num][4][elec_num]~ | in | Derivative of Electron-nucleus rescaled factor | - | ~factor_een_deriv_e~ | ~double[walk_num][4][elec_num]~ | out | Derivative of Electron-nucleus jastrow | - - - #+begin_src f90 :comments org :tangle (eval f) :noweb yes -#ifdef HAVE_OPENACC_OFFLOAD -integer function qmckl_compute_factor_een_deriv_e_acc_offload_f(context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & - cord_vect_full, lkpm_combined_index, & - tmp_c, dtmp_c, een_rescaled_n, een_rescaled_n_deriv_e, factor_een_deriv_e) & - result(info) - use qmckl - implicit none - integer(qmckl_context), intent(in) :: context - integer*8 , intent(in) :: walk_num, elec_num, cord_num, nucl_num, dim_cord_vect - integer*8 , intent(in) :: lkpm_combined_index(dim_cord_vect,4) - double precision , intent(in) :: cord_vect_full(nucl_num, dim_cord_vect) - double precision , intent(in) :: tmp_c(elec_num, nucl_num,0:cord_num, 0:cord_num-1, walk_num) - double precision , intent(in) :: dtmp_c(elec_num, 4, nucl_num,0:cord_num, 0:cord_num-1, walk_num) - double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) - double precision , intent(in) :: een_rescaled_n_deriv_e(elec_num, 4, nucl_num, 0:cord_num, walk_num) - double precision , intent(out) :: factor_een_deriv_e(elec_num,4,walk_num) - - integer*8 :: i, a, j, l, k, p, m, n, nw, ii - double precision :: accu, accu2, cn - - info = QMCKL_SUCCESS - - if (context == QMCKL_NULL_CONTEXT) then - info = QMCKL_INVALID_CONTEXT - return - endif - - if (walk_num <= 0) then - info = QMCKL_INVALID_ARG_2 - return - endif - - if (elec_num <= 0) then - info = QMCKL_INVALID_ARG_3 - return - endif - - if (nucl_num <= 0) then - info = QMCKL_INVALID_ARG_4 - return - endif - - if (cord_num <= 0) then - info = QMCKL_INVALID_ARG_5 - return - endif - - factor_een_deriv_e = 0.0d0 - - !$acc parallel - do nw =1, walk_num - do n = 1, dim_cord_vect - l = lkpm_combined_index(n, 1) - k = lkpm_combined_index(n, 2) - p = lkpm_combined_index(n, 3) - m = lkpm_combined_index(n, 4) - - do a = 1, nucl_num - cn = cord_vect_full(a, n) - if(cn == 0.d0) cycle - - !$acc loop collapse(2) - do ii = 1, 4 - do j = 1, elec_num - factor_een_deriv_e(j,ii,nw) = factor_een_deriv_e(j,ii,nw) + (& - tmp_c(j,a,m,k,nw) * een_rescaled_n_deriv_e(j,ii,a,m+l,nw) + & - (dtmp_c(j,ii,a,m,k,nw)) * een_rescaled_n(j,a,m+l,nw) + & - (dtmp_c(j,ii,a,m+l,k,nw)) * een_rescaled_n(j,a,m ,nw) + & - tmp_c(j,a,m+l,k,nw) * een_rescaled_n_deriv_e(j,ii,a,m,nw) & - ) * cn - end do - end do - - cn = cn + cn - - !$acc loop - do j = 1, elec_num - factor_een_deriv_e(j,4,nw) = factor_een_deriv_e(j,4,nw) + (& - (dtmp_c(j,1,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,1,a,m+l,nw) + & - (dtmp_c(j,2,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,2,a,m+l,nw) + & - (dtmp_c(j,3,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,3,a,m+l,nw) + & - (dtmp_c(j,1,a,m+l,k,nw)) * een_rescaled_n_deriv_e(j,1,a,m ,nw) + & - (dtmp_c(j,2,a,m+l,k,nw)) * een_rescaled_n_deriv_e(j,2,a,m ,nw) + & - (dtmp_c(j,3,a,m+l,k,nw)) * een_rescaled_n_deriv_e(j,3,a,m ,nw) & - ) * cn - end do - end do - end do - end do - !$acc end parallel -end function qmckl_compute_factor_een_deriv_e_acc_offload_f -#endif - #+end_src - - #+CALL: generate_c_header(table=qmckl_factor_een_deriv_e_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org -#ifdef HAVE_OPENACC_OFFLOAD - qmckl_exit_code qmckl_compute_factor_een_deriv_e_acc_offload ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const int64_t dim_cord_vect, - const double* cord_vect_full, - const int64_t* lkpm_combined_index, - const double* tmp_c, - const double* dtmp_c, - const double* een_rescaled_n, - const double* een_rescaled_n_deriv_e, - double* const factor_een_deriv_e ); -#endif - #+end_src - -#+CALL: generate_c_interface(table=qmckl_factor_een_deriv_e_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none -#ifdef HAVE_OPENACC_OFFLOAD - integer(c_int32_t) function qmckl_compute_factor_een_deriv_e_acc_offload & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - dim_cord_vect, & - cord_vect_full, & - lkpm_combined_index, & - tmp_c, & - dtmp_c, & - een_rescaled_n, & - een_rescaled_n_deriv_e, & - factor_een_deriv_e) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: walk_num - integer (c_int64_t) , intent(in) , value :: elec_num - integer (c_int64_t) , intent(in) , value :: nucl_num - integer (c_int64_t) , intent(in) , value :: cord_num - integer (c_int64_t) , intent(in) , value :: dim_cord_vect - real (c_double ) , intent(in) :: cord_vect_full(nucl_num,dim_cord_vect) - integer (c_int64_t) , intent(in) :: lkpm_combined_index(dim_cord_vect,4) - real (c_double ) , intent(in) :: tmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) - real (c_double ) , intent(in) :: dtmp_c(elec_num,4,nucl_num,0:cord_num,0:cord_num-1,walk_num) - real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) - real (c_double ) , intent(in) :: een_rescaled_n_deriv_e(elec_num,4,nucl_num,0:cord_num,walk_num) - real (c_double ) , intent(out) :: factor_een_deriv_e(elec_num,4,walk_num) - - integer(c_int32_t), external :: qmckl_compute_factor_een_deriv_e_acc_offload_f - info = qmckl_compute_factor_een_deriv_e_acc_offload_f & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - dim_cord_vect, & - cord_vect_full, & - lkpm_combined_index, & - tmp_c, & - dtmp_c, & - een_rescaled_n, & - een_rescaled_n_deriv_e, & - factor_een_deriv_e) - - end function qmckl_compute_factor_een_deriv_e_acc_offload -#endif - #+end_src - *** Test #+begin_src python :results output :exports none :noweb yes import numpy as np From c7dd46da05ae2fb1fd49a5d882ed2ee4787a79bb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 5 Apr 2022 11:44:17 +0200 Subject: [PATCH 11/27] Fixed cppcheck --- .github/workflows/test-build.yml | 1 - org/qmckl_ao.org | 3 +- org/qmckl_blas.org | 6 +- org/qmckl_jastrow.org | 300 +++++++++++------------- org/qmckl_mo.org | 14 +- org/qmckl_sherman_morrison_woodbury.org | 2 +- 6 files changed, 154 insertions(+), 172 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index c7af73f..b4e5b6c 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -4,7 +4,6 @@ on: push: branches: [ master ] pull_request: - branches: [ master ] jobs: x86_ubuntu: diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 8dd1067..8289aa4 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -2634,9 +2634,10 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { } } - rc = QMCKL_SUCCESS; #ifdef HAVE_HPC rc = qmckl_finalize_basis_hpc(context); +#else + rc = QMCKL_SUCCESS; #endif return rc; diff --git a/org/qmckl_blas.org b/org/qmckl_blas.org index 9cd7e18..1cf76e4 100644 --- a/org/qmckl_blas.org +++ b/org/qmckl_blas.org @@ -84,8 +84,8 @@ are not intended to be passed to external codes. #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_vector { - int64_t size; double* restrict data; + int64_t size; } qmckl_vector; #+end_src @@ -160,8 +160,8 @@ qmckl_vector_free( qmckl_context context, #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_matrix { - int64_t size[2]; double* restrict data; + int64_t size[2]; } qmckl_matrix; #+end_src @@ -245,9 +245,9 @@ qmckl_matrix_free( qmckl_context context, #define QMCKL_TENSOR_ORDER_MAX 16 typedef struct qmckl_tensor { + double* restrict data; int64_t order; int64_t size[QMCKL_TENSOR_ORDER_MAX]; - double* restrict data; } qmckl_tensor; #+end_src diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index ffbf713..6eaad62 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -837,7 +837,7 @@ qmckl_set_jastrow_type_nucl_vector(qmckl_context context, } if (ctx->jastrow.type_nucl_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.type_nucl_vector); + rc = qmckl_free(context, ctx->jastrow.type_nucl_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_type_nucl_vector", @@ -896,7 +896,7 @@ qmckl_set_jastrow_aord_vector(qmckl_context context, } if (ctx->jastrow.aord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.aord_vector); + rc = qmckl_free(context, ctx->jastrow.aord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -959,7 +959,7 @@ qmckl_set_jastrow_bord_vector(qmckl_context context, } if (ctx->jastrow.bord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.bord_vector); + rc = qmckl_free(context, ctx->jastrow.bord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -1029,7 +1029,7 @@ qmckl_set_jastrow_cord_vector(qmckl_context context, } if (ctx->jastrow.cord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.cord_vector); + rc = qmckl_free(context, ctx->jastrow.cord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -1388,12 +1388,11 @@ qmckl_exit_code qmckl_provide_asymp_jasb(qmckl_context context) ctx->jastrow.asymp_jasb = asymp_jasb; } - qmckl_exit_code rc = - qmckl_compute_asymp_jasb(context, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - rescale_factor_kappa_ee, - ctx->jastrow.asymp_jasb); + rc = qmckl_compute_asymp_jasb(context, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + rescale_factor_kappa_ee, + ctx->jastrow.asymp_jasb); if (rc != QMCKL_SUCCESS) { return rc; } @@ -1470,10 +1469,6 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( const double rescale_factor_kappa_ee, double* const asymp_jasb ) { - double kappa_inv, x, asym_one; - - kappa_inv = 1.0 / rescale_factor_kappa_ee; - if (context == QMCKL_NULL_CONTEXT){ return QMCKL_INVALID_CONTEXT; } @@ -1482,14 +1477,15 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( return QMCKL_INVALID_ARG_2; } - asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv); + const double kappa_inv = 1.0 / rescale_factor_kappa_ee; + const double asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv); asymp_jasb[0] = asym_one; asymp_jasb[1] = 0.5 * asym_one; for (int i = 0 ; i <= 1; ++i) { - x = kappa_inv; + double x = kappa_inv; for (int p = 1; p < bord_num; ++p){ - x = x * kappa_inv; + x *= kappa_inv; asymp_jasb[i] = asymp_jasb[i] + bord_vector[p + 1] * x; } } @@ -1672,16 +1668,15 @@ qmckl_exit_code qmckl_provide_factor_ee(qmckl_context context) ctx->jastrow.factor_ee = factor_ee; } - qmckl_exit_code rc = - qmckl_compute_factor_ee(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->electron.up_num, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - ctx->electron.ee_distance_rescaled, - ctx->jastrow.asymp_jasb, - ctx->jastrow.factor_ee); + rc = qmckl_compute_factor_ee(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->electron.up_num, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + ctx->electron.ee_distance_rescaled, + ctx->jastrow.asymp_jasb, + ctx->jastrow.factor_ee); if (rc != QMCKL_SUCCESS) { return rc; } @@ -2014,17 +2009,16 @@ qmckl_exit_code qmckl_provide_factor_ee_deriv_e(qmckl_context context) ctx->jastrow.factor_ee_deriv_e = factor_ee_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_ee_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->electron.up_num, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - ctx->electron.ee_distance_rescaled, - ctx->electron.ee_distance_rescaled_deriv_e, - ctx->jastrow.asymp_jasb, - ctx->jastrow.factor_ee_deriv_e); + rc = qmckl_compute_factor_ee_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->electron.up_num, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + ctx->electron.ee_distance_rescaled, + ctx->electron.ee_distance_rescaled_deriv_e, + ctx->jastrow.asymp_jasb, + ctx->jastrow.factor_ee_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -2437,21 +2431,20 @@ qmckl_exit_code qmckl_provide_factor_en(qmckl_context context) ctx->jastrow.factor_en = factor_en; } - qmckl_exit_code rc = - qmckl_compute_factor_en(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.aord_num, - ctx->jastrow.aord_vector, - ctx->electron.en_distance_rescaled, - ctx->jastrow.factor_en); + rc = qmckl_compute_factor_en(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.aord_num, + ctx->jastrow.aord_vector, + ctx->electron.en_distance_rescaled, + ctx->jastrow.factor_en); if (rc != QMCKL_SUCCESS) { return rc; } - + ctx->jastrow.factor_en_date = ctx->date; } @@ -2784,18 +2777,17 @@ qmckl_exit_code qmckl_provide_factor_en_deriv_e(qmckl_context context) ctx->jastrow.factor_en_deriv_e = factor_en_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_en_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.aord_num, - ctx->jastrow.aord_vector, - ctx->electron.en_distance_rescaled, - ctx->electron.en_distance_rescaled_deriv_e, - ctx->jastrow.factor_en_deriv_e); + rc = qmckl_compute_factor_en_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.aord_num, + ctx->jastrow.aord_vector, + ctx->electron.en_distance_rescaled, + ctx->electron.en_distance_rescaled_deriv_e, + ctx->jastrow.factor_en_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3203,14 +3195,13 @@ qmckl_exit_code qmckl_provide_een_rescaled_e(qmckl_context context) ctx->jastrow.een_rescaled_e = een_rescaled_e; } - qmckl_exit_code rc = - qmckl_compute_een_rescaled_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_ee, - ctx->electron.ee_distance, - ctx->jastrow.een_rescaled_e); + rc = qmckl_compute_een_rescaled_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_ee, + ctx->electron.ee_distance, + ctx->jastrow.een_rescaled_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3537,16 +3528,15 @@ qmckl_exit_code qmckl_provide_een_rescaled_e_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e = een_rescaled_e_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_rescaled_e_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_ee, - ctx->electron.coord_new.data, - ctx->electron.ee_distance, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_e_deriv_e); + rc = qmckl_compute_factor_een_rescaled_e_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_ee, + ctx->electron.coord_new.data, + ctx->electron.ee_distance, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_e_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3917,15 +3907,14 @@ qmckl_exit_code qmckl_provide_een_rescaled_n(qmckl_context context) ctx->jastrow.een_rescaled_n = een_rescaled_n; } - qmckl_exit_code rc = - qmckl_compute_een_rescaled_n(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_en, - ctx->electron.en_distance, - ctx->jastrow.een_rescaled_n); + rc = qmckl_compute_een_rescaled_n(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_en, + ctx->electron.en_distance, + ctx->jastrow.een_rescaled_n); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4256,18 +4245,17 @@ qmckl_exit_code qmckl_provide_een_rescaled_n_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_n_deriv_e = een_rescaled_n_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_rescaled_n_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_en, - ctx->electron.coord_new.data, - ctx->nucleus.coord.data, - ctx->electron.en_distance, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.een_rescaled_n_deriv_e); + rc = qmckl_compute_factor_een_rescaled_n_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_en, + ctx->electron.coord_new.data, + ctx->nucleus.coord.data, + ctx->electron.en_distance, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.een_rescaled_n_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4758,14 +4746,13 @@ qmckl_exit_code qmckl_provide_cord_vect_full(qmckl_context context) ctx->jastrow.cord_vect_full = cord_vect_full; } - qmckl_exit_code rc = - qmckl_compute_cord_vect_full(context, - ctx->nucleus.num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.cord_vector, - ctx->jastrow.cord_vect_full); + rc = qmckl_compute_cord_vect_full(context, + ctx->nucleus.num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.cord_vector, + ctx->jastrow.cord_vect_full); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4809,11 +4796,10 @@ qmckl_exit_code qmckl_provide_lkpm_combined_index(qmckl_context context) ctx->jastrow.lkpm_combined_index = lkpm_combined_index; } - qmckl_exit_code rc = - qmckl_compute_lkpm_combined_index(context, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.lkpm_combined_index); + rc = qmckl_compute_lkpm_combined_index(context, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.lkpm_combined_index); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4858,15 +4844,14 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.tmp_c = tmp_c; } - qmckl_exit_code rc = - qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4899,7 +4884,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; mem_info.size = (ctx->jastrow.cord_num) * (ctx->jastrow.cord_num + 1) - * 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); + ,* 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); double* dtmp_c = (double*) qmckl_malloc(context, mem_info); if (dtmp_c == NULL) { @@ -4911,15 +4896,14 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } - qmckl_exit_code rc = - qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); if (rc != QMCKL_SUCCESS) { return rc; } @@ -5943,18 +5927,17 @@ qmckl_exit_code qmckl_provide_factor_een(qmckl_context context) ctx->jastrow.factor_een = factor_een; } - qmckl_exit_code rc = - qmckl_compute_factor_een(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.cord_vect_full, - ctx->jastrow.lkpm_combined_index, - ctx->jastrow.tmp_c, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.factor_een); + rc = qmckl_compute_factor_een(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.cord_vect_full, + ctx->jastrow.lkpm_combined_index, + ctx->jastrow.tmp_c, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.factor_een); if (rc != QMCKL_SUCCESS) { return rc; } @@ -6456,20 +6439,19 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.factor_een_deriv_e = factor_een_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.cord_vect_full, - ctx->jastrow.lkpm_combined_index, - ctx->jastrow.tmp_c, - ctx->jastrow.dtmp_c, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.een_rescaled_n_deriv_e, - ctx->jastrow.factor_een_deriv_e); + rc = qmckl_compute_factor_een_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.cord_vect_full, + ctx->jastrow.lkpm_combined_index, + ctx->jastrow.tmp_c, + ctx->jastrow.dtmp_c, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.een_rescaled_n_deriv_e, + ctx->jastrow.factor_een_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index d920396..0928db6 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -849,13 +849,13 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, int64_t n0 = nidx-4; n0 = n0 < 0 ? 0 : n0; - for (int64_t n=n0 ; n < nidx ; n+=1) { - const double* restrict ck = coef_normalized_t + idx[n]*mo_num; - const double a1 = av1[n]; - const double a2 = av2[n]; - const double a3 = av3[n]; - const double a4 = av4[n]; - const double a5 = av5[n]; + for (int64_t m=n0 ; m < nidx ; m+=1) { + const double* restrict ck = coef_normalized_t + idx[m]*mo_num; + const double a1 = av1[m]; + const double a2 = av2[m]; + const double a3 = av3[m]; + const double a4 = av4[m]; + const double a5 = av5[m]; #ifdef HAVE_OPENMP #pragma omp simd diff --git a/org/qmckl_sherman_morrison_woodbury.org b/org/qmckl_sherman_morrison_woodbury.org index 598ad32..ae358e8 100644 --- a/org/qmckl_sherman_morrison_woodbury.org +++ b/org/qmckl_sherman_morrison_woodbury.org @@ -965,7 +965,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context, rc = qmckl_woodbury_3(context, LDS, Dim, Updates_3block, Updates_index_3block, breakdown, Slater_inv, determinant); if (rc != 0) { // Send the entire block to slagel_splitting uint64_t l = 0; - rc = qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block, + (void) qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block, breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant); later = later + l; } From 94035929e431eab6a475f287708a0fc5babb427a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 5 Apr 2022 11:44:17 +0200 Subject: [PATCH 12/27] Fixed cppcheck --- .github/workflows/test-build.yml | 2 - org/qmckl_ao.org | 3 +- org/qmckl_blas.org | 6 +- org/qmckl_jastrow.org | 300 +++++++++++------------- org/qmckl_mo.org | 14 +- org/qmckl_sherman_morrison_woodbury.org | 2 +- 6 files changed, 154 insertions(+), 173 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index c7af73f..12d4503 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -2,9 +2,7 @@ name: test-build on: push: - branches: [ master ] pull_request: - branches: [ master ] jobs: x86_ubuntu: diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 8dd1067..8289aa4 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -2634,9 +2634,10 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { } } - rc = QMCKL_SUCCESS; #ifdef HAVE_HPC rc = qmckl_finalize_basis_hpc(context); +#else + rc = QMCKL_SUCCESS; #endif return rc; diff --git a/org/qmckl_blas.org b/org/qmckl_blas.org index 9cd7e18..1cf76e4 100644 --- a/org/qmckl_blas.org +++ b/org/qmckl_blas.org @@ -84,8 +84,8 @@ are not intended to be passed to external codes. #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_vector { - int64_t size; double* restrict data; + int64_t size; } qmckl_vector; #+end_src @@ -160,8 +160,8 @@ qmckl_vector_free( qmckl_context context, #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_matrix { - int64_t size[2]; double* restrict data; + int64_t size[2]; } qmckl_matrix; #+end_src @@ -245,9 +245,9 @@ qmckl_matrix_free( qmckl_context context, #define QMCKL_TENSOR_ORDER_MAX 16 typedef struct qmckl_tensor { + double* restrict data; int64_t order; int64_t size[QMCKL_TENSOR_ORDER_MAX]; - double* restrict data; } qmckl_tensor; #+end_src diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index ffbf713..6eaad62 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -837,7 +837,7 @@ qmckl_set_jastrow_type_nucl_vector(qmckl_context context, } if (ctx->jastrow.type_nucl_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.type_nucl_vector); + rc = qmckl_free(context, ctx->jastrow.type_nucl_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_type_nucl_vector", @@ -896,7 +896,7 @@ qmckl_set_jastrow_aord_vector(qmckl_context context, } if (ctx->jastrow.aord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.aord_vector); + rc = qmckl_free(context, ctx->jastrow.aord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -959,7 +959,7 @@ qmckl_set_jastrow_bord_vector(qmckl_context context, } if (ctx->jastrow.bord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.bord_vector); + rc = qmckl_free(context, ctx->jastrow.bord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -1029,7 +1029,7 @@ qmckl_set_jastrow_cord_vector(qmckl_context context, } if (ctx->jastrow.cord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.cord_vector); + rc = qmckl_free(context, ctx->jastrow.cord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -1388,12 +1388,11 @@ qmckl_exit_code qmckl_provide_asymp_jasb(qmckl_context context) ctx->jastrow.asymp_jasb = asymp_jasb; } - qmckl_exit_code rc = - qmckl_compute_asymp_jasb(context, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - rescale_factor_kappa_ee, - ctx->jastrow.asymp_jasb); + rc = qmckl_compute_asymp_jasb(context, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + rescale_factor_kappa_ee, + ctx->jastrow.asymp_jasb); if (rc != QMCKL_SUCCESS) { return rc; } @@ -1470,10 +1469,6 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( const double rescale_factor_kappa_ee, double* const asymp_jasb ) { - double kappa_inv, x, asym_one; - - kappa_inv = 1.0 / rescale_factor_kappa_ee; - if (context == QMCKL_NULL_CONTEXT){ return QMCKL_INVALID_CONTEXT; } @@ -1482,14 +1477,15 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( return QMCKL_INVALID_ARG_2; } - asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv); + const double kappa_inv = 1.0 / rescale_factor_kappa_ee; + const double asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv); asymp_jasb[0] = asym_one; asymp_jasb[1] = 0.5 * asym_one; for (int i = 0 ; i <= 1; ++i) { - x = kappa_inv; + double x = kappa_inv; for (int p = 1; p < bord_num; ++p){ - x = x * kappa_inv; + x *= kappa_inv; asymp_jasb[i] = asymp_jasb[i] + bord_vector[p + 1] * x; } } @@ -1672,16 +1668,15 @@ qmckl_exit_code qmckl_provide_factor_ee(qmckl_context context) ctx->jastrow.factor_ee = factor_ee; } - qmckl_exit_code rc = - qmckl_compute_factor_ee(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->electron.up_num, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - ctx->electron.ee_distance_rescaled, - ctx->jastrow.asymp_jasb, - ctx->jastrow.factor_ee); + rc = qmckl_compute_factor_ee(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->electron.up_num, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + ctx->electron.ee_distance_rescaled, + ctx->jastrow.asymp_jasb, + ctx->jastrow.factor_ee); if (rc != QMCKL_SUCCESS) { return rc; } @@ -2014,17 +2009,16 @@ qmckl_exit_code qmckl_provide_factor_ee_deriv_e(qmckl_context context) ctx->jastrow.factor_ee_deriv_e = factor_ee_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_ee_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->electron.up_num, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - ctx->electron.ee_distance_rescaled, - ctx->electron.ee_distance_rescaled_deriv_e, - ctx->jastrow.asymp_jasb, - ctx->jastrow.factor_ee_deriv_e); + rc = qmckl_compute_factor_ee_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->electron.up_num, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + ctx->electron.ee_distance_rescaled, + ctx->electron.ee_distance_rescaled_deriv_e, + ctx->jastrow.asymp_jasb, + ctx->jastrow.factor_ee_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -2437,21 +2431,20 @@ qmckl_exit_code qmckl_provide_factor_en(qmckl_context context) ctx->jastrow.factor_en = factor_en; } - qmckl_exit_code rc = - qmckl_compute_factor_en(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.aord_num, - ctx->jastrow.aord_vector, - ctx->electron.en_distance_rescaled, - ctx->jastrow.factor_en); + rc = qmckl_compute_factor_en(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.aord_num, + ctx->jastrow.aord_vector, + ctx->electron.en_distance_rescaled, + ctx->jastrow.factor_en); if (rc != QMCKL_SUCCESS) { return rc; } - + ctx->jastrow.factor_en_date = ctx->date; } @@ -2784,18 +2777,17 @@ qmckl_exit_code qmckl_provide_factor_en_deriv_e(qmckl_context context) ctx->jastrow.factor_en_deriv_e = factor_en_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_en_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.aord_num, - ctx->jastrow.aord_vector, - ctx->electron.en_distance_rescaled, - ctx->electron.en_distance_rescaled_deriv_e, - ctx->jastrow.factor_en_deriv_e); + rc = qmckl_compute_factor_en_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.aord_num, + ctx->jastrow.aord_vector, + ctx->electron.en_distance_rescaled, + ctx->electron.en_distance_rescaled_deriv_e, + ctx->jastrow.factor_en_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3203,14 +3195,13 @@ qmckl_exit_code qmckl_provide_een_rescaled_e(qmckl_context context) ctx->jastrow.een_rescaled_e = een_rescaled_e; } - qmckl_exit_code rc = - qmckl_compute_een_rescaled_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_ee, - ctx->electron.ee_distance, - ctx->jastrow.een_rescaled_e); + rc = qmckl_compute_een_rescaled_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_ee, + ctx->electron.ee_distance, + ctx->jastrow.een_rescaled_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3537,16 +3528,15 @@ qmckl_exit_code qmckl_provide_een_rescaled_e_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e = een_rescaled_e_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_rescaled_e_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_ee, - ctx->electron.coord_new.data, - ctx->electron.ee_distance, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_e_deriv_e); + rc = qmckl_compute_factor_een_rescaled_e_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_ee, + ctx->electron.coord_new.data, + ctx->electron.ee_distance, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_e_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3917,15 +3907,14 @@ qmckl_exit_code qmckl_provide_een_rescaled_n(qmckl_context context) ctx->jastrow.een_rescaled_n = een_rescaled_n; } - qmckl_exit_code rc = - qmckl_compute_een_rescaled_n(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_en, - ctx->electron.en_distance, - ctx->jastrow.een_rescaled_n); + rc = qmckl_compute_een_rescaled_n(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_en, + ctx->electron.en_distance, + ctx->jastrow.een_rescaled_n); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4256,18 +4245,17 @@ qmckl_exit_code qmckl_provide_een_rescaled_n_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_n_deriv_e = een_rescaled_n_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_rescaled_n_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_en, - ctx->electron.coord_new.data, - ctx->nucleus.coord.data, - ctx->electron.en_distance, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.een_rescaled_n_deriv_e); + rc = qmckl_compute_factor_een_rescaled_n_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_en, + ctx->electron.coord_new.data, + ctx->nucleus.coord.data, + ctx->electron.en_distance, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.een_rescaled_n_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4758,14 +4746,13 @@ qmckl_exit_code qmckl_provide_cord_vect_full(qmckl_context context) ctx->jastrow.cord_vect_full = cord_vect_full; } - qmckl_exit_code rc = - qmckl_compute_cord_vect_full(context, - ctx->nucleus.num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.cord_vector, - ctx->jastrow.cord_vect_full); + rc = qmckl_compute_cord_vect_full(context, + ctx->nucleus.num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.cord_vector, + ctx->jastrow.cord_vect_full); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4809,11 +4796,10 @@ qmckl_exit_code qmckl_provide_lkpm_combined_index(qmckl_context context) ctx->jastrow.lkpm_combined_index = lkpm_combined_index; } - qmckl_exit_code rc = - qmckl_compute_lkpm_combined_index(context, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.lkpm_combined_index); + rc = qmckl_compute_lkpm_combined_index(context, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.lkpm_combined_index); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4858,15 +4844,14 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.tmp_c = tmp_c; } - qmckl_exit_code rc = - qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4899,7 +4884,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; mem_info.size = (ctx->jastrow.cord_num) * (ctx->jastrow.cord_num + 1) - * 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); + ,* 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); double* dtmp_c = (double*) qmckl_malloc(context, mem_info); if (dtmp_c == NULL) { @@ -4911,15 +4896,14 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } - qmckl_exit_code rc = - qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); if (rc != QMCKL_SUCCESS) { return rc; } @@ -5943,18 +5927,17 @@ qmckl_exit_code qmckl_provide_factor_een(qmckl_context context) ctx->jastrow.factor_een = factor_een; } - qmckl_exit_code rc = - qmckl_compute_factor_een(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.cord_vect_full, - ctx->jastrow.lkpm_combined_index, - ctx->jastrow.tmp_c, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.factor_een); + rc = qmckl_compute_factor_een(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.cord_vect_full, + ctx->jastrow.lkpm_combined_index, + ctx->jastrow.tmp_c, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.factor_een); if (rc != QMCKL_SUCCESS) { return rc; } @@ -6456,20 +6439,19 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.factor_een_deriv_e = factor_een_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.cord_vect_full, - ctx->jastrow.lkpm_combined_index, - ctx->jastrow.tmp_c, - ctx->jastrow.dtmp_c, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.een_rescaled_n_deriv_e, - ctx->jastrow.factor_een_deriv_e); + rc = qmckl_compute_factor_een_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.cord_vect_full, + ctx->jastrow.lkpm_combined_index, + ctx->jastrow.tmp_c, + ctx->jastrow.dtmp_c, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.een_rescaled_n_deriv_e, + ctx->jastrow.factor_een_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index d920396..0928db6 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -849,13 +849,13 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, int64_t n0 = nidx-4; n0 = n0 < 0 ? 0 : n0; - for (int64_t n=n0 ; n < nidx ; n+=1) { - const double* restrict ck = coef_normalized_t + idx[n]*mo_num; - const double a1 = av1[n]; - const double a2 = av2[n]; - const double a3 = av3[n]; - const double a4 = av4[n]; - const double a5 = av5[n]; + for (int64_t m=n0 ; m < nidx ; m+=1) { + const double* restrict ck = coef_normalized_t + idx[m]*mo_num; + const double a1 = av1[m]; + const double a2 = av2[m]; + const double a3 = av3[m]; + const double a4 = av4[m]; + const double a5 = av5[m]; #ifdef HAVE_OPENMP #pragma omp simd diff --git a/org/qmckl_sherman_morrison_woodbury.org b/org/qmckl_sherman_morrison_woodbury.org index 598ad32..ae358e8 100644 --- a/org/qmckl_sherman_morrison_woodbury.org +++ b/org/qmckl_sherman_morrison_woodbury.org @@ -965,7 +965,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context, rc = qmckl_woodbury_3(context, LDS, Dim, Updates_3block, Updates_index_3block, breakdown, Slater_inv, determinant); if (rc != 0) { // Send the entire block to slagel_splitting uint64_t l = 0; - rc = qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block, + (void) qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block, breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant); later = later + l; } From 586eb928013f34376fc9ac8a4d54b2384fec2977 Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Tue, 5 Apr 2022 14:23:20 +0200 Subject: [PATCH 13/27] compute_cord_vect_full done --- org/qmckl_jastrow.org | 123 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 101 insertions(+), 22 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 5d600f6..14e1f1e 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5055,7 +5055,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( | ~cord_vect_full~ | ~double[dim_cord_vect][nucl_num]~ | out | Full list of coefficients | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_cord_vect_full_f( & +integer function qmckl_compute_cord_vect_full_doc_f( & context, nucl_num, dim_cord_vect, type_nucl_num, & type_nucl_vector, cord_vector, cord_vect_full) & result(info) @@ -5098,29 +5098,14 @@ integer function qmckl_compute_cord_vect_full_f( & cord_vect_full(a,1:dim_cord_vect) = cord_vector(type_nucl_vector(a),1:dim_cord_vect) end do -end function qmckl_compute_cord_vect_full_f +end function qmckl_compute_cord_vect_full_doc_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_compute_cord_vect_full ( - const qmckl_context context, - const int64_t nucl_num, - const int64_t dim_cord_vect, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const double* cord_vector, - double* const cord_vect_full ); - #+end_src - - - #+CALL: generate_c_interface(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + #+CALL: generate_c_interface(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname="qmckl_compute_cord_vect_full_doc") #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_cord_vect_full & + integer(c_int32_t) function qmckl_compute_cord_vect_full_doc & (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) & bind(C) result(info) @@ -5135,13 +5120,106 @@ end function qmckl_compute_cord_vect_full_f real (c_double ) , intent(in) :: cord_vector(type_nucl_num,dim_cord_vect) real (c_double ) , intent(out) :: cord_vect_full(nucl_num,dim_cord_vect) - integer(c_int32_t), external :: qmckl_compute_cord_vect_full_f - info = qmckl_compute_cord_vect_full_f & + integer(c_int32_t), external :: qmckl_compute_cord_vect_full_doc_f + info = qmckl_compute_cord_vect_full_doc_f & (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) - end function qmckl_compute_cord_vect_full + end function qmckl_compute_cord_vect_full_doc #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_cord_vect_full_hpc ( + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ) { + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (type_nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + if (dim_cord_vect <= 0) { + return QMCKL_INVALID_ARG_5; + } + + for (int i=0; i < dim_cord_vect; ++i) { + for (int a=0; a < nucl_num; ++a){ + cord_vect_full[a + i*nucl_num] = cord_vector[(type_nucl_vector[a]-1)+i*type_nucl_num]; + } + } + + return QMCKL_SUCCESS; + } + #+end_src + + + #+CALL: generate_c_header(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname="qmckl_compute_cord_vect_full_doc") + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_cord_vect_full ( + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ); + #+end_src + + #+begin_src c :tangle (eval h_private_func) :comments org + qmckl_exit_code qmckl_compute_cord_vect_full_doc ( + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ); + #+end_src + + #+begin_src c :tangle (eval h_private_func) :comments org + qmckl_exit_code qmckl_compute_cord_vect_full_hpc ( + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_cord_vect_full ( + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ) { + + #ifdef HAVE_HPC + return qmckl_compute_cord_vect_full_hpc(context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full); + #else + return qmckl_compute_cord_vect_full_doc(context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full); + #endif + } + #+end_src + + + *** Compute lkpm_combined_index :PROPERTIES: :Name: qmckl_compute_lkpm_combined_index @@ -6339,6 +6417,7 @@ double factor_een[walk_num]; rc = qmckl_get_jastrow_factor_een(context, &(factor_een[0]),walk_num); assert(fabs(factor_een[0] + 0.37407972141304213) < 1e-12); +return QMCKL_SUCCESS; #+end_src ** Electron-electron-nucleus Jastrow \(f_{een}\) derivative From eb71a752f5bfc8a3afa04d0f0995078163bd35c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Tue, 5 Apr 2022 14:28:35 +0200 Subject: [PATCH 14/27] Fixed naive GPU kernels and ignored variable issue --- org/ao_grid.f90 | 114 -------------------------- org/qmckl_jastrow.org | 186 +++++++++++++++++++++++++++--------------- 2 files changed, 120 insertions(+), 180 deletions(-) delete mode 100644 org/ao_grid.f90 diff --git a/org/ao_grid.f90 b/org/ao_grid.f90 deleted file mode 100644 index 685313f..0000000 --- a/org/ao_grid.f90 +++ /dev/null @@ -1,114 +0,0 @@ -subroutine qmckl_check_error(rc, message) - use qmckl - implicit none - integer(qmckl_exit_code), intent(in) :: rc - character(len=*) , intent(in) :: message - character(len=128) :: str_buffer - if (rc /= QMCKL_SUCCESS) then - print *, message - call qmckl_string_of_error(rc, str_buffer) - print *, str_buffer - call exit(rc) - end if -end subroutine qmckl_check_error - -program ao_grid - use qmckl - implicit none - - integer(qmckl_context) :: qmckl_ctx ! QMCkl context - integer(qmckl_exit_code) :: rc ! Exit code of QMCkl functions - - character(len=128) :: trexio_filename - character(len=128) :: str_buffer - integer :: ao_id - integer :: point_num_x - - integer(c_int64_t) :: nucl_num - double precision, allocatable :: nucl_coord(:,:) - - integer(c_int64_t) :: point_num - integer(c_int64_t) :: ao_num - integer(c_int64_t) :: ipoint, i, j, k - double precision :: x, y, z, dr(3) - double precision :: rmin(3), rmax(3) - double precision, allocatable :: points(:,:) - double precision, allocatable :: ao_vgl(:,:,:) - -if (iargc() /= 3) then - print *, 'Syntax: ao_grid ' - call exit(-1) -end if -call getarg(1, trexio_filename) -call getarg(2, str_buffer) -read(str_buffer, *) ao_id -call getarg(3, str_buffer) -read(str_buffer, *) point_num_x - -if (point_num_x < 0 .or. point_num_x > 300) then - print *, 'Error: 0 < point_num < 300' - call exit(-1) -end if - -qmckl_ctx = qmckl_context_create() -rc = qmckl_trexio_read(qmckl_ctx, trexio_filename, 1_8*len(trim(trexio_filename))) -call qmckl_check_error(rc, 'Read TREXIO') - -rc = qmckl_get_ao_basis_ao_num(qmckl_ctx, ao_num) -call qmckl_check_error(rc, 'Getting ao_num') - -if (ao_id < 0 .or. ao_id > ao_num) then - print *, 'Error: 0 < ao_id < ', ao_num - call exit(-1) -end if - -rc = qmckl_get_nucleus_num(qmckl_ctx, nucl_num) -call qmckl_check_error(rc, 'Get nucleus num') - -allocate( nucl_coord(3, nucl_num) ) -rc = qmckl_get_nucleus_coord(qmckl_ctx, 'N', nucl_coord, 3_8*nucl_num) -call qmckl_check_error(rc, 'Get nucleus coord') - -rmin(1) = minval( nucl_coord(1,:) ) - 5.d0 -rmin(2) = minval( nucl_coord(2,:) ) - 5.d0 -rmin(3) = minval( nucl_coord(3,:) ) - 5.d0 - -rmax(1) = maxval( nucl_coord(1,:) ) + 5.d0 -rmax(2) = maxval( nucl_coord(2,:) ) + 5.d0 -rmax(3) = maxval( nucl_coord(3,:) ) + 5.d0 - -dr(1:3) = (rmax(1:3) - rmin(1:3)) / dble(point_num_x-1) - -point_num = point_num_x**3 -allocate( points(point_num, 3) ) -ipoint=0 -z = rmin(3) -do k=1,point_num_x - y = rmin(2) - do j=1,point_num_x - x = rmin(1) - do i=1,point_num_x - ipoint = ipoint+1 - points(ipoint,1) = x - points(ipoint,2) = y - points(ipoint,3) = z - x = x + dr(1) - end do - y = y + dr(2) - end do - z = z + dr(3) -end do - -rc = qmckl_set_point(qmckl_ctx, 'T', points, point_num) -call qmckl_check_error(rc, 'Setting points') - -allocate( ao_vgl(ao_num, 5, point_num) ) -rc = qmckl_get_ao_basis_ao_vgl(qmckl_ctx, ao_vgl, ao_num*5_8*point_num) -call qmckl_check_error(rc, 'Setting points') - -do ipoint=1, point_num - print '(3(F16.10,X),E20.10)', points(ipoint, 1:3), ao_vgl(ao_id,1,ipoint) -end do - -deallocate( nucl_coord, points, ao_vgl ) -end program ao_grid diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 46bb5da..e69088c 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -4889,57 +4889,65 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) } ctx->jastrow.tmp_c = tmp_c; } + /* Choose the correct compute function (depending on offload type) */ - bool default_compute = true; + switch(ctx->jastrow.offload_type) { + case OFFLOAD_OPENACC: + #ifdef HAVE_OPENACC_OFFLOAD + rc = + qmckl_compute_tmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + #elif + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); - #ifdef HAVE_OPENACC_OFFLOAD - if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) { - qmckl_exit_code rc = - qmckl_compute_tmp_c_acc_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - default_compute = false; - if (rc != QMCKL_SUCCESS) { - return rc; - } + #endif + break; + case OFFLOAD_CUBLAS: + #ifdef HAVE_CUBLAS_OFFLOAD + rc = + qmckl_compute_tmp_c_cublas_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + #elif + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + #endif + break; + default: + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + break; } - #endif - - #ifdef HAVE_CUBLAS_OFFLOAD - if(ctx->jastrow.offload_type == OFFLOAD_CUBLAS) { - qmckl_exit_code rc = - qmckl_compute_tmp_c_cublas_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - default_compute = false; - if (rc != QMCKL_SUCCESS) { - return rc; - } - } - #endif - - rc = qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - if (rc != QMCKL_SUCCESS) { - return rc; - } - ctx->jastrow.tmp_c_date = ctx->date; } @@ -4980,15 +4988,61 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } + switch(ctx->jastrow.offload_type) { + case OFFLOAD_OPENACC: + #ifdef HAVE_OPENACC_OFFLOAD + rc = qmckl_compute_dtmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + #elif + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + #endif + break; + case OFFLOAD_CUBLAS: + #ifdef HAVE_CUBLAS_OFFLOAD + rc = qmckl_compute_dtmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + #elif + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + #endif + break; + default: + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + break; + } - rc = qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); if (rc != QMCKL_SUCCESS) { return rc; } @@ -5617,7 +5671,6 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( const double* een_rescaled_n, double* const tmp_c ) { - if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } @@ -5649,8 +5702,8 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( int stride_j_n = stride_k_n * nucl_num; int stride_nw_n = stride_j_n * (cord_num+1); - //#pragma acc parallel - //#pragma acc loop independent gang worker vector collapse(5) + #pragma acc parallel + #pragma acc loop independent gang worker vector collapse(5) for (int nw=0; nw < walk_num; ++nw) { for (int i=0; i Date: Tue, 5 Apr 2022 14:37:57 +0200 Subject: [PATCH 15/27] Fix preprocessor else and remove old cuBLAS interface --- org/qmckl_blas.org | 92 ------------------------------------------- org/qmckl_jastrow.org | 8 ++-- 2 files changed, 4 insertions(+), 96 deletions(-) diff --git a/org/qmckl_blas.org b/org/qmckl_blas.org index 4f83705..1cf76e4 100644 --- a/org/qmckl_blas.org +++ b/org/qmckl_blas.org @@ -2288,98 +2288,6 @@ qmckl_transpose (qmckl_context context, #+end_src -* cuBLAS interface (optional) -We propose a cuBLAS version of some QMCkl kernels. However, because cuBLAS is written in C, we need to define a Fortran interface for it. We start by defining functions to manage the cuBLAS handle structure from Fortran, before writing interfaces for the specific cuBLAS functions we are interested in. - -TODO These are the C functions that are supposed to be called from Fortran. We still need to write the interfaces themselves. - -#+begin_src c :tangle (eval h_private_func) :comments org -#ifdef HAVE_CUBLAS_OFFLOAD -#include -#endif -#+end_src - -#+begin_src c :tangle (eval h_private_func) :comments org -#ifdef HAVE_CUBLAS_OFFLOAD -cublasHandle_t* get_cublas_handle_interfaced(); -#endif -#+end_src - -#+begin_src c :comments org :tangle (eval c) :exports none -#ifdef HAVE_CUBLAS_OFFLOAD -cublasHandle_t* get_cublas_handle_interfaced() { - cublasHandle_t* handle = malloc(sizeof(cublasHandle_t)); - - cublasStatus_t status = cublasCreate(handle); - if (status != CUBLAS_STATUS_SUCCESS){ - fprintf(stderr, "Error while initializing cuBLAS\n"); - exit(1); - } - - return handle; -} -#endif -#+end_src - -#+begin_src c :tangle (eval h_private_func) :comments org -#ifdef HAVE_CUBLAS_OFFLOAD -void destroy_cublas_handle_interfaced(cublasHandle_t* handle); -#endif -#+end_src - -#+begin_src c :comments org :tangle (eval c) :exports none -#ifdef HAVE_CUBLAS_OFFLOAD -void destroy_cublas_handle_interfaced(cublasHandle_t* handle) { - if(handle != NULL) { - free(handle); - } -} -#endif -#+end_src - -** DGEMM - -#+begin_src c :tangle (eval h_private_func) :comments org -#ifdef HAVE_CUBLAS_OFFLOAD -cublasStatus_t cublasDgemm_f( - cublasHandle_t* handle, - cublasOperation_t* transa, cublasOperation_t* transb, - int* m, int* n, int* k, - const double* alpha, - const double*A, int* lda, - const double* B, int* ldb, - const double* beta, - double*C, int* ldc -); -#endif -#+end_src - -#+begin_src c :comments org :tangle (eval c) :exports none - -#ifdef HAVE_CUBLAS_OFFLOAD -cublasStatus_t cublasDgemm_f( - cublasHandle_t* handle, - cublasOperation_t* transa, cublasOperation_t* transb, - int* m, int* n, int* k, - const double* alpha, - const double*A, int* lda, - const double* B, int* ldb, - const double* beta, - double*C, int* ldc -) { - return cublasDgemm_f( - handle, - transa, transb, - m, n, k, - alpha, A, lda, B,ldb, - beta, C, ldc - ); -} -#endif -#+end_src - - - * End of files :noexport: diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index e69088c..970feb7 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -4903,7 +4903,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); - #elif + #else rc = qmckl_compute_tmp_c(context, ctx->jastrow.cord_num, ctx->electron.num, @@ -4926,7 +4926,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); - #elif + #else rc = qmckl_compute_tmp_c(context, ctx->jastrow.cord_num, ctx->electron.num, @@ -4999,7 +4999,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); - #elif + #else rc = qmckl_compute_dtmp_c(context, ctx->jastrow.cord_num, ctx->electron.num, @@ -5020,7 +5020,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); - #elif + #else rc = qmckl_compute_dtmp_c(context, ctx->jastrow.cord_num, ctx->electron.num, From 63c7f8ea72d4c9c14e4c97f28edf64d883d9a9d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Tue, 5 Apr 2022 16:29:52 +0200 Subject: [PATCH 16/27] Replace placeholder cuBLAS kernels with new C HPC implementation --- org/qmckl_jastrow.org | 110 +++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 970feb7..c602d84 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5783,17 +5783,6 @@ qmckl_exit_code qmckl_compute_tmp_c_cublas_offload ( const double* een_rescaled_n, double* const tmp_c ) { - qmckl_exit_code info; - int i, j, a, l, kk, p, lmax, nw; - char TransA, TransB; - double alpha, beta; - int M, N, K, LDA, LDB, LDC; - - TransA = 'N'; - TransB = 'N'; - alpha = 1.0; - beta = 0.0; - if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } @@ -5810,29 +5799,40 @@ qmckl_exit_code qmckl_compute_tmp_c_cublas_offload ( return QMCKL_INVALID_ARG_4; } - M = elec_num; - N = nucl_num*(cord_num + 1); - K = elec_num; + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_5; + } - LDA = sizeof(een_rescaled_e)/sizeof(double); - LDB = sizeof(een_rescaled_n)/sizeof(double); - LDC = sizeof(tmp_c)/sizeof(double); + qmckl_exit_code info = QMCKL_SUCCESS; - // TODO Replace with cuBLAS calls - for (int nw=0; nw < walk_num; ++nw) { - for (int i=0; i Date: Tue, 5 Apr 2022 16:52:35 +0200 Subject: [PATCH 17/27] Fix info --- org/qmckl_mo.org | 1 + 1 file changed, 1 insertion(+) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 0928db6..5010283 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -655,6 +655,7 @@ integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, & end if end do end do + info = QMCKL_SUCCESS end function qmckl_compute_mo_basis_mo_vgl_doc_f #+end_src From 0489831e18118b9d9888058729fa59302465bc86 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 5 Apr 2022 17:06:29 +0200 Subject: [PATCH 18/27] Simplified configure --- configure.ac | 50 ++++++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/configure.ac b/configure.ac index ffa8f99..474442e 100644 --- a/configure.ac +++ b/configure.ac @@ -137,10 +137,10 @@ case "$with_chameleon" in [PKG_CFLAGS="$PKG_CFLAGS $LIBCHAMELEON_CFLAGS" PKG_LIBS="$PKG_LIBS $LIBCHAMELEON_LIBS"] ,[ - + ## something went wrong. ## try to find the package without pkg-config - + ## check that the library is actually new enough. ## by testing for a 1.0.0+ function which we use AC_CHECK_LIB(chameleon,CHAMELEON_finalize,[LIBCHAMELEON_LIBS="-lchameleon"]) @@ -205,18 +205,11 @@ case $FC in ;; *nvfortran*) - FCFLAGS="$FCFLAGS -fPIC -Mnomain -mp -target=gpu" + FCFLAGS="$FCFLAGS -fPIC -Mnomain" ;; esac -case $CC in - - *nvc*) - CFLAGS="$CFLAGS -fPIC -mp -target=gpu" - ;; -esac - # Options. AC_ARG_ENABLE(hpc, [AS_HELP_STRING([--enable-hpc],[Use HPC-optimized functions])], HAVE_HPC=$enableval, HAVE_HPC=no) @@ -243,25 +236,38 @@ fi ## Enable GPU offloading # OpenACC offloading -AC_ARG_ENABLE(openacc-offload, [AS_HELP_STRING([--openacc-offload],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) +AC_ARG_ENABLE(enable-openacc, [AS_HELP_STRING([--enable-openacc],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ - AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines]) - CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS" - FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENACC_OFFLOAD $FCFLAGS" + AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines]) + case $CC in + + *gcc*) + CFLAGS="$CFLAGS -fPIC -fopenacc" + ;; + *nvc*) + CFLAGS="$CFLAGS -fPIC -mp -target=gpu" + ;; + esac + + CFLAGS="$CFLAGS" +# FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENACC_OFFLOAD $FCFLAGS" ]) # cuBLAS offloading -AC_ARG_ENABLE(cublas-offload, [AS_HELP_STRING([--cublas-offload],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no) +AC_ARG_ENABLE(enable-cublas, [AS_HELP_STRING([--enable-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no) AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ - AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines]) - FCFLAGS="-DHAVE_CUBLAS_OFFLOAD" + AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines]) + case $CC in + + *gcc*) + CFLAGS="$CFLAGS -fPIC -fopenacc" + ;; + *nvc*) + CFLAGS="$CFLAGS -fPIC -mp -target=gpu" + ;; + esac ]) -# General offload -AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes" || test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ - CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS" - FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS $FCFLAGS" -]) ## From 08f01ece894837da3328f7c352beca1399ebf8ad Mon Sep 17 00:00:00 2001 From: 2323 Date: Tue, 5 Apr 2022 17:57:56 +0200 Subject: [PATCH 19/27] Fix configure --- configure.ac | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index 474442e..8013725 100644 --- a/configure.ac +++ b/configure.ac @@ -210,6 +210,16 @@ case $FC in esac +case $CC in + + *gcc*) + CFLAGS="$CFLAGS -fPIC" + ;; + *nvc*) + CFLAGS="$CFLAGS -fPIC" + ;; +esac + # Options. AC_ARG_ENABLE(hpc, [AS_HELP_STRING([--enable-hpc],[Use HPC-optimized functions])], HAVE_HPC=$enableval, HAVE_HPC=no) @@ -236,34 +246,32 @@ fi ## Enable GPU offloading # OpenACC offloading -AC_ARG_ENABLE(enable-openacc, [AS_HELP_STRING([--enable-openacc],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) +AC_ARG_ENABLE(openacc, [AS_HELP_STRING([--enable-openacc],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines]) case $CC in *gcc*) - CFLAGS="$CFLAGS -fPIC -fopenacc" + CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -fPIC -mp -target=gpu" + CFLAGS="$CFLAGS -mp -target=gpu" ;; esac - CFLAGS="$CFLAGS" -# FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENACC_OFFLOAD $FCFLAGS" ]) # cuBLAS offloading -AC_ARG_ENABLE(enable-cublas, [AS_HELP_STRING([--enable-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no) +AC_ARG_ENABLE(cublas, [AS_HELP_STRING([--enable-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no) AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines]) case $CC in *gcc*) - CFLAGS="$CFLAGS -fPIC -fopenacc" + CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -fPIC -mp -target=gpu" + CFLAGS="$CFLAGS -mp -target=gpu" ;; esac ]) From f02e761b7939888220328220c4ad398c777f7c24 Mon Sep 17 00:00:00 2001 From: 2323 Date: Tue, 5 Apr 2022 19:31:11 +0200 Subject: [PATCH 20/27] Fixed configure.ac for GPUs --- configure.ac | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 8013725..38df570 100644 --- a/configure.ac +++ b/configure.ac @@ -255,7 +255,17 @@ AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -mp -target=gpu" + CFLAGS="$CFLAGS -acc -mp -target=gpu" + ;; + esac + + case $FC in + + *gfortran*) + FCFLAGS="$FCFLAGS -fopenacc" + ;; + *nvfortran*) + FCFLAGS="$FCFLAGS -acc -mp -target=gpu" ;; esac @@ -271,7 +281,17 @@ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -mp -target=gpu" + CFLAGS="$CFLAGS -acc -mp -target=gpu" + ;; + esac + + case $FC in + + *gfortran*) + FCFLAGS="$FCFLAGS -fopenacc" + ;; + *nvfortran*) + FCFLAGS="$FCFLAGS -acc -mp -target=gpu" ;; esac ]) From 72fad819bf35cdf96a96b9d6ffc0880c570d014f Mon Sep 17 00:00:00 2001 From: 2323 Date: Wed, 6 Apr 2022 10:03:56 +0200 Subject: [PATCH 21/27] Fix flags --- configure.ac | 9 +++++---- org/qmckl_jastrow.org | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index 38df570..835f3f9 100644 --- a/configure.ac +++ b/configure.ac @@ -93,6 +93,7 @@ AC_PROG_F77 m4_version_prereq([2.70],[], [AC_PROG_CC_C99]) AS_IF([test "$ac_cv_prog_cc_c99" = "no"], [AC_MSG_ERROR([The compiler does not support C99])]) AC_PROG_CC_C_O +AM_PROG_CC_C_O AC_PROG_FC AC_PROG_FC_C_O AC_FC_PP_DEFINE @@ -255,7 +256,7 @@ AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -acc -mp -target=gpu" + CFLAGS="$CFLAGS -acc" ;; esac @@ -265,7 +266,7 @@ AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ FCFLAGS="$FCFLAGS -fopenacc" ;; *nvfortran*) - FCFLAGS="$FCFLAGS -acc -mp -target=gpu" + FCFLAGS="$FCFLAGS -acc" ;; esac @@ -281,7 +282,7 @@ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -acc -mp -target=gpu" + CFLAGS="$CFLAGS -acc" ;; esac @@ -291,7 +292,7 @@ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ FCFLAGS="$FCFLAGS -fopenacc" ;; *nvfortran*) - FCFLAGS="$FCFLAGS -acc -mp -target=gpu" + FCFLAGS="$FCFLAGS -acc" ;; esac ]) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index c602d84..017e372 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -737,7 +737,7 @@ qmckl_exit_code qmckl_get_jastrow_offload_type (const qmckl_context context, qmc "offload_type is a null pointer"); } - qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; + qmckl_context_struct* const ctx = (qmckl_context_struct*) context; assert (ctx != NULL); int32_t mask = 1 << 0; From 0966e1e2b1f38082419b00d760131579de40bfc4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 6 Apr 2022 10:42:00 +0200 Subject: [PATCH 22/27] Fix OpenACC --- org/qmckl_jastrow.org | 1060 +++++++++++++++++++++-------------------- 1 file changed, 540 insertions(+), 520 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 017e372..7b5bcee 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -1509,11 +1509,11 @@ end function qmckl_compute_asymp_jasb_f #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_asymp_jasb ( - const qmckl_context context, - const int64_t bord_num, - const double* bord_vector, - const double rescale_factor_kappa_ee, - double* const asymp_jasb ) { + const qmckl_context context, + const int64_t bord_num, + const double* bord_vector, + const double rescale_factor_kappa_ee, + double* const asymp_jasb ) { if (context == QMCKL_NULL_CONTEXT){ return QMCKL_INVALID_CONTEXT; @@ -1545,11 +1545,11 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_asymp_jasb ( - const qmckl_context context, - const int64_t bord_num, - const double* bord_vector, - const double rescale_factor_kappa_ee, - double* const asymp_jasb ); + const qmckl_context context, + const int64_t bord_num, + const double* bord_vector, + const double rescale_factor_kappa_ee, + double* const asymp_jasb ); #+end_src @@ -1827,15 +1827,15 @@ end function qmckl_compute_factor_ee_f #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_factor_ee ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t up_num, - const int64_t bord_num, - const double* bord_vector, - const double* ee_distance_rescaled, - const double* asymp_jasb, - double* const factor_ee ) { + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* asymp_jasb, + double* const factor_ee ) { int ipar; // can we use a smaller integer? double x, x1, spin_fact, power_ser; @@ -1896,15 +1896,15 @@ qmckl_exit_code qmckl_compute_factor_ee ( #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_factor_ee ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t up_num, - const int64_t bord_num, - const double* bord_vector, - const double* ee_distance_rescaled, - const double* asymp_jasb, - double* const factor_ee ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* asymp_jasb, + double* const factor_ee ); #+end_src @@ -2207,16 +2207,16 @@ end function qmckl_compute_factor_ee_deriv_e_f #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_factor_ee_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t up_num, - const int64_t bord_num, - const double* bord_vector, - const double* ee_distance_rescaled, - const double* ee_distance_rescaled_deriv_e, - const double* asymp_jasb, - double* const factor_ee_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* ee_distance_rescaled_deriv_e, + const double* asymp_jasb, + double* const factor_ee_deriv_e ); #+end_src @@ -2225,17 +2225,17 @@ end function qmckl_compute_factor_ee_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & - (context, & - walk_num, & - elec_num, & - up_num, & - bord_num, & - bord_vector, & - ee_distance_rescaled, & - ee_distance_rescaled_deriv_e, & - asymp_jasb, & - factor_ee_deriv_e) & - bind(C) result(info) + (context, & + walk_num, & + elec_num, & + up_num, & + bord_num, & + bord_vector, & + ee_distance_rescaled, & + ee_distance_rescaled_deriv_e, & + asymp_jasb, & + factor_ee_deriv_e) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -2253,16 +2253,16 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & integer(c_int32_t), external :: qmckl_compute_factor_ee_deriv_e_f info = qmckl_compute_factor_ee_deriv_e_f & - (context, & - walk_num, & - elec_num, & - up_num, & - bord_num, & - bord_vector, & - ee_distance_rescaled, & - ee_distance_rescaled_deriv_e, & - asymp_jasb, & - factor_ee_deriv_e) + (context, & + walk_num, & + elec_num, & + up_num, & + bord_num, & + bord_vector, & + ee_distance_rescaled, & + ee_distance_rescaled_deriv_e, & + asymp_jasb, & + factor_ee_deriv_e) end function qmckl_compute_factor_ee_deriv_e #+end_src @@ -2593,16 +2593,16 @@ end function qmckl_compute_factor_en_f #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_factor_en ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const int64_t aord_num, - const double* aord_vector, - const double* en_distance_rescaled, - double* const factor_en ) { + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + double* const factor_en ) { double x, x1, power_ser; @@ -2681,16 +2681,16 @@ qmckl_exit_code qmckl_compute_factor_en ( #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_factor_en ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const int64_t aord_num, - const double* aord_vector, - const double* en_distance_rescaled, - double* const factor_en ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + double* const factor_en ); #+end_src @@ -2975,17 +2975,17 @@ end function qmckl_compute_factor_en_deriv_e_f #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_factor_en_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const int64_t aord_num, - const double* aord_vector, - const double* en_distance_rescaled, - const double* en_distance_rescaled_deriv_e, - double* const factor_en_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + const double* en_distance_rescaled_deriv_e, + double* const factor_en_deriv_e ); #+end_src @@ -2994,18 +2994,18 @@ end function qmckl_compute_factor_en_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_en_deriv_e & - (context, & - walk_num, & - elec_num, & - nucl_num, & - type_nucl_num, & - type_nucl_vector, & - aord_num, & - aord_vector, & - en_distance_rescaled, & - en_distance_rescaled_deriv_e, & - factor_en_deriv_e) & - bind(C) result(info) + (context, & + walk_num, & + elec_num, & + nucl_num, & + type_nucl_num, & + type_nucl_vector, & + aord_num, & + aord_vector, & + en_distance_rescaled, & + en_distance_rescaled_deriv_e, & + factor_en_deriv_e) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3024,17 +3024,17 @@ end function qmckl_compute_factor_en_deriv_e_f integer(c_int32_t), external :: qmckl_compute_factor_en_deriv_e_f info = qmckl_compute_factor_en_deriv_e_f & - (context, & - walk_num, & - elec_num, & - nucl_num, & - type_nucl_num, & - type_nucl_vector, & - aord_num, & - aord_vector, & - en_distance_rescaled, & - en_distance_rescaled_deriv_e, & - factor_en_deriv_e) + (context, & + walk_num, & + elec_num, & + nucl_num, & + type_nucl_num, & + type_nucl_vector, & + aord_num, & + aord_vector, & + en_distance_rescaled, & + en_distance_rescaled_deriv_e, & + factor_en_deriv_e) end function qmckl_compute_factor_en_deriv_e #+end_src @@ -3371,13 +3371,13 @@ end function qmckl_compute_een_rescaled_e_f #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_een_rescaled_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t cord_num, - const double rescale_factor_kappa_ee, - const double* ee_distance, - double* const een_rescaled_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -3385,9 +3385,9 @@ end function qmckl_compute_een_rescaled_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_een_rescaled_e & - (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & + (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & ee_distance, een_rescaled_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3402,7 +3402,7 @@ end function qmckl_compute_een_rescaled_e_f integer(c_int32_t), external :: qmckl_compute_een_rescaled_e_f info = qmckl_compute_een_rescaled_e_f & - (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) + (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) end function qmckl_compute_een_rescaled_e #+end_src @@ -3709,15 +3709,15 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_factor_een_rescaled_e_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t cord_num, - const double rescale_factor_kappa_ee, - const double* coord_new, - const double* ee_distance, - const double* een_rescaled_e, - double* const een_rescaled_e_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* coord_new, + const double* ee_distance, + const double* een_rescaled_e, + double* const een_rescaled_e_deriv_e ); #+end_src @@ -3726,16 +3726,16 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_een_rescaled_e_deriv_e & - (context, & - walk_num, & - elec_num, & - cord_num, & - rescale_factor_kappa_ee, & - coord_new, & - ee_distance, & - een_rescaled_e, & - een_rescaled_e_deriv_e) & - bind(C) result(info) + (context, & + walk_num, & + elec_num, & + cord_num, & + rescale_factor_kappa_ee, & + coord_new, & + ee_distance, & + een_rescaled_e, & + een_rescaled_e_deriv_e) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3752,15 +3752,15 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f integer(c_int32_t), external :: qmckl_compute_factor_een_rescaled_e_deriv_e_f info = qmckl_compute_factor_een_rescaled_e_deriv_e_f & - (context, & - walk_num, & - elec_num, & - cord_num, & - rescale_factor_kappa_ee, & - coord_new, & - ee_distance, & - een_rescaled_e, & - een_rescaled_e_deriv_e) + (context, & + walk_num, & + elec_num, & + cord_num, & + rescale_factor_kappa_ee, & + coord_new, & + ee_distance, & + een_rescaled_e, & + een_rescaled_e_deriv_e) end function qmckl_compute_factor_een_rescaled_e_deriv_e #+end_src @@ -4064,14 +4064,14 @@ end function qmckl_compute_een_rescaled_n_f #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_een_rescaled_n ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const double rescale_factor_kappa_en, - const double* en_distance, - double* const een_rescaled_n ) { + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* en_distance, + double* const een_rescaled_n ) { if (context == QMCKL_NULL_CONTEXT) { @@ -4131,14 +4131,14 @@ qmckl_exit_code qmckl_compute_een_rescaled_n ( #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_een_rescaled_n ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const double rescale_factor_kappa_en, - const double* en_distance, - double* const een_rescaled_n ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* en_distance, + double* const een_rescaled_n ); #+end_src *** Test @@ -4438,17 +4438,17 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_factor_een_rescaled_n_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const double rescale_factor_kappa_en, - const double* coord_new, - const double* coord, - const double* en_distance, - const double* een_rescaled_n, - double* const een_rescaled_n_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* coord_new, + const double* coord, + const double* en_distance, + const double* een_rescaled_n, + double* const een_rescaled_n_deriv_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -4456,18 +4456,18 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_een_rescaled_n_deriv_e & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - rescale_factor_kappa_en, & - coord_new, & - coord, & - en_distance, & - een_rescaled_n, & - een_rescaled_n_deriv_e) & - bind(C) result(info) + (context, & + walk_num, & + elec_num, & + nucl_num, & + cord_num, & + rescale_factor_kappa_en, & + coord_new, & + coord, & + en_distance, & + een_rescaled_n, & + een_rescaled_n_deriv_e) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -4486,17 +4486,17 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f integer(c_int32_t), external :: qmckl_compute_factor_een_rescaled_n_deriv_e_f info = qmckl_compute_factor_een_rescaled_n_deriv_e_f & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - rescale_factor_kappa_en, & - coord_new, & - coord, & - en_distance, & - een_rescaled_n, & - een_rescaled_n_deriv_e) + (context, & + walk_num, & + elec_num, & + nucl_num, & + cord_num, & + rescale_factor_kappa_en, & + coord_new, & + coord, & + en_distance, & + een_rescaled_n, & + een_rescaled_n_deriv_e) end function qmckl_compute_factor_een_rescaled_n_deriv_e #+end_src @@ -4914,7 +4914,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.tmp_c); #endif - break; + break; case OFFLOAD_CUBLAS: #ifdef HAVE_CUBLAS_OFFLOAD rc = @@ -4936,7 +4936,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); #endif - break; + break; default: rc = qmckl_compute_tmp_c(context, ctx->jastrow.cord_num, @@ -4946,7 +4946,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); - break; + break; } ctx->jastrow.tmp_c_date = ctx->date; @@ -5009,7 +5009,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); #endif - break; + break; case OFFLOAD_CUBLAS: #ifdef HAVE_CUBLAS_OFFLOAD rc = qmckl_compute_dtmp_c_acc_offload(context, @@ -5030,7 +5030,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); #endif - break; + break; default: rc = qmckl_compute_dtmp_c(context, ctx->jastrow.cord_num, @@ -5040,7 +5040,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); - break; + break; } if (rc != QMCKL_SUCCESS) { @@ -5113,9 +5113,9 @@ end function qmckl_compute_dim_cord_vect_f #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_dim_cord_vect ( - const qmckl_context context, - const int64_t cord_num, - int64_t* const dim_cord_vect){ + const qmckl_context context, + const int64_t cord_num, + int64_t* const dim_cord_vect){ int lmax; @@ -5153,9 +5153,9 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_dim_cord_vect ( - const qmckl_context context, - const int64_t cord_num, - int64_t* const dim_cord_vect ); + const qmckl_context context, + const int64_t cord_num, + int64_t* const dim_cord_vect ); #+end_src @@ -5229,13 +5229,13 @@ end function qmckl_compute_cord_vect_full_f #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_cord_vect_full ( - const qmckl_context context, - const int64_t nucl_num, - const int64_t dim_cord_vect, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const double* cord_vector, - double* const cord_vect_full ); + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ); #+end_src @@ -5244,8 +5244,8 @@ end function qmckl_compute_cord_vect_full_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_cord_vect_full & - (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) & - bind(C) result(info) + (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -5260,7 +5260,7 @@ end function qmckl_compute_cord_vect_full_f integer(c_int32_t), external :: qmckl_compute_cord_vect_full_f info = qmckl_compute_cord_vect_full_f & - (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) + (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) end function qmckl_compute_cord_vect_full #+end_src @@ -5336,10 +5336,10 @@ end function qmckl_compute_lkpm_combined_index_f #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_lkpm_combined_index ( - const qmckl_context context, - const int64_t cord_num, - const int64_t dim_cord_vect, - int64_t* const lkpm_combined_index ) { + const qmckl_context context, + const int64_t cord_num, + const int64_t dim_cord_vect, + int64_t* const lkpm_combined_index ) { int kk, lmax, m; @@ -5386,10 +5386,10 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_lkpm_combined_index ( - const qmckl_context context, - const int64_t cord_num, - const int64_t dim_cord_vect, - int64_t* const lkpm_combined_index ); + const qmckl_context context, + const int64_t cord_num, + const int64_t dim_cord_vect, + int64_t* const lkpm_combined_index ); #+end_src @@ -5511,14 +5511,14 @@ end function qmckl_compute_tmp_c_doc #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_tmp_c_hpc ( - const qmckl_context context, - const int64_t cord_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t walk_num, - const double* een_rescaled_e, - const double* een_rescaled_n, - double* const tmp_c ) { + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) { if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; @@ -5662,14 +5662,14 @@ qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, #+begin_src c :comments org :tangle (eval c) :noweb yes #ifdef HAVE_OPENACC_OFFLOAD qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( - const qmckl_context context, - const int64_t cord_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t walk_num, - const double* een_rescaled_e, - const double* een_rescaled_n, - double* const tmp_c ) { + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) { if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; @@ -5689,31 +5689,40 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( // Compute array access strides: // For tmp_c... - int stride_k_c = elec_num; - int stride_j_c = stride_k_c * nucl_num; - int stride_i_c = stride_j_c * (cord_num+1); - int stride_nw_c = stride_i_c * cord_num; + const int64_t stride_k_c = elec_num; + const int64_t stride_j_c = stride_k_c * nucl_num; + const int64_t stride_i_c = stride_j_c * (cord_num+1); + const int64_t stride_nw_c = stride_i_c * cord_num; // For een_rescaled_e... - int stride_m_e = elec_num; - int stride_i_e = stride_m_e * elec_num; - int stride_nw_e = stride_i_e * (cord_num+1); + const int64_t stride_m_e = elec_num; + const int64_t stride_i_e = stride_m_e * elec_num; + const int64_t stride_nw_e = stride_i_e * (cord_num+1); // For een_rescaled_n... - int stride_k_n = elec_num; - int stride_j_n = stride_k_n * nucl_num; - int stride_nw_n = stride_j_n * (cord_num+1); + const int64_t stride_k_n = elec_num; + const int64_t stride_j_n = stride_k_n * nucl_num; + const int64_t stride_nw_n = stride_j_n * (cord_num+1); - #pragma acc parallel - #pragma acc loop independent gang worker vector collapse(5) - for (int nw=0; nw < walk_num; ++nw) { - for (int i=0; i Date: Wed, 6 Apr 2022 11:51:36 +0200 Subject: [PATCH 23/27] Fix openacc --- configure.ac | 8 ++++---- org/qmckl_jastrow.org | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 835f3f9..de4949c 100644 --- a/configure.ac +++ b/configure.ac @@ -256,7 +256,7 @@ AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -acc" + CFLAGS="$CFLAGS -acc=gpu" ;; esac @@ -266,7 +266,7 @@ AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ FCFLAGS="$FCFLAGS -fopenacc" ;; *nvfortran*) - FCFLAGS="$FCFLAGS -acc" + FCFLAGS="$FCFLAGS -acc=gpu" ;; esac @@ -282,7 +282,7 @@ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -acc" + CFLAGS="$CFLAGS -acc=gpu" ;; esac @@ -292,7 +292,7 @@ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ FCFLAGS="$FCFLAGS -fopenacc" ;; *nvfortran*) - FCFLAGS="$FCFLAGS -acc" + FCFLAGS="$FCFLAGS -acc=gpu" ;; esac ]) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 7b5bcee..35003f5 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5706,7 +5706,7 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num; const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num; - #pragma acc parallel create(tmp_c[0:size_tmp_c]) copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) + #pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) { #pragma acc loop independent gang worker vector for (int64_t i=0 ; i Date: Wed, 6 Apr 2022 13:48:37 +0200 Subject: [PATCH 24/27] Improve configure --- configure.ac | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index de4949c..d3c9471 100644 --- a/configure.ac +++ b/configure.ac @@ -246,10 +246,41 @@ fi ## Enable GPU offloading -# OpenACC offloading -AC_ARG_ENABLE(openacc, [AS_HELP_STRING([--enable-openacc],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) -AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ +# GPU offloading +AC_ARG_ENABLE(gpu, [AS_HELP_STRING([--enable-gpu],[openmp|openacc : Use GPU-offloaded functions])], enable_gpu=$enableval, enable_gpu=no) +AS_IF([test "$enable_gpu" = "yes"], [enable_gpu="openmp"]) + +# OpenMP offloading +HAVE_OPENMP_OFFLOAD="no" +AS_IF([test "$enable_gpu" = "openmp"], [ + AC_DEFINE([HAVE_OPENMP_OFFLOAD], [1], [If defined, activate OpenMP-offloaded routines]) + HAVE_OPENMP_OFFLOAD="yes" + case $CC in + + *gcc*) + CFLAGS="$CFLAGS -fopenmp" + ;; + *nvc*) + CFLAGS="$CFLAGS -mp=gpu" + ;; + esac + + case $FC in + + *gfortran*) + FCFLAGS="$FCFLAGS -fopenmp" + ;; + *nvfortran*) + FCFLAGS="$FCFLAGS -mp=gpu" + ;; + esac] +) + +# OpenMP offloading +HAVE_OPENACC_OFFLOAD="no" +AS_IF([test "$enable_gpu" = "openacc"], [ AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines]) + HAVE_OPENACC_OFFLOAD="yes" case $CC in *gcc*) @@ -430,6 +461,7 @@ LDFLAGS:........: ${LDFLAGS} LIBS............: ${LIBS} USE CHAMELEON...: ${with_chameleon} HPC version.....: ${HAVE_HPC} +OpenMP offload..: ${HAVE_OPENMP_OFFLOAD} OpenACC offload.: ${HAVE_OPENACC_OFFLOAD} cuBLAS offload..: ${HAVE_CUBLAS_OFFLOAD} From b79a23897d333fd5bbf1b578e1a9a87e41e08b61 Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Wed, 6 Apr 2022 14:01:13 +0200 Subject: [PATCH 25/27] qmckl_compute_een_rescaled_e_hpc (c version) working --- org/qmckl_jastrow.org | 209 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 186 insertions(+), 23 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 14e1f1e..e2eb0cd 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -3241,7 +3241,7 @@ qmckl_exit_code qmckl_provide_een_rescaled_e(qmckl_context context) | ~een_rescaled_e~ | ~double[walk_num][0:cord_num][elec_num][elec_num]~ | out | Electron-electron rescaled distances | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_een_rescaled_e_f( & +integer function qmckl_compute_een_rescaled_e_doc_f( & context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & ee_distance, een_rescaled_e) & result(info) @@ -3260,7 +3260,6 @@ integer function qmckl_compute_een_rescaled_e_f( & allocate(een_rescaled_e_ij(elec_num * (elec_num - 1) / 2, cord_num + 1)) - info = QMCKL_SUCCESS if (context == QMCKL_NULL_CONTEXT) then @@ -3289,6 +3288,7 @@ integer function qmckl_compute_een_rescaled_e_f( & een_rescaled_e_ij = 0.0d0 een_rescaled_e_ij(:, 1) = 1.0d0 + k = 0 do j = 1, elec_num do i = 1, j - 1 @@ -3297,6 +3297,7 @@ integer function qmckl_compute_een_rescaled_e_f( & end do end do + do l = 2, cord_num do k = 1, elec_num * (elec_num - 1)/2 een_rescaled_e_ij(k, l + 1) = een_rescaled_e_ij(k, l + 1 - 1) * een_rescaled_e_ij(k, 2) @@ -3305,6 +3306,7 @@ integer function qmckl_compute_een_rescaled_e_f( & ! prepare the actual een table een_rescaled_e(:, :, 0, nw) = 1.0d0 + do l = 1, cord_num k = 0 do j = 1, elec_num @@ -3325,28 +3327,14 @@ integer function qmckl_compute_een_rescaled_e_f( & end do -end function qmckl_compute_een_rescaled_e_f +end function qmckl_compute_een_rescaled_e_doc_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_compute_een_rescaled_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t cord_num, - const double rescale_factor_kappa_ee, - const double* ee_distance, - double* const een_rescaled_e ); - #+end_src - - #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname="qmckl_compute_een_rescaled_e_doc") #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_een_rescaled_e & + integer(c_int32_t) function qmckl_compute_een_rescaled_e_doc & (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & ee_distance, een_rescaled_e) & bind(C) result(info) @@ -3362,13 +3350,188 @@ end function qmckl_compute_een_rescaled_e_f real (c_double ) , intent(in) :: ee_distance(elec_num,elec_num,walk_num) real (c_double ) , intent(out) :: een_rescaled_e(elec_num,elec_num,0:cord_num,walk_num) - integer(c_int32_t), external :: qmckl_compute_een_rescaled_e_f - info = qmckl_compute_een_rescaled_e_f & + integer(c_int32_t), external :: qmckl_compute_een_rescaled_e_doc_f + info = qmckl_compute_een_rescaled_e_doc_f & (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) - end function qmckl_compute_een_rescaled_e + end function qmckl_compute_een_rescaled_e_doc #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ) { + + double *een_rescaled_e_ij; + double x; + const int64_t elec_pairs = (elec_num * (elec_num - 1)) / 2; + const int64_t len_een_ij = elec_pairs * (cord_num + 1); + int64_t k; + + // number of element for the een_rescaled_e_ij[N_e*(N_e-1)/2][cord+1] + // probably in C is better [cord+1, Ne*(Ne-1)/2] + //elec_pairs = (elec_num * (elec_num - 1)) / 2; + //len_een_ij = elec_pairs * (cord_num + 1); + een_rescaled_e_ij = (double *) malloc (len_een_ij * sizeof(double)); + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + // Prepare table of exponentiated distances raised to appropriate power + // init + + for (int kk = 0; kk < walk_num*(cord_num+1)*elec_num*elec_num; ++kk) { + een_rescaled_e[kk]= 0.0; + } + + /* + for (int nw = 0; nw < walk_num; ++nw) { + for (int l = 0; l < (cord_num + 1); ++l) { + for (int i = 0; i < elec_num; ++i) { + for (int j = 0; j < elec_num; ++j) { + een_rescaled_e[j + i*elec_num + l*elec_num*elec_num + nw*(cord_num+1)*elec_num*elec_num]= 0.0; + } + } + } + } + */ + + for (int nw = 0; nw < walk_num; ++nw) { + + for (int kk = 0; kk < len_een_ij; ++kk) { + // this array initialized at 0 except een_rescaled_e_ij(:, 1) = 1.0d0 + // and the arrangement of indices is [cord_num+1, ne*(ne-1)/2] + een_rescaled_e_ij[kk]= ( kk < (elec_pairs) ? 1.0 : 0.0 ); + } + + k = 0; + for (int i = 0; i < elec_num; ++i) { + for (int j = 0; j < i; ++j) { + // een_rescaled_e_ij(k, 2) = dexp(-rescale_factor_kappa_ee * ee_distance(i, j, nw)); + een_rescaled_e_ij[k + elec_pairs] = exp(-rescale_factor_kappa_ee * \ + ee_distance[j + i*elec_num + nw*(elec_num*elec_num)]); + k = k + 1; + } + } + + + for (int l = 2; l < (cord_num+1); ++l) { + for (int k = 0; k < elec_pairs; ++k) { + // een_rescaled_e_ij(k, l + 1) = een_rescaled_e_ij(k, l + 1 - 1) * een_rescaled_e_ij(k, 2) + een_rescaled_e_ij[k+l*elec_pairs] = een_rescaled_e_ij[k + (l - 1)*elec_pairs] * \ + een_rescaled_e_ij[k + elec_pairs]; + } + } + + + // prepare the actual een table + for (int i = 0; i < elec_num; ++i){ + for (int j = 0; j < elec_num; ++j) { + een_rescaled_e[j + i*elec_num + 0 + nw*(cord_num+1)*elec_num*elec_num] = 1.0; + } + } + + // Up to here it should work. + for ( int l = 1; l < (cord_num+1); ++l) { + k = 0; + for (int i = 0; i < elec_num; ++i) { + for (int j = 0; j < i; ++j) { + x = een_rescaled_e_ij[k + l*elec_pairs]; + een_rescaled_e[j + i*elec_num + l*elec_num*elec_num + nw*elec_num*elec_num*(cord_num+1)] = x; + een_rescaled_e[i + j*elec_num + l*elec_num*elec_num + nw*elec_num*elec_num*(cord_num+1)] = x; + k = k + 1; + } + } + } + + for (int l = 0; l < (cord_num + 1); ++l) { + for (int j = 0; j < elec_num; ++j) { + een_rescaled_e[j + j*elec_num + l*elec_num*elec_num + nw*elec_num*elec_num*(cord_num+1)] = 0.0; + } + } + + } + + free(een_rescaled_e_ij); + + return QMCKL_SUCCESS; +} + #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname="qmckl_compute_een_rescaled_e_doc") + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_een_rescaled_e ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ); + #+end_src + + #+begin_src c :tangle (eval h_private_func) :comments org + qmckl_exit_code qmckl_compute_een_rescaled_e_doc ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ); + #+end_src + + #+begin_src c :tangle (eval h_private_func) :comments org + qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes + qmckl_exit_code qmckl_compute_een_rescaled_e ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ) { + + #ifdef HAVE_HPC + return qmckl_compute_een_rescaled_e_hpc(context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e); + #else + return qmckl_compute_een_rescaled_e_doc(context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e); + #endif + } + #+end_src + + + *** Test #+begin_src python :results output :exports none :noweb yes @@ -3443,7 +3606,6 @@ assert(fabs(een_rescaled_e[0][1][0][4]-0.01754273169464735) < 1.e-12); assert(fabs(een_rescaled_e[0][2][1][3]-0.02214680362033448) < 1.e-12); assert(fabs(een_rescaled_e[0][2][1][4]-0.0005700154999202759) < 1.e-12); assert(fabs(een_rescaled_e[0][2][1][5]-0.3424402276009091) < 1.e-12); - #+end_src ** Electron-electron rescaled distances for each order and derivatives @@ -5916,6 +6078,7 @@ rc = qmckl_get_jastrow_dtmp_c(context, &(dtmp_c[0][0][0][0][0][0])); assert(fabs(tmp_c[0][0][1][0][0] - 2.7083473948352403) < 1e-12); assert(fabs(dtmp_c[0][1][0][0][0][0] - 0.237440520852232) < 1e-12); +return QMCKL_SUCCESS; #+end_src ** Electron-electron-nucleus Jastrow \(f_{een}\) From cc5f6914f6e92688b59ee187560a5d6a3a5ace48 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 6 Apr 2022 16:26:35 +0200 Subject: [PATCH 26/27] Cleaning --- org/qmckl_jastrow.org | 1248 +++++++++++++++++++++-------------------- org/qmckl_mo.org | 2 - 2 files changed, 646 insertions(+), 604 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 35003f5..666da47 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -151,7 +151,6 @@ int main() { | ~factor_en_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the en derivative | | ~factor_een_deriv_e~ | ~double[4][nelec][walk_num]~ | out | Derivative of the Jastrow factor: electron-electron-nucleus part | | ~factor_een_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the een derivative | - | ~offload_type~ | ~qmckl_jastrow_offload_type~ | in | Enum type to change offload type at runtime | computed data: @@ -328,14 +327,6 @@ kappa_inv = 1.0/kappa ** Data structure -#+begin_src c :comments org :tangle (eval h_type) -typedef enum qmckl_jastrow_offload_type{ - OFFLOAD_NONE, - OFFLOAD_OPENACC, - OFFLOAD_CUBLAS -} qmckl_jastrow_offload_type; -#+end_src - #+begin_src c :comments org :tangle (eval h_private_type) typedef struct qmckl_jastrow_struct{ int32_t uninitialized; @@ -381,7 +372,10 @@ typedef struct qmckl_jastrow_struct{ uint64_t een_rescaled_n_deriv_e_date; bool provided; char * type; - qmckl_jastrow_offload_type offload_type; + + #ifdef HAVE_HPC + bool gpu_offload; + #endif } qmckl_jastrow_struct; #+end_src @@ -426,7 +420,6 @@ qmckl_exit_code qmckl_get_jastrow_type_nucl_vector (qmckl_context context, int qmckl_exit_code qmckl_get_jastrow_aord_vector (qmckl_context context, double * const aord_vector, const int64_t size_max); qmckl_exit_code qmckl_get_jastrow_bord_vector (qmckl_context context, double * const bord_vector, const int64_t size_max); qmckl_exit_code qmckl_get_jastrow_cord_vector (qmckl_context context, double * const cord_vector, const int64_t size_max); -qmckl_exit_code qmckl_get_jastrow_offload_type (qmckl_context context, qmckl_jastrow_offload_type * const offload_type); #+end_src Along with these core functions, calculation of the jastrow factor @@ -724,32 +717,6 @@ qmckl_get_jastrow_cord_vector (const qmckl_context context, return QMCKL_SUCCESS; } -qmckl_exit_code qmckl_get_jastrow_offload_type (const qmckl_context context, qmckl_jastrow_offload_type* const offload_type) { - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return (char) 0; - } - - if (offload_type == NULL) { - return qmckl_failwith( context, - QMCKL_INVALID_ARG_2, - "qmckl_get_jastrow_offload_type", - "offload_type is a null pointer"); - } - - qmckl_context_struct* const ctx = (qmckl_context_struct*) context; - assert (ctx != NULL); - - int32_t mask = 1 << 0; - - if ( (ctx->jastrow.uninitialized & mask) != 0) { - return QMCKL_NOT_PROVIDED; - } - - *offload_type = ctx->jastrow.offload_type; - return QMCKL_SUCCESS; -} - #+end_src ** Initialization functions @@ -764,7 +731,6 @@ qmckl_exit_code qmckl_set_jastrow_type_nucl_vector (qmckl_context context, con qmckl_exit_code qmckl_set_jastrow_aord_vector (qmckl_context context, const double * aord_vector, const int64_t size_max); qmckl_exit_code qmckl_set_jastrow_bord_vector (qmckl_context context, const double * bord_vector, const int64_t size_max); qmckl_exit_code qmckl_set_jastrow_cord_vector (qmckl_context context, const double * cord_vector, const int64_t size_max); -qmckl_exit_code qmckl_set_jastrow_offload_type (qmckl_context context, const qmckl_jastrow_offload_type offload_type); #+end_src #+NAME:pre2 @@ -1101,14 +1067,6 @@ qmckl_set_jastrow_cord_vector(qmckl_context context, <> } -qmckl_exit_code -qmckl_set_jastrow_offload_type(qmckl_context context, const qmckl_jastrow_offload_type offload_type) -{ -<> - ctx->jastrow.offload_type = offload_type; - return QMCKL_SUCCESS; -} - #+end_src When the required information is completely entered, other data structures are @@ -1155,6 +1113,13 @@ qmckl_exit_code qmckl_finalize_jastrow(qmckl_context context) { NULL); } + /* Decide if the Jastrow if offloaded on GPU or not */ +#if defined(HAVE_HPC) && (defined(HAVE_CUBLAS_OFFLOAD) || defined(HAVE_OPENACC_OFFLOAD) || defined(HAVE_OPENMP_OFFLOAD)) + ctx->jastrow.gpu_offload = true; // ctx->electron.num > 100; +#else + ctx->jastrow.gpu_offload = false; +#endif + qmckl_exit_code rc = QMCKL_SUCCESS; return rc; @@ -1540,16 +1505,16 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( } #+end_src - #+CALL: generate_c_header(table=qmckl_asymp_jasb_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_asymp_jasb_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_asymp_jasb ( - const qmckl_context context, - const int64_t bord_num, - const double* bord_vector, - const double rescale_factor_kappa_ee, - double* const asymp_jasb ); + const qmckl_context context, + const int64_t bord_num, + const double* bord_vector, + const double rescale_factor_kappa_ee, + double* const asymp_jasb ); #+end_src @@ -1892,19 +1857,19 @@ qmckl_exit_code qmckl_compute_factor_ee ( #+end_src #+CALL: generate_c_header(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - + #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_ee ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t up_num, - const int64_t bord_num, - const double* bord_vector, - const double* ee_distance_rescaled, - const double* asymp_jasb, - double* const factor_ee ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* asymp_jasb, + double* const factor_ee ); #+end_src @@ -2202,21 +2167,21 @@ integer function qmckl_compute_factor_ee_deriv_e_f( & end function qmckl_compute_factor_ee_deriv_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_ee_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_ee_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_ee_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t up_num, - const int64_t bord_num, - const double* bord_vector, - const double* ee_distance_rescaled, - const double* ee_distance_rescaled_deriv_e, - const double* asymp_jasb, - double* const factor_ee_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* ee_distance_rescaled_deriv_e, + const double* asymp_jasb, + double* const factor_ee_deriv_e ); #+end_src @@ -2224,8 +2189,8 @@ end function qmckl_compute_factor_ee_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none -integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & - (context, & + integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & + (context, & walk_num, & elec_num, & up_num, & @@ -2235,7 +2200,7 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & ee_distance_rescaled_deriv_e, & asymp_jasb, & factor_ee_deriv_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -2245,7 +2210,7 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & integer (c_int64_t) , intent(in) , value :: elec_num integer (c_int64_t) , intent(in) , value :: up_num integer (c_int64_t) , intent(in) , value :: bord_num - real (c_double ) , intent(in) :: bord_vector(bord_num + 1) + real (c_double ) , intent(in) :: bord_vector(bord_num+1) real (c_double ) , intent(in) :: ee_distance_rescaled(elec_num,elec_num,walk_num) real (c_double ) , intent(in) :: ee_distance_rescaled_deriv_e(elec_num,elec_num,4,walk_num) real (c_double ) , intent(in) :: asymp_jasb(2) @@ -2253,7 +2218,7 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & integer(c_int32_t), external :: qmckl_compute_factor_ee_deriv_e_f info = qmckl_compute_factor_ee_deriv_e_f & - (context, & + (context, & walk_num, & elec_num, & up_num, & @@ -2676,21 +2641,20 @@ qmckl_exit_code qmckl_compute_factor_en ( #+end_src - #+CALL: generate_c_header(table=qmckl_factor_en_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_en_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_en ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const int64_t aord_num, - const double* aord_vector, - const double* en_distance_rescaled, - double* const factor_en ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + double* const factor_en ); #+end_src @@ -2970,22 +2934,21 @@ integer function qmckl_compute_factor_en_deriv_e_f( & end function qmckl_compute_factor_en_deriv_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_en_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_en_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_en_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const int64_t aord_num, - const double* aord_vector, - const double* en_distance_rescaled, - const double* en_distance_rescaled_deriv_e, - double* const factor_en_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + const double* en_distance_rescaled_deriv_e, + double* const factor_en_deriv_e ); #+end_src @@ -2994,7 +2957,7 @@ end function qmckl_compute_factor_en_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_en_deriv_e & - (context, & + (context, & walk_num, & elec_num, & nucl_num, & @@ -3005,7 +2968,7 @@ end function qmckl_compute_factor_en_deriv_e_f en_distance_rescaled, & en_distance_rescaled_deriv_e, & factor_en_deriv_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3017,14 +2980,14 @@ end function qmckl_compute_factor_en_deriv_e_f integer (c_int64_t) , intent(in) , value :: type_nucl_num integer (c_int64_t) , intent(in) :: type_nucl_vector(nucl_num) integer (c_int64_t) , intent(in) , value :: aord_num - real (c_double ) , intent(in) :: aord_vector(aord_num + 1, type_nucl_num) + real (c_double ) , intent(in) :: aord_vector(type_nucl_num,aord_num+1) real (c_double ) , intent(in) :: en_distance_rescaled(elec_num,nucl_num,walk_num) real (c_double ) , intent(in) :: en_distance_rescaled_deriv_e(elec_num,nucl_num,4,walk_num) real (c_double ) , intent(out) :: factor_en_deriv_e(elec_num,4,walk_num) integer(c_int32_t), external :: qmckl_compute_factor_en_deriv_e_f info = qmckl_compute_factor_en_deriv_e_f & - (context, & + (context, & walk_num, & elec_num, & nucl_num, & @@ -3366,18 +3329,17 @@ integer function qmckl_compute_een_rescaled_e_f( & end function qmckl_compute_een_rescaled_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_een_rescaled_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t cord_num, - const double rescale_factor_kappa_ee, - const double* ee_distance, - double* const een_rescaled_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -3385,9 +3347,8 @@ end function qmckl_compute_een_rescaled_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_een_rescaled_e & - (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & - ee_distance, een_rescaled_e) & - bind(C) result(info) + (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3402,7 +3363,7 @@ end function qmckl_compute_een_rescaled_e_f integer(c_int32_t), external :: qmckl_compute_een_rescaled_e_f info = qmckl_compute_een_rescaled_e_f & - (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) + (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) end function qmckl_compute_een_rescaled_e #+end_src @@ -3597,7 +3558,7 @@ qmckl_exit_code qmckl_provide_een_rescaled_e_deriv_e(qmckl_context context) *** Compute :PROPERTIES: - :Name: qmckl_compute_een_rescaled_e_deriv_e + :Name: qmckl_compute_factor_een_rescaled_e_deriv_e :CRetType: qmckl_exit_code :FRetType: qmckl_exit_code :END: @@ -3704,21 +3665,20 @@ integer function qmckl_compute_factor_een_rescaled_e_deriv_e_f( & end function qmckl_compute_factor_een_rescaled_e_deriv_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_compute_factor_een_rescaled_e_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t cord_num, - const double rescale_factor_kappa_ee, - const double* coord_new, - const double* ee_distance, - const double* een_rescaled_e, - double* const een_rescaled_e_deriv_e ); - #+end_src + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none + qmckl_exit_code qmckl_compute_factor_een_rescaled_e_deriv_e ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* coord_new, + const double* ee_distance, + const double* een_rescaled_e, + double* const een_rescaled_e_deriv_e ); + #+end_src #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -3726,7 +3686,7 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_een_rescaled_e_deriv_e & - (context, & + (context, & walk_num, & elec_num, & cord_num, & @@ -3735,7 +3695,7 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f ee_distance, & een_rescaled_e, & een_rescaled_e_deriv_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3752,7 +3712,7 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f integer(c_int32_t), external :: qmckl_compute_factor_een_rescaled_e_deriv_e_f info = qmckl_compute_factor_een_rescaled_e_deriv_e_f & - (context, & + (context, & walk_num, & elec_num, & cord_num, & @@ -4126,19 +4086,18 @@ qmckl_exit_code qmckl_compute_een_rescaled_n ( } #+end_src - #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_n_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_n_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_een_rescaled_n ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const double rescale_factor_kappa_en, - const double* en_distance, - double* const een_rescaled_n ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* en_distance, + double* const een_rescaled_n ); #+end_src *** Test @@ -4433,22 +4392,21 @@ integer function qmckl_compute_factor_een_rescaled_n_deriv_e_f( & end function qmckl_compute_factor_een_rescaled_n_deriv_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_een_rescaled_n_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const double rescale_factor_kappa_en, - const double* coord_new, - const double* coord, - const double* en_distance, - const double* een_rescaled_n, - double* const een_rescaled_n_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* coord_new, + const double* coord, + const double* en_distance, + const double* een_rescaled_n, + double* const een_rescaled_n_deriv_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -4456,7 +4414,7 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_een_rescaled_n_deriv_e & - (context, & + (context, & walk_num, & elec_num, & nucl_num, & @@ -4467,7 +4425,7 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f en_distance, & een_rescaled_n, & een_rescaled_n_deriv_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -4481,12 +4439,12 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f real (c_double ) , intent(in) :: coord_new(elec_num,3,walk_num) real (c_double ) , intent(in) :: coord(nucl_num,3) real (c_double ) , intent(in) :: en_distance(nucl_num,elec_num,walk_num) - real (c_double ) , intent(in) :: een_rescaled_n(0:cord_num,nucl_num,elec_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) real (c_double ) , intent(out) :: een_rescaled_n_deriv_e(elec_num,4,nucl_num,0:cord_num,walk_num) integer(c_int32_t), external :: qmckl_compute_factor_een_rescaled_n_deriv_e_f info = qmckl_compute_factor_een_rescaled_n_deriv_e_f & - (context, & + (context, & walk_num, & elec_num, & nucl_num, & @@ -4891,64 +4849,54 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) } /* Choose the correct compute function (depending on offload type) */ - switch(ctx->jastrow.offload_type) { - case OFFLOAD_OPENACC: - #ifdef HAVE_OPENACC_OFFLOAD - rc = - qmckl_compute_tmp_c_acc_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - #else - rc = qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - - #endif - break; - case OFFLOAD_CUBLAS: - #ifdef HAVE_CUBLAS_OFFLOAD - rc = - qmckl_compute_tmp_c_cublas_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - #else - rc = qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - #endif - break; - default: - rc = qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - break; +#ifdef HAVE_HPC + const bool gpu_offload = ctx->jastrow.gpu_offload; +#else + const bool gpu_offload = false; +#endif + + if (gpu_offload) { +#ifdef HAVE_CUBLAS_OFFLOAD + rc = qmckl_compute_tmp_c_cublas_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); +#elif HAVE_OPENACC_OFFLOAD + rc = qmckl_compute_tmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); +#elif HAVE_OPENMP_OFFLOAD + rc = qmckl_compute_tmp_c_omp_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); +#else + rc = QMCKL_FAILURE; +#endif + } else { + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); } - + ctx->jastrow.tmp_c_date = ctx->date; } @@ -4988,18 +4936,44 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } - switch(ctx->jastrow.offload_type) { - case OFFLOAD_OPENACC: - #ifdef HAVE_OPENACC_OFFLOAD - rc = qmckl_compute_dtmp_c_acc_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); - #else +#ifdef HAVE_HPC + const bool gpu_offload = ctx->jastrow.gpu_offload; +#else + const bool gpu_offload = false; +#endif + + if (gpu_offload) { +#ifdef HAVE_CUBLAS_OFFLOAD + rc = qmckl_compute_dtmp_c_cublas_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); +#elif HAVE_OPENACC_OFFLOAD + rc = qmckl_compute_dtmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); +#elif HAVE_OPENMP_OFFLOAD + rc = qmckl_compute_dtmp_c_omp_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); +#else + rc = QMCKL_FAILURE; +#endif + } else { rc = qmckl_compute_dtmp_c(context, ctx->jastrow.cord_num, ctx->electron.num, @@ -5008,39 +4982,6 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); - #endif - break; - case OFFLOAD_CUBLAS: - #ifdef HAVE_CUBLAS_OFFLOAD - rc = qmckl_compute_dtmp_c_acc_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); - #else - rc = qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); - #endif - break; - default: - rc = qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); - break; } if (rc != QMCKL_SUCCESS) { @@ -5148,14 +5089,13 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( } #+end_src - #+CALL: generate_c_header(table=qmckl_factor_dim_cord_vect_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_dim_cord_vect_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_dim_cord_vect ( - const qmckl_context context, - const int64_t cord_num, - int64_t* const dim_cord_vect ); + const qmckl_context context, + const int64_t cord_num, + int64_t* const dim_cord_vect ); #+end_src @@ -5224,18 +5164,17 @@ integer function qmckl_compute_cord_vect_full_f( & end function qmckl_compute_cord_vect_full_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_cord_vect_full ( - const qmckl_context context, - const int64_t nucl_num, - const int64_t dim_cord_vect, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const double* cord_vector, - double* const cord_vect_full ); + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ); #+end_src @@ -5244,8 +5183,8 @@ end function qmckl_compute_cord_vect_full_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_cord_vect_full & - (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) & - bind(C) result(info) + (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -5260,7 +5199,7 @@ end function qmckl_compute_cord_vect_full_f integer(c_int32_t), external :: qmckl_compute_cord_vect_full_f info = qmckl_compute_cord_vect_full_f & - (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) + (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) end function qmckl_compute_cord_vect_full #+end_src @@ -5381,15 +5320,14 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( } #+end_src - #+CALL: generate_c_header(table=qmckl_factor_lkpm_combined_index_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_lkpm_combined_index_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_lkpm_combined_index ( - const qmckl_context context, - const int64_t cord_num, - const int64_t dim_cord_vect, - int64_t* const lkpm_combined_index ); + const qmckl_context context, + const int64_t cord_num, + const int64_t dim_cord_vect, + int64_t* const lkpm_combined_index ); #+end_src @@ -5413,6 +5351,38 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) +{ +#ifdef HAVE_HPC + return qmckl_compute_tmp_c_hpc(context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c); +#else + return qmckl_compute_tmp_c_doc(context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c); +#endif +} + #+end_src + +# #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c") + + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none + qmckl_exit_code qmckl_compute_tmp_c ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + #+begin_src f90 :comments org :tangle (eval f) :noweb yes integer function qmckl_compute_tmp_c_doc_f( & context, cord_num, elec_num, nucl_num, & @@ -5481,8 +5451,20 @@ integer function qmckl_compute_tmp_c_doc_f( & end function qmckl_compute_tmp_c_doc_f #+end_src -#+CALL: generate_c_interface(table=qmckl_factor_tmp_c_args,rettyp=get_value("FRetType"),fname="qmckl_compute_tmp_c_doc") + #+begin_src c :tangle (eval h_private_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c_doc ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + #+CALL: generate_c_interface(table=qmckl_factor_tmp_c_args,rettyp=get_value("FRetType"),fname="qmckl_compute_tmp_c_doc") + #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_tmp_c_doc & @@ -5508,6 +5490,7 @@ integer(c_int32_t) function qmckl_compute_tmp_c_doc & end function qmckl_compute_tmp_c_doc #+end_src +**** CPU :noexport: #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_tmp_c_hpc ( @@ -5559,16 +5542,15 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t cf = bf; +#ifdef HAVE_OPENMP +#pragma omp parallel for collapse(2) +#endif for (int64_t nw=0; nw < walk_num; ++nw) { for (int64_t i=0; i Date: Wed, 6 Apr 2022 17:10:23 +0200 Subject: [PATCH 27/27] Fix CI --- org/qmckl_jastrow.org | 2 -- 1 file changed, 2 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 9173a43..c4f2e28 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -1116,8 +1116,6 @@ qmckl_exit_code qmckl_finalize_jastrow(qmckl_context context) { /* Decide if the Jastrow if offloaded on GPU or not */ #if defined(HAVE_HPC) && (defined(HAVE_CUBLAS_OFFLOAD) || defined(HAVE_OPENACC_OFFLOAD) || defined(HAVE_OPENMP_OFFLOAD)) ctx->jastrow.gpu_offload = true; // ctx->electron.num > 100; -#else - ctx->jastrow.gpu_offload = false; #endif qmckl_exit_code rc = QMCKL_SUCCESS;