From 26fe759209c821a90eb3f1e4fdc86a75542445a5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 27 Feb 2022 12:35:58 +0100 Subject: [PATCH 01/15] Added examples.org --- org/examples.org | 190 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 org/examples.org diff --git a/org/examples.org b/org/examples.org new file mode 100644 index 0000000..6f8f4d0 --- /dev/null +++ b/org/examples.org @@ -0,0 +1,190 @@ +#+TITLE: Code examples +#+SETUPFILE: ../tools/theme.setup +#+INCLUDE: ../tools/lib.org + +In this section, we present examples of usage of QMCkl. +For simplicity, we assume that the wave function parameters are stores +in a [[https://github.com/TREX-CoE/trexio][TREXIO]] file. + +* Checking errors + + All QMCkl functions return an error code. A convenient way to handle + errors is to write an error-checking function that displays the + error in text format and exits the program. + + #+NAME: qmckl_check_error + #+begin_src f90 +subroutine qmckl_check_error(rc, message) + use qmckl + implicit none + integer(qmckl_exit_code), intent(in) :: rc + character(len=*) , intent(in) :: message + character(len=128) :: str_buffer + if (rc /= QMCKL_SUCCESS) then + print *, message + call qmckl_string_of_error(rc, str_buffer) + print *, str_buffer + call exit(rc) + end if +end subroutine qmckl_check_error + #+end_src + +* Computing an atomic orbital on a grid + :PROPERTIES: + :header-args: :tangle ao_grid.f90 + :END: + + The following program, in Fortran, computes the values of an atomic + orbital on a regular 3-dimensional grid. The 100^3 grid points are + automatically defined, such that the molecule fits in a box with 5 + atomic units in the borders. + + This program uses the ~qmckl_check_error~ function defined above. + + To use this program, run + + #+begin_src bash :tangle no +$ ao_grid + #+end_src + + + #+begin_src f90 :noweb yes +<> + +program ao_grid + use qmckl + implicit none + + integer(qmckl_context) :: qmckl_ctx ! QMCkl context + integer(qmckl_exit_code) :: rc ! Exit code of QMCkl functions + + character(len=128) :: trexio_filename + character(len=128) :: str_buffer + integer :: ao_id + integer :: point_num_x + + integer(c_int64_t) :: nucl_num + double precision, allocatable :: nucl_coord(:,:) + + integer(c_int64_t) :: point_num + integer(c_int64_t) :: ao_num + integer(c_int64_t) :: ipoint, i, j, k + double precision :: x, y, z, dr(3) + double precision :: rmin(3), rmax(3) + double precision, allocatable :: points(:,:) + double precision, allocatable :: ao_vgl(:,:,:) + #+end_src + + Start by fetching the command-line arguments: + + #+begin_src f90 + if (iargc() /= 3) then + print *, 'Syntax: ao_grid ' + call exit(-1) + end if + call getarg(1, trexio_filename) + call getarg(2, str_buffer) + read(str_buffer, *) ao_id + call getarg(3, str_buffer) + read(str_buffer, *) point_num_x + + if (point_num_x < 0 .or. point_num_x > 300) then + print *, 'Error: 0 < point_num < 300' + call exit(-1) + end if + #+end_src + + Create the QMCkl context and initialize it with the wave function + present in the TREXIO file: + + #+begin_src f90 + qmckl_ctx = qmckl_context_create() + rc = qmckl_trexio_read(qmckl_ctx, trexio_filename, 1_8*len(trim(trexio_filename))) + call qmckl_check_error(rc, 'Read TREXIO') + #+end_src + + Now we will compute the limits of the box in which the molecule fits. + For that, we first need to ask QMCkl the coordinates of nuclei. + + #+begin_src f90 + rc = qmckl_get_nucleus_num(qmckl_ctx, nucl_num) + call qmckl_check_error(rc, 'Get nucleus num') + + allocate( nucl_coord(3, nucl_num) ) + rc = qmckl_get_nucleus_coord(qmckl_ctx, 'N', nucl_coord, 3_8*nucl_num) + call qmckl_check_error(rc, 'Get nucleus coord') + #+end_src + + We now compute the coordinates of opposite points of the box, and + the distance between points along the 3 directions: + + #+begin_src f90 + rmin(1) = minval( nucl_coord(1,:) ) - 5.d0 + rmin(2) = minval( nucl_coord(2,:) ) - 5.d0 + rmin(3) = minval( nucl_coord(3,:) ) - 5.d0 + + rmax(1) = maxval( nucl_coord(1,:) ) + 5.d0 + rmax(2) = maxval( nucl_coord(2,:) ) + 5.d0 + rmax(3) = maxval( nucl_coord(3,:) ) + 5.d0 + + dr(1:3) = (rmax(1:3) - rmin(1:3)) / dble(point_num_x-1) + #+end_src + + We now produce the list of point coordinates where the AO will be + evaluated: + + #+begin_src f90 + point_num = point_num_x**3 + allocate( points(point_num, 3) ) + ipoint=0 + z = rmin(3) + do k=1,point_num_x + y = rmin(2) + do j=1,point_num_x + x = rmin(1) + do i=1,point_num_x + ipoint = ipoint+1 + points(ipoint,1) = x + points(ipoint,2) = y + points(ipoint,3) = z + x = x + dr(1) + end do + y = y + dr(2) + end do + z = z + dr(3) + end do + #+end_src + + We give the points to QMCkl: + + #+begin_src f90 + rc = qmckl_set_point(qmckl_ctx, 'T', points, point_num) + call qmckl_check_error(rc, 'Setting points') + #+end_src + + We allocate the space required to retrieve the values, gradients and + Laplacian of all AOs, and ask to retrieve the values of the + AOs computed at the point positions. For that, we first need to know + the number of AOs: + + #+begin_src f90 + rc = qmckl_get_ao_basis_ao_num(qmckl_ctx, ao_num) + call qmckl_check_error(rc, 'Getting ao_num') + + allocate( ao_vgl(ao_num, 5, point_num) ) + rc = qmckl_get_ao_basis_ao_vgl(qmckl_ctx, ao_vgl, ao_num*5_8*point_num) + call qmckl_check_error(rc, 'Setting points') + #+end_src + + We finally print the value of the AO: + + #+begin_src f90 + do ipoint=1, point_num + print '(3(F16.10,X),E20.10)', points(ipoint, 1:3), ao_vgl(ao_id,1,ipoint) + end do + #+end_src + + #+begin_src f90 + deallocate( nucl_coord, points, ao_vgl ) +end program ao_grid + #+end_src From 22cd823edf0959f9e65941cdc7f8ca58f6e1e394 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 27 Feb 2022 23:31:52 +0100 Subject: [PATCH 02/15] Working on generalized contractions --- org/examples.org | 19 +++++-- org/qmckl_ao.org | 128 +++++++++++++++++++++++++++++++++++++++----- org/qmckl_point.org | 7 +-- 3 files changed, 134 insertions(+), 20 deletions(-) diff --git a/org/examples.org b/org/examples.org index 6f8f4d0..58dc366 100644 --- a/org/examples.org +++ b/org/examples.org @@ -103,6 +103,19 @@ program ao_grid call qmckl_check_error(rc, 'Read TREXIO') #+end_src + We need to check that ~ao_id~ is in the range, so we get the total + number of AOs from QMCkl: + + #+begin_src f90 + rc = qmckl_get_ao_basis_ao_num(qmckl_ctx, ao_num) + call qmckl_check_error(rc, 'Getting ao_num') + + if (ao_id < 0 .or. ao_id > ao_num) then + print *, 'Error: 0 < ao_id < ', ao_num + call exit(-1) + end if + #+end_src + Now we will compute the limits of the box in which the molecule fits. For that, we first need to ask QMCkl the coordinates of nuclei. @@ -164,13 +177,9 @@ program ao_grid We allocate the space required to retrieve the values, gradients and Laplacian of all AOs, and ask to retrieve the values of the - AOs computed at the point positions. For that, we first need to know - the number of AOs: + AOs computed at the point positions. #+begin_src f90 - rc = qmckl_get_ao_basis_ao_num(qmckl_ctx, ao_num) - call qmckl_check_error(rc, 'Getting ao_num') - allocate( ao_vgl(ao_num, 5, point_num) ) rc = qmckl_get_ao_basis_ao_vgl(qmckl_ctx, ao_vgl, ao_num*5_8*point_num) call qmckl_check_error(rc, 'Setting points') diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index c821c67..1365e69 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -1,4 +1,3 @@ -B #+TITLE: Atomic Orbitals #+SETUPFILE: ../tools/theme.setup #+INCLUDE: ../tools/lib.org @@ -57,6 +56,7 @@ gradients and Laplacian of the atomic basis functions. #define QMCKL_AO_HPT #include +#include #+end_src #+begin_src f90 :tangle (eval f) :noweb yes @@ -2638,7 +2638,8 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { } #+end_src -*** HPC-specific data structures +*** TODO HPC-specific data structures + For faster access, we provide extra arrays for the shell information as: - $C_{psa}$ = =coef_per_nucleus (iprim, ishell, inucl)= @@ -2659,6 +2660,7 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { #+NAME:HPC_struct #+begin_src c :comments org :exports none /* HPC specific data structures */ + int32_t* prim_num_per_nucleus; qmckl_tensor coef_per_nucleus; qmckl_matrix expo_per_nucleus; #+end_src @@ -2668,16 +2670,37 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context); #+end_src #+begin_src c :comments org :tangle (eval c) :exports none +/* Data structure for sorting */ +struct combined { + double expo; + int64_t index; +}; + +/* Comparison function */ +int compare_basis( const void * l, const void * r ) +{ + const struct combined * _l= (const struct combined *)l; + const struct combined * _r= (const struct combined *)r; + if( _l->expo > _r->expo ) return 1; + if( _l->expo < _r->expo ) return -1; + return 0; +} + qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) { qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; + qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; + mem_info.size = ctx->nucleus.num * sizeof(int32_t); + ctx->ao_basis.prim_num_per_nucleus = (int32_t*) qmckl_malloc(context, mem_info); + + /* Find max number of primitives per nucleus */ int64_t shell_max = 0; int64_t prim_max = 0; int64_t nucl_num = ctx->nucleus.num; for (int inucl=0 ; inucl < nucl_num ; ++inucl) { shell_max = ctx->ao_basis.nucleus_shell_num[inucl] > shell_max ? - ctx->ao_basis.nucleus_shell_num[inucl] : shell_max; + ctx->ao_basis.nucleus_shell_num[inucl] : shell_max; int64_t prim_num = 0; const int64_t ishell_start = ctx->ao_basis.nucleus_index[inucl]; @@ -2686,9 +2709,11 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) prim_num += ctx->ao_basis.shell_prim_num[ishell]; } prim_max = prim_num > prim_max ? - prim_num : prim_max; + prim_num : prim_max; + ctx->ao_basis.prim_num_per_nucleus[inucl] = prim_num; } + int64_t size[3] = { prim_max, shell_max, nucl_num }; ctx->ao_basis.coef_per_nucleus = qmckl_tensor_alloc( context, 3, size ); ctx->ao_basis.coef_per_nucleus = qmckl_tensor_set(ctx->ao_basis.coef_per_nucleus, 0.); @@ -2696,9 +2721,10 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) ctx->ao_basis.expo_per_nucleus = qmckl_matrix_alloc( context, prim_max, nucl_num ); ctx->ao_basis.expo_per_nucleus = qmckl_matrix_set(ctx->ao_basis.expo_per_nucleus, 0.); - double expo[prim_max]; + struct combined expo[prim_max]; double coef[shell_max][prim_max]; - for (int inucl=0 ; inucl < nucl_num ; ++inucl) { + double newcoef[prim_max]; + for (int64_t inucl=0 ; inucl < nucl_num ; ++inucl) { memset(expo, 0, sizeof(expo)); memset(coef, 0, sizeof(expo)); @@ -2710,23 +2736,86 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) const int64_t iprim_start = ctx->ao_basis.shell_prim_index[ishell]; const int64_t iprim_end = ctx->ao_basis.shell_prim_index[ishell] + ctx->ao_basis.shell_prim_num[ishell]; for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) { - expo[idx] = ctx->ao_basis.coefficient_normalized[iprim]; - coef[ishell - ishell_start][idx] = ctx->ao_basis.coefficient_normalized[iprim]; + expo[idx].expo = ctx->ao_basis.exponent[iprim]; + expo[idx].index = idx; idx += 1; } } - for (int64_t i=0 ; iao_basis.expo_per_nucleus, i, inucl ) = expo[i]; + /* Sort exponents */ + qsort( expo, (size_t) idx, sizeof(struct combined), compare_basis ); + + idx = 0; + int64_t idx2 = 0; + for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { + + memset(newcoef, 0, sizeof(newcoef)); + const int64_t iprim_start = ctx->ao_basis.shell_prim_index[ishell]; + const int64_t iprim_end = ctx->ao_basis.shell_prim_index[ishell] + ctx->ao_basis.shell_prim_num[ishell]; + for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) { +// newcoef[idx] = ctx->ao_basis.coefficient_normalized[iprim]; + newcoef[idx] = ctx->ao_basis.coefficient[iprim]; + idx += 1; + } + for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + idx2 = expo[i].index; + coef[ishell - ishell_start][i] = newcoef[idx2]; + } } - for (int64_t j=0 ; jao_basis.prim_num_per_nucleus[inucl] ; ++i) { + if (expo[i].expo != expo[i-1].expo) { + idx += 1; + } + newidx[i] = idx; + } + idxmax = idx; + + for (int64_t j=0 ; jao_basis.prim_num_per_nucleus[inucl] ; ++i) { + newcoef[newidx[i]] += coef[j][i]; + } + for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + coef[j][i] = newcoef[i]; + } + } + + for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + expo[newidx[i]].expo = expo[i].expo; + } + ctx->ao_basis.prim_num_per_nucleus[inucl] = idxmax+1; + + for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + qmckl_mat( ctx->ao_basis.expo_per_nucleus, i, inucl ) = expo[i].expo; + } + + for (int64_t j=0 ; jao_basis.prim_num_per_nucleus[inucl] ; ++i) { qmckl_ten3( ctx->ao_basis.coef_per_nucleus, i, j, inucl ) = coef[j][i]; } } +/* + for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + printf("%4ld %4ld %15.5e | ", inucl, i, qmckl_mat( ctx->ao_basis.expo_per_nucleus, i, inucl )); + for (int64_t j=0 ; jao_basis.coef_per_nucleus, i, j, inucl )); + } + printf("\n"); + } + printf("\n"); +*/ } + + return QMCKL_SUCCESS; } @@ -5505,6 +5594,21 @@ qmckl_compute_ao_vgl_hpc_gaussian ( break; } + /* Compute all exponents */ + /* + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus ; ++i) { + const double v = expo[iprim]*r2; + if (v > cutoff) { + nidx = i; + break; + } + ar2[i] = -v; + } + + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus ; ++i) { + } + */ + const int64_t ishell_start = nucleus_index[inucl]; const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { diff --git a/org/qmckl_point.org b/org/qmckl_point.org index 08bda8c..07b0863 100644 --- a/org/qmckl_point.org +++ b/org/qmckl_point.org @@ -257,7 +257,8 @@ end interface enough, we overwrite the data contained in them. To set the data relative to the points in the context, one of the - following functions need to be called. + following functions need to be called. Here, ~num~ is the number of + points to set. #+begin_src c :comments org :tangle (eval h_func) qmckl_exit_code qmckl_set_point (qmckl_context context, @@ -348,7 +349,7 @@ qmckl_set_point (qmckl_context context, #+begin_src f90 :comments org :tangle (eval fh_func) :noweb yes interface integer(c_int32_t) function qmckl_set_point(context, & - transp, coord, size_max) bind(C) + transp, coord, num) bind(C) use, intrinsic :: iso_c_binding import implicit none @@ -356,7 +357,7 @@ interface integer (c_int64_t) , intent(in) , value :: context character(c_char) , intent(in) , value :: transp real (c_double ) , intent(in) :: coord(*) - integer (c_int64_t) , intent(in) , value :: size_max + integer (c_int64_t) , intent(in) , value :: num end function end interface #+end_src From 370a12803a1e116a3ebeeca5dba8e4cda0d5f9a6 Mon Sep 17 00:00:00 2001 From: Evgeny Posenitskiy <45995097+q-posev@users.noreply.github.com> Date: Wed, 2 Mar 2022 09:13:50 +0100 Subject: [PATCH 03/15] Fix test-build GitHub Action (#67) * Fix bugs in the .yml file * Fix test step in the GitHub CI When using out of source build all `make` rules should be executed in the corresponding directory (e.g. `_build`) * Add CI step to test TREXIO on MacOS * Explicitly provide gcc-10 and gfortran-10 to configure --- .github/workflows/test-build.yml | 44 ++++++++++---------------------- 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 41b59e0..c7af73f 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -47,6 +47,7 @@ jobs: - name: Run test run: make -j 4 check + working-directory: _build - name: Archive test log file if: failure() @@ -57,13 +58,7 @@ jobs: - name: Dist test run: make distcheck - - - name: Archive test log file - if: failure() - uses: actions/upload-artifact@v2 - with: - name: dist-report-ubuntu - path: test-suite.log + working-directory: _build x86_macos: @@ -91,37 +86,26 @@ jobs: git clone https://github.com/TREX-CoE/trexio.git cd trexio ./autogen.sh - mkdir _build - cd _build - ../configure --enable-silent-rules - make -j 4 - - - name: Run test - run: make -j 4 check - - - name: Archive test log file - if: failure() - uses: actions/upload-artifact@v2 - with: - name: test-report-ubuntu - path: test-suite.log - - - name: Dist test - run: make distcheck - - - name: Archive test log file - if: failure() - uses: actions/upload-artifact@v2 - with: ./configure --prefix=${PWD}/_install --enable-silent-rules make -j 4 make install + - name: Test TREXIO + run: make -j 4 check + working-directory: trexio + + - name: Archive TREXIO test log file + if: failure() + uses: actions/upload-artifact@v2 + with: + name: test-report-trexio-macos + path: trexio/test-suite.log + - name: Build QMCkl run: | export PKG_CONFIG_PATH=${PWD}/trexio/_install/lib/pkgconfig:$PKG_CONFIG_PATH ./autogen.sh - ./configure --enable-silent-rules --enable-debug + ./configure CC=gcc-10 FC=gfortran-10 --enable-silent-rules --enable-debug make -j 4 - name: Run test From 11cb1de4a5a39b7dbcb034c83fc88fd780208787 Mon Sep 17 00:00:00 2001 From: q-posev Date: Fri, 4 Mar 2022 11:27:59 +0100 Subject: [PATCH 04/15] Create FindQMCKL module --- cmake/FindQMCKL.cmake | 77 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 cmake/FindQMCKL.cmake diff --git a/cmake/FindQMCKL.cmake b/cmake/FindQMCKL.cmake new file mode 100644 index 0000000..89b0b11 --- /dev/null +++ b/cmake/FindQMCKL.cmake @@ -0,0 +1,77 @@ +#=========================================== + +# Try to find the QMCkl library; +# If found, it will define the following variables (note the plural form): +# QMCKL_FOUND - System has libqmckl; +# QMCKL_INCLUDE_DIRS - The QMCKL include directories; +# QMCKL_LIBRARIES - The libraries needed to use QMCKL; + +# If QMCKL has been installed in a non-standard location, one can set an +# environment variable $QMCKL_DIR in the current shell: +# $ export QMCKL_DIR= +# to indicate the prefix used during the QMCKL installation +# (typically `./configure prefix= ..` or `cmake -DCMAKE_INSTALL_DIR= ..`) + +# This file should be located WITHIN your project source tree. +# (e.g. in cmake/FindQMCKL.cmake) +# How to use it in your project CMakeLists.txt: + +# This is needed to locate FindQMCKL.cmake file, modify it according to your source tree. +# list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/") + +# find_package(QMCKL) +# if (QMCKL_FOUND) +# include_directories(${QMCKL_INCLUDE_DIRS}) +# target_link_libraries(your_target ${QMCKL_LIBRARIES}) +# endif() + +#=========================================== + +# This file is distirbuted under the BSD 3-Clause License. +# Copyright (c) 2021, TREX Center of Excellence + +#=========================================== + +message("") + +set(QMCKL_SEARCH_PATHS + ~/Library/Frameworks + /Library/Frameworks + /usr/local + /usr + /sw # Fink + /opt/local # DarwinPorts + /opt/csw # Blastwave + /opt +) + +find_path(QMCKL_INCLUDE_DIR + NAMES qmckl.h + HINTS $ENV{QMCKL_DIR} + PATH_SUFFIXES include/qmckl include + PATHS ${QMCKL_SEARCH_PATHS} + ) + + +# No need to specify platform-specific prefix (e.g. libqmckl on Unix) or +# suffix (e.g. .so on Unix or .dylib on MacOS) in NAMES. CMake takes care of that. +find_library(QMCKL_LIBRARY + NAMES qmckl + HINTS $ENV{QMCKL_DIR} + PATH_SUFFIXES lib64 lib + PATHS ${QMCKL_SEARCH_PATHS} + ) + +message("") + +# Handle the QUIETLY and REQUIRED arguments and set QMCKL_FOUND to TRUE if +# all listed variables are TRUE. +INCLUDE(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(QMCKL DEFAULT_MSG QMCKL_LIBRARY QMCKL_INCLUDE_DIR ) +MARK_AS_ADVANCED(QMCKL_INCLUDE_DIR QMCKL_LIBRARY) + +# Mot setting _INCLUDE_DIR and _LIBRARIES is considered a bug, +# see https://gitlab.kitware.com/cmake/community/-/wikis/doc/tutorials/How-To-Find-Libraries +set(QMCKL_LIBRARIES ${QMCKL_LIBRARY}) +set(QMCKL_INCLUDE_DIRS ${QMCKL_INCLUDE_DIR}) + From f85c52f3f8e4eb44f2c850bd5dc6aff27cbc56ee Mon Sep 17 00:00:00 2001 From: q-posev Date: Fri, 4 Mar 2022 11:29:56 +0100 Subject: [PATCH 05/15] Add linking instructions to README --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index 4ab5bae..703bfee 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,24 @@ sudo make install sudo make installcheck ``` + +## Linking to your program + +The `make install` command takes care of installing the QMCkl shared library on the user machine. +Once installed, add `-lqmckl` to the list of compiler options. + +In some cases (e.g. when using custom `prefix` during configuration), the QMCkl library might end up installed in a directory, which is absent in the default `$LIBRARY_PATH`. +In order to link the program against QMCkl, the search paths can be modified as follows: + +`export LIBRARY_PATH=$LIBRARY_PATH:/lib` + +(same holds for `$LD_LIBRARY_PATH`). The `` has to be replaced with the prefix used during the installation. + +If your project relies on the CMake build system, feel free to use the +[FindQMCKL.cmake](https://github.com/TREX-CoE/qmckl/blob/master/cmake/FindQMCKL.cmake) +module to find and link the QMCkl library automatically. + + ## Verificarlo CI Since Verificarlo should not be a dependency of QMCkl, all Verificarlo From 5ecb1d63267724ff06342e8863cea35609e79261 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 21 Mar 2022 18:32:39 +0100 Subject: [PATCH 06/15] Faster AOs --- org/qmckl_ao.org | 864 ++++++++++++++++++++++++--------------------- org/qmckl_blas.org | 6 +- tools/theme.setup | 4 +- 3 files changed, 463 insertions(+), 411 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index d3d5012..5bc9235 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -48,6 +48,7 @@ gradients and Laplacian of the atomic basis functions. #+begin_src c :tangle (eval h_private_func) #ifndef QMCKL_AO_HPF #define QMCKL_AO_HPF +#include "qmckl_blas_private_type.h" #+end_src @@ -57,6 +58,7 @@ gradients and Laplacian of the atomic basis functions. #include #include +#include "qmckl_blas_private_type.h" #+end_src #+begin_src f90 :tangle (eval f) :noweb yes @@ -105,6 +107,7 @@ int main() { #include "qmckl.h" #include "qmckl_context_private_type.h" #include "qmckl_memory_private_type.h" +#include "qmckl_blas_private_type.h" #include "qmckl_memory_private_func.h" #include "qmckl_ao_private_type.h" #include "qmckl_ao_private_func.h" @@ -271,35 +274,35 @@ end interface #+begin_src c :comments org :tangle (eval h_private_type) :noweb yes typedef struct qmckl_ao_basis_struct { - int64_t shell_num; - int64_t prim_num; - int64_t ao_num; - int64_t * nucleus_index; - int64_t * nucleus_shell_num; - int32_t * shell_ang_mom; - int64_t * shell_prim_num; - int64_t * shell_prim_index; - double * shell_factor; - double * exponent; - double * coefficient; - double * prim_factor; - double * ao_factor; + int64_t shell_num; + int64_t prim_num; + int64_t ao_num; + int64_t * restrict nucleus_index; + int64_t * restrict nucleus_shell_num; + int32_t * restrict shell_ang_mom; + int64_t * restrict shell_prim_num; + int64_t * restrict shell_prim_index; + double * restrict shell_factor; + double * restrict exponent; + double * restrict coefficient; + double * restrict prim_factor; + double * restrict ao_factor; - int64_t * nucleus_prim_index; - double * coefficient_normalized; - int32_t * nucleus_max_ang_mom; - double * nucleus_range; - double * primitive_vgl; - uint64_t primitive_vgl_date; - double * shell_vgl; - uint64_t shell_vgl_date; - double * ao_vgl; - uint64_t ao_vgl_date; + int64_t * restrict nucleus_prim_index; + double * restrict coefficient_normalized; + int32_t * restrict nucleus_max_ang_mom; + double * restrict nucleus_range; + double * restrict primitive_vgl; + uint64_t primitive_vgl_date; + double * restrict shell_vgl; + uint64_t shell_vgl_date; + double * restrict ao_vgl; + uint64_t ao_vgl_date; - int32_t uninitialized; - bool provided; - bool ao_cartesian; - char type; + int32_t uninitialized; + bool provided; + bool ao_cartesian; + char type; #ifdef HAVE_HPC <> #endif @@ -774,14 +777,14 @@ qmckl_set_ao_basis_shell_prim_num (qmckl_context context, } #+end_src - + #+begin_src c :comments org :tangle (eval h_func) qmckl_exit_code qmckl_set_ao_basis_shell_prim_index (qmckl_context context, const int64_t* shell_prim_index, const int64_t size_max); #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none qmckl_exit_code qmckl_set_ao_basis_shell_prim_index (qmckl_context context, @@ -835,15 +838,15 @@ qmckl_set_ao_basis_shell_prim_index (qmckl_context context, <> } #+end_src - - + + #+begin_src c :comments org :tangle (eval h_func) qmckl_exit_code qmckl_set_ao_basis_shell_factor (qmckl_context context, const double* shell_factor, const int64_t size_max); #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none qmckl_exit_code qmckl_set_ao_basis_shell_factor (qmckl_context context, @@ -897,15 +900,15 @@ qmckl_set_ao_basis_shell_factor (qmckl_context context, <> } #+end_src - - + + #+begin_src c :comments org :tangle (eval h_func) qmckl_exit_code qmckl_set_ao_basis_exponent (qmckl_context context, const double* exponent, const int64_t size_max); #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none qmckl_exit_code qmckl_set_ao_basis_exponent (qmckl_context context, @@ -2642,11 +2645,11 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { For faster access, we provide extra arrays for the shell information as: - - $C_{psa}$ = =coef_per_nucleus (iprim, ishell, inucl)= - - $\gamma_{pa}$ =expo_per_nucleus (iprim, inucl)= + - $C_{psa}$ = =coef_per_nucleus[inucl][ishell][iprim]= + - $\gamma_{pa}$ =expo_per_nucleus[inucl][iprim]= such that the computation of the radial parts can be vectorized. - + Exponents are sorted in increasing order to exit loops quickly, and only unique exponents are kept. This also allows to pack the exponents to enable vectorization of exponentials. @@ -2656,13 +2659,13 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { R_{sa} = \sum_p C_{psa} \gamma_{pa} \] which is a matrix-vector product. - - #+NAME:HPC_struct + + #+NAME:HPC_struct #+begin_src c :comments org :exports none /* HPC specific data structures */ - int32_t* prim_num_per_nucleus; - qmckl_tensor coef_per_nucleus; - qmckl_matrix expo_per_nucleus; + int32_t* restrict prim_num_per_nucleus; + qmckl_tensor coef_per_nucleus; + qmckl_matrix expo_per_nucleus; #+end_src #+begin_src c :comments org :tangle (eval h_private_func) :exports none @@ -2670,7 +2673,7 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context); #endif #+end_src - + #+begin_src c :comments org :tangle (eval c) :exports none /* Data structure for sorting */ struct combined { @@ -2681,9 +2684,9 @@ struct combined { /* Comparison function */ int compare_basis( const void * l, const void * r ) { - const struct combined * _l= (const struct combined *)l; - const struct combined * _r= (const struct combined *)r; - if( _l->expo > _r->expo ) return 1; + const struct combined * restrict _l= (const struct combined *)l; + const struct combined * restrict _r= (const struct combined *)r; + if( _l->expo > _r->expo ) return 1; if( _l->expo < _r->expo ) return -1; return 0; } @@ -2735,7 +2738,7 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) const int64_t ishell_start = ctx->ao_basis.nucleus_index[inucl]; const int64_t ishell_end = ctx->ao_basis.nucleus_index[inucl] + ctx->ao_basis.nucleus_shell_num[inucl]; for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { - + const int64_t iprim_start = ctx->ao_basis.shell_prim_index[ishell]; const int64_t iprim_end = ctx->ao_basis.shell_prim_index[ishell] + ctx->ao_basis.shell_prim_num[ishell]; for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) { @@ -2751,16 +2754,15 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) idx = 0; int64_t idx2 = 0; for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { - + memset(newcoef, 0, sizeof(newcoef)); const int64_t iprim_start = ctx->ao_basis.shell_prim_index[ishell]; const int64_t iprim_end = ctx->ao_basis.shell_prim_index[ishell] + ctx->ao_basis.shell_prim_num[ishell]; for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) { -// newcoef[idx] = ctx->ao_basis.coefficient_normalized[iprim]; - newcoef[idx] = ctx->ao_basis.coefficient[iprim]; + newcoef[idx] = ctx->ao_basis.coefficient_normalized[iprim]; idx += 1; } - for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { idx2 = expo[i].index; coef[ishell - ishell_start][i] = newcoef[idx2]; } @@ -2773,7 +2775,7 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) int64_t idxmax = 0; idx = 0; newidx[0] = 0; - for (int64_t i=1 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t i=1 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { if (expo[i].expo != expo[i-1].expo) { idx += 1; } @@ -2781,33 +2783,33 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) } idxmax = idx; - for (int64_t j=0 ; jao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { newcoef[newidx[i]] += coef[j][i]; } - for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { coef[j][i] = newcoef[i]; } } - for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { expo[newidx[i]].expo = expo[i].expo; } ctx->ao_basis.prim_num_per_nucleus[inucl] = idxmax+1; - for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { qmckl_mat( ctx->ao_basis.expo_per_nucleus, i, inucl ) = expo[i].expo; } - for (int64_t j=0 ; jao_basis.prim_num_per_nucleus[inucl] ; ++i) { + for (int32_t j=0 ; jao_basis.prim_num_per_nucleus[inucl] ; ++i) { qmckl_ten3( ctx->ao_basis.coef_per_nucleus, i, j, inucl ) = coef[j][i]; } } -/* - for (int64_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { +/* + for (int32_t i=0 ; iao_basis.prim_num_per_nucleus[inucl] ; ++i) { printf("%4ld %4ld %15.5e | ", inucl, i, qmckl_mat( ctx->ao_basis.expo_per_nucleus, i, inucl )); for (int64_t j=0 ; jao_basis.coef_per_nucleus, i, j, inucl )); @@ -2818,11 +2820,11 @@ qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context) */ } - + return QMCKL_SUCCESS; } #endif - + #+end_src *** Access functions @@ -4789,13 +4791,13 @@ end function qmckl_ao_polynomial_transp_vgl_doc_f #+begin_src c :tangle (eval c) :comments org qmckl_exit_code qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, - const double* X, - const double* R, + const double* restrict X, + const double* restrict R, const int32_t lmax, - int64_t* n, - int32_t* const L, + int64_t* restrict n, + int32_t* restrict const L, const int64_t ldl, - double* const VGL, + double* restrict const VGL, const int64_t ldv ) { const qmckl_context_struct* ctx = (qmckl_context_struct* const) context; @@ -4831,7 +4833,7 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, } else { - int32_t* const l[4] = {L, L+ldl, L+(ldl<<1), L+ldl+(ldl<<1)}; + int32_t* restrict const l[4] = {L, L+ldl, L+(ldl<<1), L+ldl+(ldl<<1)}; l[0][0] = 0; l[0][1] = 0; l[0][2] = 0; l[1][0] = 1; l[1][1] = 0; l[1][2] = 0; l[2][0] = 0; l[2][1] = 1; l[2][2] = 0; @@ -4852,11 +4854,11 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, } else { - double* const vgl1 = VGL; - double* const vgl2 = VGL + ldv; - double* const vgl3 = VGL + (ldv << 1); - double* const vgl4 = VGL + ldv + (ldv << 1); - double* const vgl5 = VGL + (ldv << 2); + double* restrict const vgl1 = VGL; + double* restrict const vgl2 = VGL + ldv; + double* restrict const vgl3 = VGL + (ldv << 1); + double* restrict const vgl4 = VGL + ldv + (ldv << 1); + double* restrict const vgl5 = VGL + (ldv << 2); for (int32_t k=0 ; k<4 ; ++k) { vgl2[k] = 0.0; @@ -4886,7 +4888,7 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, L[i] = ltmp[i]; } else { - int32_t* l[10]; + int32_t* restrict l[10]; for (int32_t i=0 ; i<10 ; ++i) { l[i] = L + i*ldl; } @@ -4922,11 +4924,11 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, for (int i=0 ; i<50 ; ++i) VGL[i] = tmp[i]; } else { - double* const vgl1 = VGL; - double* const vgl2 = VGL + ldv; - double* const vgl3 = VGL + (ldv << 1); - double* const vgl4 = VGL + 3*ldv; - double* const vgl5 = VGL + (ldv << 2); + double* restrict const vgl1 = VGL; + double* restrict const vgl2 = VGL + ldv; + double* restrict const vgl3 = VGL + (ldv << 1); + double* restrict const vgl4 = VGL + 3*ldv; + double* restrict const vgl5 = VGL + (ldv << 2); vgl1[0] = 1.0 ; vgl1[1] = Y[0] ; vgl1[2] = Y[1]; vgl1[3] = Y[2] ; vgl1[4] = Y[0]*Y[0]; vgl1[5] = Y[0]*Y[1]; @@ -4961,11 +4963,11 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, const int32_t size_max = (lmax+1)*(lmax+2)*(lmax+3)/6; if (ldv < size_max) return QMCKL_INVALID_ARG_9; - double* const vgl1 = VGL; - double* const vgl2 = VGL + ldv; - double* const vgl3 = VGL + (ldv<<1); - double* const vgl4 = VGL + ldv + (ldv<<1); - double* const vgl5 = VGL + (ldv<<2); + double* restrict const vgl1 = VGL; + double* restrict const vgl2 = VGL + ldv; + double* restrict const vgl3 = VGL + (ldv<<1); + double* restrict const vgl4 = VGL + ldv + (ldv<<1); + double* restrict const vgl5 = VGL + (ldv<<2); const double Y[3] = { X[0]-R[0], X[1]-R[1], @@ -5464,29 +5466,28 @@ end function qmckl_compute_ao_vgl_doc_f | ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | out | Value, gradients and Laplacian of the AOs | + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none #ifdef HAVE_HPC qmckl_exit_code qmckl_compute_ao_vgl_hpc_gaussian ( - const qmckl_context context, - const int64_t ao_num, - const int64_t shell_num, - const int64_t prim_num, - const int64_t point_num, - const int64_t nucl_num, - const double* coord, - const double* nucl_coord, - const int64_t* nucleus_index, - const int64_t* nucleus_shell_num, - const double* nucleus_range, - const int32_t* nucleus_max_ang_mom, - const int32_t* shell_ang_mom, - const int64_t* shell_prim_index, - const int64_t* shell_prim_num, - const double* ao_factor, - const double* expo, - const double* coef_normalized, - double* const ao_vgl ) + const qmckl_context context, + const int64_t ao_num, + const int64_t shell_num, + const int32_t* restrict prim_num_per_nucleus, + const int64_t point_num, + const int64_t nucl_num, + const double* restrict coord, + const double* restrict nucl_coord, + const int64_t* restrict nucleus_index, + const int64_t* restrict nucleus_shell_num, + const double* nucleus_range, + const int32_t* restrict nucleus_max_ang_mom, + const int32_t* restrict shell_ang_mom, + const double* restrict ao_factor, + const qmckl_matrix expo_per_nucleus, + const qmckl_tensor coef_per_nucleus, + double* restrict const ao_vgl ) { int32_t lstart[32]; for (int32_t l=0 ; l<32 ; ++l) { @@ -5495,37 +5496,41 @@ qmckl_compute_ao_vgl_hpc_gaussian ( int64_t ao_index[shell_num+1]; int64_t size_max = 0; + int64_t prim_max = 0; + int64_t shell_max = 0; { - int64_t k=0; - for (int inucl=0 ; inucl < nucl_num ; ++inucl) { - const int64_t ishell_start = nucleus_index[inucl]; - const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; - for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { - const int l = shell_ang_mom[ishell]; - ao_index[ishell] = k; - k += lstart[l+1] - lstart[l]; - size_max = size_max < lstart[l+1] ? lstart[l+1] : size_max; - } - } - ao_index[shell_num] = ao_num+1; + int64_t k=0; + for (int inucl=0 ; inucl < nucl_num ; ++inucl) { + prim_max = prim_num_per_nucleus[inucl] > prim_max ? + prim_num_per_nucleus[inucl] : prim_max; + shell_max = nucleus_shell_num[inucl] > shell_max ? + nucleus_shell_num[inucl] : shell_max; + const int64_t ishell_start = nucleus_index[inucl]; + const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; + for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { + const int l = shell_ang_mom[ishell]; + ao_index[ishell] = k; + k += lstart[l+1] - lstart[l]; + size_max = size_max < lstart[l+1] ? lstart[l+1] : size_max; + } + } + ao_index[shell_num] = ao_num+1; } /* Don't compute polynomials when the radial part is zero. */ double cutoff = -log(1.e-12); #ifdef HAVE_OPENMP - #pragma omp parallel +#pragma omp parallel #endif { qmckl_exit_code rc; - double c_[prim_num]; - double expo_[prim_num]; - double ar2[prim_num]; - int32_t powers[prim_num]; + double ar2[prim_max]; + int32_t powers[prim_max]; double poly_vgl_l1[4][4] = {{1.0, 0.0, 0.0, 0.0}, - {0.0, 1.0, 0.0, 0.0}, - {0.0, 0.0, 1.0, 0.0}, - {0.0, 0.0, 0.0, 1.0}}; + {0.0, 1.0, 0.0, 0.0}, + {0.0, 0.0, 1.0, 0.0}, + {0.0, 0.0, 0.0, 1.0}}; double poly_vgl_l2[5][10] = {{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, {0., 0., 1., 0., 0., 0., 0., 0., 0., 0.}, @@ -5533,231 +5538,247 @@ qmckl_compute_ao_vgl_hpc_gaussian ( {0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}}; double poly_vgl[5][size_max]; + double exp_mat[prim_max][8]; + double ce_mat[shell_max][8]; + + double coef_mat[nucl_num][shell_max][prim_max]; + for (int i=0 ; i cutoff * nucleus_range[inucl]) { - continue; + if (r2 > cutoff * nucleus_range[inucl]) { + continue; + } + + int64_t n_poly; + switch (nucleus_max_ang_mom[inucl]) { + case 0: + break; + + case 1: + poly_vgl_l1[0][1] = x; + poly_vgl_l1[0][2] = y; + poly_vgl_l1[0][3] = z; + break; + + case 2: + poly_vgl_l2[0][1] = x; + poly_vgl_l2[0][2] = y; + poly_vgl_l2[0][3] = z; + poly_vgl_l2[0][4] = x*x; + poly_vgl_l2[0][5] = x*y; + poly_vgl_l2[0][6] = x*z; + poly_vgl_l2[0][7] = y*y; + poly_vgl_l2[0][8] = y*z; + poly_vgl_l2[0][9] = z*z; + poly_vgl_l2[1][4] = x+x; + poly_vgl_l2[1][5] = y; + poly_vgl_l2[1][6] = z; + poly_vgl_l2[2][5] = x; + poly_vgl_l2[2][7] = y+y; + poly_vgl_l2[2][8] = z; + poly_vgl_l2[3][6] = x; + poly_vgl_l2[3][8] = y; + poly_vgl_l2[3][9] = z+z; + break; + + default: + rc = qmckl_ao_polynomial_transp_vgl_hpc(context, e_coord, n_coord, + nucleus_max_ang_mom[inucl], + &n_poly, powers, (int64_t) 3, + &(poly_vgl[0][0]), size_max); + + assert (rc == QMCKL_SUCCESS); + break; + } + + /* Compute all exponents */ + + int64_t nidx = 0; + for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) { + const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2; + if (v <= cutoff) { + ar2[iprim] = v; + ++nidx; + } else { + break; } + } - int64_t n_poly; - switch (nucleus_max_ang_mom[inucl]) { - case 0: - break; + for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { + exp_mat[iprim][0] = exp(-ar2[iprim]); + } + for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { + double f = qmckl_mat(expo_per_nucleus, iprim, inucl) * exp_mat[iprim][0]; + f = -f-f; + exp_mat[iprim][1] = f * x; + exp_mat[iprim][2] = f * y; + exp_mat[iprim][3] = f * z; + exp_mat[iprim][4] = f * (3.0 - 2.0 * ar2[iprim]); + } - case 1: - poly_vgl_l1[0][1] = x; - poly_vgl_l1[0][2] = y; - poly_vgl_l1[0][3] = z; - break; + for (int i=0 ; i 0) { + const double* restrict f = ao_factor + k; + const int64_t idx = lstart[l]; + + switch (nucleus_max_ang_mom[inucl]) { + case 0: break; - } - - /* Compute all exponents */ - /* - for (int32_t i=0 ; iao_basis.prim_num_per_nucleus ; ++i) { - const double v = expo[iprim]*r2; - if (v > cutoff) { - nidx = i; + case 1: + poly_vgl_1 = &(poly_vgl_l1[0][idx]); + poly_vgl_2 = &(poly_vgl_l1[1][idx]); + poly_vgl_3 = &(poly_vgl_l1[2][idx]); + poly_vgl_4 = &(poly_vgl_l1[3][idx]); + break; + case 2: + poly_vgl_1 = &(poly_vgl_l2[0][idx]); + poly_vgl_2 = &(poly_vgl_l2[1][idx]); + poly_vgl_3 = &(poly_vgl_l2[2][idx]); + poly_vgl_4 = &(poly_vgl_l2[3][idx]); + poly_vgl_5 = &(poly_vgl_l2[4][idx]); + break; + default: + poly_vgl_1 = &(poly_vgl[0][idx]); + poly_vgl_2 = &(poly_vgl[1][idx]); + poly_vgl_3 = &(poly_vgl[2][idx]); + poly_vgl_4 = &(poly_vgl[3][idx]); + poly_vgl_5 = &(poly_vgl[4][idx]); + } + switch (n) { + case(1): + ao_vgl_1[0] = s1 * f[0]; + ao_vgl_2[0] = s2 * f[0]; + ao_vgl_3[0] = s3 * f[0]; + ao_vgl_4[0] = s4 * f[0]; + ao_vgl_5[0] = s5; + break; + case (3): +#ifdef HAVE_OPENMP +#pragma omp simd +#endif + for (int il=0 ; il<3 ; ++il) { + ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; + ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; + ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il]; + ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il]; + ao_vgl_5[il] = (poly_vgl_1[il] * s5 + + 2.0*(poly_vgl_2[il] * s2 + + poly_vgl_3[il] * s3 + + poly_vgl_4[il] * s4 )) * f[il]; + } + break; + case(6): +#ifdef HAVE_OPENMP +#pragma omp simd +#endif + for (int il=0 ; il<6 ; ++il) { + ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; + ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; + ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il]; + ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il]; + ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 + + 2.0*(poly_vgl_2[il] * s2 + + poly_vgl_3[il] * s3 + + poly_vgl_4[il] * s4 )) * f[il]; + } + break; + default: +#ifdef HAVE_OPENMP +#pragma omp simd simdlen(8) +#endif + for (int il=0 ; ilao_basis.prim_num_per_nucleus ; ++i) { - } - */ - - const int64_t ishell_start = nucleus_index[inucl]; - const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; - for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { - - int64_t nidx = 0; - const int64_t iprim_start = shell_prim_index[ishell]; - const int64_t iprim_end = shell_prim_index[ishell] + shell_prim_num[ishell]; - for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) { - const double v = expo[iprim]*r2; - if (v <= cutoff) { - ar2[nidx] = v; - c_[nidx] = coef_normalized[iprim]; - expo_[nidx] = expo[iprim]; - ++nidx; - } - } - - double s1_ = 0.; - double s5_ = 0.; - double s6_ = 0.; - for (int idx=0 ; idx 0) { - const double* __restrict__ f = ao_factor + k; - const int64_t idx = lstart[l]; - - switch (nucleus_max_ang_mom[inucl]) { - case 0: - break; - case 1: - poly_vgl_1 = &(poly_vgl_l1[0][idx]); - poly_vgl_2 = &(poly_vgl_l1[1][idx]); - poly_vgl_3 = &(poly_vgl_l1[2][idx]); - poly_vgl_4 = &(poly_vgl_l1[3][idx]); - break; - case 2: - poly_vgl_1 = &(poly_vgl_l2[0][idx]); - poly_vgl_2 = &(poly_vgl_l2[1][idx]); - poly_vgl_3 = &(poly_vgl_l2[2][idx]); - poly_vgl_4 = &(poly_vgl_l2[3][idx]); - break; - default: - poly_vgl_1 = &(poly_vgl[0][idx]); - poly_vgl_2 = &(poly_vgl[1][idx]); - poly_vgl_3 = &(poly_vgl[2][idx]); - poly_vgl_4 = &(poly_vgl[3][idx]); - poly_vgl_5 = &(poly_vgl[4][idx]); - } - switch (n) { - case(1): - ao_vgl_1[0] = s1 * f[0]; - ao_vgl_2[0] = s2 * f[0]; - ao_vgl_3[0] = s3 * f[0]; - ao_vgl_4[0] = s4 * f[0]; - ao_vgl_5[0] = s5; - break; - case (3): -#ifdef HAVE_OPENMP - #pragma omp simd -#endif - for (int il=0 ; il<3 ; ++il) { - ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; - ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; - ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il]; - ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il]; - ao_vgl_5[il] = (poly_vgl_1[il] * s5 + - 2.0*(poly_vgl_2[il] * s2 + - poly_vgl_3[il] * s3 + - poly_vgl_4[il] * s4 )) * f[il]; - } - break; - case(5): -#ifdef HAVE_OPENMP - #pragma omp simd -#endif - for (int il=0 ; il<5 ; ++il) { - ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; - ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; - ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il]; - ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il]; - ao_vgl_5[il] = (poly_vgl_1[il] * s5 + - 2.0*(poly_vgl_2[il] * s2 + - poly_vgl_3[il] * s3 + - poly_vgl_4[il] * s4 )) * f[il]; - } - break; - default: -#ifdef HAVE_OPENMP - #pragma omp simd simdlen(8) -#endif - for (int il=0 ; ilao_basis.ao_num, ctx->ao_basis.shell_num, - ctx->ao_basis.prim_num, + ctx->ao_basis.prim_num_per_nucleus, ctx->point.num, ctx->nucleus.num, ctx->point.coord.data, @@ -5944,11 +5963,9 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context) ctx->ao_basis.nucleus_range, ctx->ao_basis.nucleus_max_ang_mom, ctx->ao_basis.shell_ang_mom, - ctx->ao_basis.shell_prim_index, - ctx->ao_basis.shell_prim_num, ctx->ao_basis.ao_factor, - ctx->ao_basis.exponent, - ctx->ao_basis.coefficient_normalized, + ctx->ao_basis.expo_per_nucleus, + ctx->ao_basis.coef_per_nucleus, ctx->ao_basis.ao_vgl); /* } else if (ctx->ao_basis.type == 'S') { @@ -6023,77 +6040,74 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context) import numpy as np from math import sqrt +h0 = 1.e-4 def f(a,x,y): return np.sum( [c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] ) +def fx(a,x,y): + return f(a,x,y) * (x[0] - y[0])**2 + + def df(a,x,y,n): - h0 = 1.e-6 - if n == 1: h = np.array([h0,0.,0.]) - elif n == 2: h = np.array([0.,h0,0.]) - elif n == 3: h = np.array([0.,0.,h0]) - return ( f(a,x+h,y) - f(a,x-h,y) ) / (2.*h0) + if n == 1: h = np.array([h0,0.,0.]) + elif n == 2: h = np.array([0.,h0,0.]) + elif n == 3: h = np.array([0.,0.,h0]) + return ( fx(a,x+h,y) - fx(a,x-h,y) ) / (2.*h0) +# return np.sum( [-2.*b * c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] ) * (x-y)[n-1] def d2f(a,x,y,n): - h0 = 1.e-6 if n == 1: h = np.array([h0,0.,0.]) elif n == 2: h = np.array([0.,h0,0.]) elif n == 3: h = np.array([0.,0.,h0]) - return ( f(a,x+h,y) - 2.*f(a,x,y) + f(a,x-h,y) ) / h0**2 + return ( fx(a,x+h,y) - 2.*fx(a,x,y) + fx(a,x-h,y) ) / h0**2 +# return np.sum( [( (2.*b*(x-y)[n-1])**2 -2.*b ) * c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] ) def lf(a,x,y): +# return np.sum( [( (2.*b*np.linalg.norm(x-y))**2 -6.*b ) * c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] ) return d2f(a,x,y,1) + d2f(a,x,y,2) + d2f(a,x,y,3) + elec_26_w1 = np.array( [ 1.49050402641, 2.90106987953, -1.05920815468 ] ) elec_15_w2 = np.array( [ -2.20180344582,-1.9113150239, 2.2193744778600002 ] ) nucl_1 = np.array( [ -2.302574592081335e+00, -3.542027060505035e-01, -5.334129934317614e-02] ) #double ao_vgl[prim_num][5][elec_num]; x = elec_26_w1 ; y = nucl_1 -a = [( 403.830000, 0.001473 * 5.9876577632594533e+04), - ( 121.170000, 0.012672 * 7.2836806319891484e+03), - ( 46.345000, 0.058045 * 1.3549226646722386e+03), - ( 19.721000, 0.170510 * 3.0376315094739988e+02), - ( 8.862400, 0.318596 * 7.4924579607137730e+01), - ( 3.996200, 0.384502 * 1.8590543353806009e+01), - ( 1.763600, 0.273774 * 4.4423176930919421e+00), - ( 0.706190, 0.074397 * 8.9541051939952665e-01)] +a = [( 4.0382999999999998e+02, 1.4732000000000000e-03 * 5.9876577632594533e+04), + ( 1.2117000000000000e+02, 1.2672500000000000e-02 * 7.2836806319891484e+03), + ( 4.6344999999999999e+01, 5.8045100000000002e-02 * 1.3549226646722386e+03), + ( 1.9721000000000000e+01, 1.7051030000000000e-01 * 3.0376315094739988e+02), + ( 8.8623999999999992e+00, 3.1859579999999998e-01 * 7.4924579607137730e+01), + ( 3.9962000000000000e+00, 3.8450230000000002e-01 * 1.8590543353806009e+01), + ( 1.7636000000000001e+00, 2.7377370000000001e-01 * 4.4423176930919421e+00), + ( 7.0618999999999998e-01, 7.4396699999999996e-02 * 8.9541051939952665e-01)] norm = sqrt(3.) -print ( "[0][26][219] : %25.15e"%(f(a,x,y) * (x[0] - y[0])**2) ) -print ( "[1][26][219] : %25.15e"%(df(a,x,y,1)* (x[0] - y[0]) * (x[1] - y[1]) + 2.*f(a,x,y) * (x[0] - y[0])) ) +# x^2 * g(r) +print ( "[26][0][219] : %25.15e"%(fx(a,x,y)) ) +print ( "[26][1][219] : %25.15e"%(df(a,x,y,1)) ) +print ( "[26][2][219] : %25.15e"%(df(a,x,y,2)) ) +print ( "[26][3][219] : %25.15e"%(df(a,x,y,3)) ) +print ( "[26][4][219] : %25.15e"%(lf(a,x,y)) ) -print ( "[0][26][220] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[1] - y[1]) )) -print ( "[1][26][220] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[1] - y[1]) + norm*f(a,x,y) * (x[1] - y[1])) ) +print ( "[26][0][220] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[1] - y[1]) )) +print ( "[26][1][220] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[1] - y[1]) + norm*f(a,x,y) * (x[1] - y[1])) ) -print ( "[0][26][221] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[2] - y[2])) ) -print ( "[1][26][221] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[2] - y[2]) + norm*f(a,x,y) * (x[2] - y[2])) ) +print ( "[26][0][221] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[2] - y[2])) ) +print ( "[26][1][221] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[2] - y[2]) + norm*f(a,x,y) * (x[2] - y[2])) ) -print ( "[0][26][222] : %25.15e"%(f(a,x,y) * (x[1] - y[1]) * (x[1] - y[1])) ) -print ( "[1][26][222] : %25.15e"%(df(a,x,y,1)* (x[1] - y[1]) * (x[1] - y[1])) ) +print ( "[26][0][222] : %25.15e"%(f(a,x,y) * (x[1] - y[1]) * (x[1] - y[1])) ) +print ( "[26][1][222] : %25.15e"%(df(a,x,y,1)* (x[1] - y[1]) * (x[1] - y[1])) ) -print ( "[0][26][223] : %25.15e"%(norm*f(a,x,y) * (x[1] - y[1]) * (x[2] - y[2])) ) -print ( "[1][26][223] : %25.15e"%(norm*df(a,x,y,1)* (x[1] - y[1]) * (x[2] - y[2])) ) +print ( "[26][0][223] : %25.15e"%(norm*f(a,x,y) * (x[1] - y[1]) * (x[2] - y[2])) ) +print ( "[26][1][223] : %25.15e"%(norm*df(a,x,y,1)* (x[1] - y[1]) * (x[2] - y[2])) ) -print ( "[0][26][224] : %25.15e"%(f(a,x,y) * (x[2] - y[2]) * (x[2] - y[2])) ) -print ( "[1][26][224] : %25.15e"%(df(a,x,y,1)* (x[2] - y[2]) * (x[2] - y[2])) ) +print ( "[26][0][224] : %25.15e"%(f(a,x,y) * (x[2] - y[2]) * (x[2] - y[2])) ) +print ( "[26][1][224] : %25.15e"%(df(a,x,y,1)* (x[2] - y[2]) * (x[2] - y[2])) ) #+end_src #+RESULTS: - #+begin_example - [0][26][219] : 1.020302912653649e-08 - [1][26][219] : -4.153046808203204e-08 - [0][26][220] : 1.516649653540510e-08 - [1][26][220] : -7.725252615816528e-08 - [0][26][221] : -4.686389780112468e-09 - [1][26][221] : 2.387073693851122e-08 - [0][26][222] : 7.514847283937212e-09 - [1][26][222] : -4.025905373647693e-08 - [0][26][223] : -4.021924592380977e-09 - [1][26][223] : 2.154652944642284e-08 - [0][26][224] : 7.175074806631758e-10 - [1][26][224] : -3.843880138733679e-09 - #+end_example #+begin_src c :tangle (eval c_test) :exports none { @@ -6125,32 +6139,69 @@ rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]), assert (rc == QMCKL_SUCCESS); printf("\n"); -printf(" ao_vgl ao_vgl[0][26][219] %25.15e\n", ao_vgl[26][0][219]); -printf(" ao_vgl ao_vgl[1][26][219] %25.15e\n", ao_vgl[26][1][219]); -printf(" ao_vgl ao_vgl[0][26][220] %25.15e\n", ao_vgl[26][0][220]); -printf(" ao_vgl ao_vgl[1][26][220] %25.15e\n", ao_vgl[26][1][220]); -printf(" ao_vgl ao_vgl[0][26][221] %25.15e\n", ao_vgl[26][0][221]); -printf(" ao_vgl ao_vgl[1][26][221] %25.15e\n", ao_vgl[26][1][221]); -printf(" ao_vgl ao_vgl[0][26][222] %25.15e\n", ao_vgl[26][0][222]); -printf(" ao_vgl ao_vgl[1][26][222] %25.15e\n", ao_vgl[26][1][222]); -printf(" ao_vgl ao_vgl[0][26][223] %25.15e\n", ao_vgl[26][0][223]); -printf(" ao_vgl ao_vgl[1][26][223] %25.15e\n", ao_vgl[26][1][223]); -printf(" ao_vgl ao_vgl[0][26][224] %25.15e\n", ao_vgl[26][0][224]); -printf(" ao_vgl ao_vgl[1][26][224] %25.15e\n", ao_vgl[26][1][224]); +printf(" ao_vgl ao_vgl[26][0][219] %25.15e\n", ao_vgl[26][0][219]); +printf(" ao_vgl ao_vgl[26][1][219] %25.15e\n", ao_vgl[26][1][219]); +printf(" ao_vgl ao_vgl[26][2][219] %25.15e\n", ao_vgl[26][2][219]); +printf(" ao_vgl ao_vgl[26][3][219] %25.15e\n", ao_vgl[26][3][219]); +printf(" ao_vgl ao_vgl[26][4][219] %25.15e\n", ao_vgl[26][4][219]); +printf(" ao_vgl ao_vgl[26][0][220] %25.15e\n", ao_vgl[26][0][220]); +printf(" ao_vgl ao_vgl[26][1][220] %25.15e\n", ao_vgl[26][1][220]); +printf(" ao_vgl ao_vgl[26][2][220] %25.15e\n", ao_vgl[26][2][220]); +printf(" ao_vgl ao_vgl[26][3][220] %25.15e\n", ao_vgl[26][3][220]); +printf(" ao_vgl ao_vgl[26][4][220] %25.15e\n", ao_vgl[26][4][220]); +printf(" ao_vgl ao_vgl[26][0][221] %25.15e\n", ao_vgl[26][0][221]); +printf(" ao_vgl ao_vgl[26][1][221] %25.15e\n", ao_vgl[26][1][221]); +printf(" ao_vgl ao_vgl[26][2][221] %25.15e\n", ao_vgl[26][2][221]); +printf(" ao_vgl ao_vgl[26][3][221] %25.15e\n", ao_vgl[26][3][221]); +printf(" ao_vgl ao_vgl[26][4][221] %25.15e\n", ao_vgl[26][4][221]); +printf(" ao_vgl ao_vgl[26][0][222] %25.15e\n", ao_vgl[26][0][222]); +printf(" ao_vgl ao_vgl[26][1][222] %25.15e\n", ao_vgl[26][1][222]); +printf(" ao_vgl ao_vgl[26][2][222] %25.15e\n", ao_vgl[26][2][222]); +printf(" ao_vgl ao_vgl[26][3][222] %25.15e\n", ao_vgl[26][3][222]); +printf(" ao_vgl ao_vgl[26][4][222] %25.15e\n", ao_vgl[26][4][222]); +printf(" ao_vgl ao_vgl[26][0][223] %25.15e\n", ao_vgl[26][0][223]); +printf(" ao_vgl ao_vgl[26][1][223] %25.15e\n", ao_vgl[26][1][223]); +printf(" ao_vgl ao_vgl[26][2][223] %25.15e\n", ao_vgl[26][2][223]); +printf(" ao_vgl ao_vgl[26][3][223] %25.15e\n", ao_vgl[26][3][223]); +printf(" ao_vgl ao_vgl[26][4][223] %25.15e\n", ao_vgl[26][4][223]); +printf(" ao_vgl ao_vgl[26][0][224] %25.15e\n", ao_vgl[26][0][224]); +printf(" ao_vgl ao_vgl[26][1][224] %25.15e\n", ao_vgl[26][1][224]); +printf(" ao_vgl ao_vgl[26][2][224] %25.15e\n", ao_vgl[26][2][224]); +printf(" ao_vgl ao_vgl[26][3][224] %25.15e\n", ao_vgl[26][3][224]); +printf(" ao_vgl ao_vgl[26][4][224] %25.15e\n", ao_vgl[26][4][224]); printf("\n"); -assert( fabs(ao_vgl[26][0][219] - ( 1.020298798341620e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][1][219] - (-4.928035238010602e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][0][220] - ( 1.516643537739178e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][1][220] - (-7.725221462603871e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][0][221] - (-4.686370882518819e-09)) < 1.e-14 ); -assert( fabs(ao_vgl[26][1][221] - ( 2.387064067626827e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][0][222] - ( 7.514816980753531e-09)) < 1.e-14 ); -assert( fabs(ao_vgl[26][1][222] - (-4.025889138635182e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][0][223] - (-4.021908374204471e-09)) < 1.e-14 ); -assert( fabs(ao_vgl[26][1][223] - ( 2.154644255710413e-08)) < 1.e-14 ); -assert( fabs(ao_vgl[26][0][224] - ( 7.175045873560788e-10)) < 1.e-14 ); -assert( fabs(ao_vgl[26][1][224] - (-3.843864637762753e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][0][219] - ( 1.020298798341620e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][1][219] - ( -4.928035238010602e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][2][219] - ( -4.691009312035986e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][3][219] - ( 1.449504046436699e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][4][219] - ( 4.296442111843973e-07)) < 1.e-14 ); +assert( fabs(ao_vgl[26][0][220] - ( 1.516643537739178e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][1][220] - ( -7.725221462603871e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][2][220] - ( -6.507140835104833e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][3][220] - ( 2.154644255710413e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][4][220] - ( 6.365449359656352e-07)) < 1.e-14 ); +assert( fabs(ao_vgl[26][0][221] - ( -4.686370882518819e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][1][221] - ( 2.387064067626827e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][2][221] - ( 2.154644255710412e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][3][221] - ( -1.998731863512374e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][4][221] - ( -1.966899656441993e-07)) < 1.e-14 ); +assert( fabs(ao_vgl[26][0][222] - ( 7.514816980753531e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][1][222] - ( -4.025889138635182e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][2][222] - ( -2.993372555126361e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][3][222] - ( 1.067604670272904e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][4][222] - ( 3.168199650002648e-07)) < 1.e-14 ); +assert( fabs(ao_vgl[26][0][223] - ( -4.021908374204471e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][1][223] - ( 2.154644255710413e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][2][223] - ( 1.725594944732276e-08)) < 1.e-14 ); +assert( fabs(ao_vgl[26][3][223] - ( -1.715339357718333e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][4][223] - ( -1.688020516893476e-07)) < 1.e-14 ); +assert( fabs(ao_vgl[26][0][224] - ( 7.175045873560788e-10)) < 1.e-14 ); +assert( fabs(ao_vgl[26][1][224] - ( -3.843864637762753e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][2][224] - ( -3.298857850451910e-09)) < 1.e-14 ); +assert( fabs(ao_vgl[26][3][224] - ( -4.073047518790881e-10)) < 1.e-14 ); +assert( fabs(ao_vgl[26][4][224] - ( 3.153244195820293e-08)) < 1.e-14 ); + } #+end_src @@ -6204,6 +6255,5 @@ assert( fabs(ao_vgl[26][1][224] - (-3.843864637762753e-09)) < 1.e-14 ); # -*- mode: org -*- # vim: syntax=c - * TODO [0/1] Missing features :noexport: - [ ] Error messages to tell what is missing when initializing diff --git a/org/qmckl_blas.org b/org/qmckl_blas.org index 73f9e46..9cd7e18 100644 --- a/org/qmckl_blas.org +++ b/org/qmckl_blas.org @@ -85,7 +85,7 @@ are not intended to be passed to external codes. #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_vector { int64_t size; - double* data; + double* restrict data; } qmckl_vector; #+end_src @@ -161,7 +161,7 @@ qmckl_vector_free( qmckl_context context, #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_matrix { int64_t size[2]; - double* data; + double* restrict data; } qmckl_matrix; #+end_src @@ -247,7 +247,7 @@ qmckl_matrix_free( qmckl_context context, typedef struct qmckl_tensor { int64_t order; int64_t size[QMCKL_TENSOR_ORDER_MAX]; - double* data; + double* restrict data; } qmckl_tensor; #+end_src diff --git a/tools/theme.setup b/tools/theme.setup index 156662b..5849e08 100644 --- a/tools/theme.setup +++ b/tools/theme.setup @@ -6,7 +6,9 @@ #+INFOJS_OPT: toc:t mouse:underline path:org-info.js #+HTML_HEAD: -#+STARTUP: align fold nodlcheck hidestars oddeven lognotestate latexpreview +# STARTUP: align fold nodlcheck hidestars oddeven lognotestate latexpreview +#+STARTUP: showeverything + #+AUTHOR: TREX CoE #+LANGUAGE: en From 9b1f64843715cfeba21879784238a9858b7e8fb5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 28 Mar 2022 16:53:36 +0200 Subject: [PATCH 07/15] Accelerated AO->MO transformation --- org/qmckl_ao.org | 2 + org/qmckl_determinant.org | 8 +- org/qmckl_local_energy.org | 8 +- org/qmckl_mo.org | 344 ++++++++++++++++++++++++++++--------- 4 files changed, 274 insertions(+), 88 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 5bc9235..54c5319 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -139,6 +139,7 @@ int main() { | ~ao_num~ | ~int64_t~ | Number of AOs | | ~ao_cartesian~ | ~bool~ | If true, use polynomials. Otherwise, use spherical harmonics | | ~ao_factor~ | ~double[ao_num]~ | Normalization factor of the AO | + |---------------------+----------------------+----------------------------------------------------------------------| For H_2 with the following basis set, @@ -2491,6 +2492,7 @@ free(ao_factor_test); | ~shell_vgl_date~ | ~uint64_t~ | Last modification date of Value, gradients, Laplacian of the AOs at current positions | | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | Value, gradients, Laplacian of the primitives at current positions | | ~ao_vgl_date~ | ~uint64_t~ | Last modification date of Value, gradients, Laplacian of the AOs at current positions | + |----------------------+-----------------------------------+----------------------------------------------------------------------------------------------| *** After initialization diff --git a/org/qmckl_determinant.org b/org/qmckl_determinant.org index a28f49c..0412db6 100644 --- a/org/qmckl_determinant.org +++ b/org/qmckl_determinant.org @@ -1239,9 +1239,9 @@ assert(rc == QMCKL_SUCCESS); assert(qmckl_ao_basis_provided(context)); -double ao_vgl[5][walk_num][elec_num][chbrclf_ao_num]; +double ao_vgl[walk_num*elec_num][5][chbrclf_ao_num]; -rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0][0]), (int64_t) 5*walk_num*elec_num*chbrclf_ao_num); +rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]), (int64_t) 5*walk_num*elec_num*chbrclf_ao_num); assert (rc == QMCKL_SUCCESS); /* Set up MO data */ @@ -1256,8 +1256,8 @@ assert (rc == QMCKL_SUCCESS); assert(qmckl_mo_basis_provided(context)); -double mo_vgl[5][elec_num][chbrclf_mo_num]; -rc = qmckl_get_mo_basis_vgl(context, &(mo_vgl[0][0][0])); +double mo_vgl[walk_num*elec_num][5][chbrclf_mo_num]; +rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), 5*walk_num*elec_num*chbrclf_mo_num); assert (rc == QMCKL_SUCCESS); /* Set up determinant data */ diff --git a/org/qmckl_local_energy.org b/org/qmckl_local_energy.org index ccd933d..c5018c0 100644 --- a/org/qmckl_local_energy.org +++ b/org/qmckl_local_energy.org @@ -655,9 +655,9 @@ assert(rc == QMCKL_SUCCESS); assert(qmckl_ao_basis_provided(context)); -double ao_vgl[5][walk_num][elec_num][chbrclf_ao_num]; +double ao_vgl[walk_num*elec_num][5][chbrclf_ao_num]; -rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0][0]), +rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]), (int64_t) 5*walk_num*elec_num*chbrclf_ao_num); assert (rc == QMCKL_SUCCESS); @@ -673,8 +673,8 @@ assert (rc == QMCKL_SUCCESS); assert(qmckl_mo_basis_provided(context)); -double mo_vgl[5][elec_num][chbrclf_mo_num]; -rc = qmckl_get_mo_basis_vgl(context, &(mo_vgl[0][0][0])); +double mo_vgl[walk_num*elec_num][5][chbrclf_mo_num]; +rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), 5*walk_num*elec_num*chbrclf_mo_num); assert (rc == QMCKL_SUCCESS); /* Set up determinant data */ diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index d4efb41..f374e57 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -84,13 +84,14 @@ int main() { The following arrays are stored in the context: - |---------------+--------------------+----------------------| - | ~mo_num~ | | Number of MOs | - | ~coefficient~ | ~[mo_num][ao_num]~ | Orbital coefficients | + |-----------------+--------------------+----------------------------------------| + | ~mo_num~ | | Number of MOs | + | ~coefficient~ | ~[mo_num][ao_num]~ | Orbital coefficients | + | ~coefficient_t~ | ~[ao_num][mo_num]~ | Transposed of the Orbital coefficients | + |-----------------+--------------------+----------------------------------------| Computed data: - |---------------+--------------------------+-------------------------------------------------------------------------------------| |---------------+--------------------------+-------------------------------------------------------------------------------------| | ~mo_vgl~ | ~[point_num][5][mo_num]~ | Value, gradients, Laplacian of the MOs at point positions | | ~mo_vgl_date~ | ~uint64_t~ | Late modification date of Value, gradients, Laplacian of the MOs at point positions | @@ -101,9 +102,10 @@ int main() { #+begin_src c :comments org :tangle (eval h_private_type) typedef struct qmckl_mo_basis_struct { int64_t mo_num; - double * coefficient; + double * restrict coefficient; + double * restrict coefficient_t; - double * mo_vgl; + double * restrict mo_vgl; uint64_t mo_vgl_date; int32_t uninitialized; @@ -355,6 +357,34 @@ qmckl_exit_code qmckl_finalize_mo_basis(qmckl_context context) { qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; assert (ctx != NULL); + qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; + mem_info.size = ctx->ao_basis.ao_num * ctx->mo_basis.mo_num * sizeof(double); + double* new_array = (double*) qmckl_malloc(context, mem_info); + if (new_array == NULL) { + return qmckl_failwith( context, + QMCKL_ALLOCATION_FAILED, + "qmckl_finalize_mo_basis", + NULL); + } + + assert (ctx->mo_basis.coefficient != NULL); + + if (ctx->mo_basis.coefficient_t != NULL) { + qmckl_exit_code rc = qmckl_free(context, ctx->mo_basis.coefficient); + if (rc != QMCKL_SUCCESS) { + return qmckl_failwith( context, rc, + "qmckl_finalize_mo_basis", + NULL); + } + } + + for (int64_t i=0 ; iao_basis.ao_num ; ++i) { + for (int64_t j=0 ; jmo_basis.mo_num ; ++j) { + new_array[i*ctx->mo_basis.mo_num + j] = ctx->mo_basis.coefficient[j*ctx->ao_basis.ao_num + i]; + } + } + + ctx->mo_basis.coefficient_t = new_array; qmckl_exit_code rc = QMCKL_SUCCESS; return rc; } @@ -367,11 +397,18 @@ qmckl_exit_code qmckl_finalize_mo_basis(qmckl_context context) { *** Get #+begin_src c :comments org :tangle (eval h_func) :noweb yes -qmckl_exit_code qmckl_get_mo_basis_vgl(qmckl_context context, double* const mo_vgl); +qmckl_exit_code +qmckl_get_mo_basis_mo_vgl(qmckl_context context, + double* const mo_vgl, + const int64_t size_max); #+end_src #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none -qmckl_exit_code qmckl_get_mo_basis_vgl(qmckl_context context, double* const mo_vgl) { +qmckl_exit_code +qmckl_get_mo_basis_mo_vgl(qmckl_context context, + double* const mo_vgl, + const int64_t size_max) +{ if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; @@ -388,7 +425,13 @@ qmckl_exit_code qmckl_get_mo_basis_vgl(qmckl_context context, double* const mo_v qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; assert (ctx != NULL); - size_t sze = 5 * ctx->point.num * ctx->mo_basis.mo_num; + size_t sze = ctx->point.num * 5 * ctx->mo_basis.mo_num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_mo_basis_mo_vgl", + "input array too small"); + } memcpy(mo_vgl, ctx->mo_basis.mo_vgl, sze * sizeof(double)); return QMCKL_SUCCESS; @@ -396,17 +439,84 @@ qmckl_exit_code qmckl_get_mo_basis_vgl(qmckl_context context, double* const mo_v #+end_src #+begin_src f90 :tangle (eval fh_func) :comments org :exports none - interface - integer(c_int32_t) function qmckl_get_mo_basis_vgl (context, mo_vgl) & - bind(C) - use, intrinsic :: iso_c_binding - import - implicit none + interface + integer(c_int32_t) function qmckl_get_mo_basis_mo_vgl (context, & + mo_vgl, size_max) bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + + integer (c_int64_t) , intent(in) , value :: context + double precision, intent(out) :: mo_vgl(*) + integer (c_int64_t) , intent(in) , value :: size_max + end function qmckl_get_mo_basis_mo_vgl + end interface + #+end_src - integer (c_int64_t) , intent(in) , value :: context - double precision, intent(out) :: mo_vgl(*) - end function - end interface + Uses the given array to compute the VGL. + + #+begin_src c :comments org :tangle (eval h_func) :noweb yes +qmckl_exit_code +qmckl_get_mo_basis_mo_vgl_inplace (qmckl_context context, + double* const mo_vgl, + const int64_t size_max); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none +qmckl_exit_code +qmckl_get_mo_basis_mo_vgl_inplace (qmckl_context context, + double* const mo_vgl, + const int64_t size_max) +{ + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return qmckl_failwith( context, + QMCKL_INVALID_CONTEXT, + "qmckl_get_mo_basis_mo_vgl", + NULL); + } + + qmckl_exit_code rc; + + qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; + assert (ctx != NULL); + + int64_t sze = ctx->mo_basis.mo_num * 5 * ctx->point.num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_mo_basis_mo_vgl", + "input array too small"); + } + + rc = qmckl_context_touch(context); + if (rc != QMCKL_SUCCESS) return rc; + + double* old_array = ctx->mo_basis.mo_vgl; + + ctx->mo_basis.mo_vgl = mo_vgl; + + rc = qmckl_provide_mo_vgl(context); + if (rc != QMCKL_SUCCESS) return rc; + + ctx->mo_basis.mo_vgl = old_array; + + return QMCKL_SUCCESS; +} + #+end_src + + #+begin_src f90 :tangle (eval fh_func) :comments org :exports none + interface + integer(c_int32_t) function qmckl_get_mo_basis_mo_vgl_inplace (context, & + mo_vgl, size_max) bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + integer (c_int64_t) , intent(in) , value :: context + double precision, intent(out) :: mo_vgl(*) + integer (c_int64_t) , intent(in) , value :: size_max + end function qmckl_get_mo_basis_mo_vgl_inplace + end interface #+end_src *** Provide @@ -462,19 +572,19 @@ qmckl_exit_code qmckl_provide_mo_vgl(qmckl_context context) if (mo_vgl == NULL) { return qmckl_failwith( context, QMCKL_ALLOCATION_FAILED, - "qmckl_mo_basis_vgl", + "qmckl_mo_basis_mo_vgl", NULL); } ctx->mo_basis.mo_vgl = mo_vgl; } - rc = qmckl_compute_mo_basis_vgl(context, - ctx->ao_basis.ao_num, - ctx->mo_basis.mo_num, - ctx->point.num, - ctx->mo_basis.coefficient, - ctx->ao_basis.ao_vgl, - ctx->mo_basis.mo_vgl); + rc = qmckl_compute_mo_basis_mo_vgl(context, + ctx->ao_basis.ao_num, + ctx->mo_basis.mo_num, + ctx->point.num, + ctx->mo_basis.coefficient_t, + ctx->ao_basis.ao_vgl, + ctx->mo_basis.mo_vgl); if (rc != QMCKL_SUCCESS) { return rc; } @@ -488,25 +598,34 @@ qmckl_exit_code qmckl_provide_mo_vgl(qmckl_context context) *** Compute :PROPERTIES: - :Name: qmckl_compute_mo_basis_vgl + :Name: qmckl_compute_mo_basis_mo_vgl :CRetType: qmckl_exit_code :FRetType: qmckl_exit_code :END: - #+NAME: qmckl_mo_basis_vgl_args - | ~qmckl_context~ | ~context~ | in | Global state | - | ~int64_t~ | ~ao_num~ | in | Number of AOs | - | ~int64_t~ | ~mo_num~ | in | Number of MOs | - | ~int64_t~ | ~point_num~ | in | Number of points | - | ~double~ | ~coef_normalized[mo_num][ao_num]~ | in | AO to MO transformation matrix | - | ~double~ | ~ao_vgl[point_num][5][ao_num]~ | in | Value, gradients and Laplacian of the AOs | - | ~double~ | ~mo_vgl[point_num][5][mo_num]~ | out | Value, gradients and Laplacian of the MOs | + #+NAME: qmckl_mo_basis_mo_vgl_args + | Variable | Type | In/Out | Description | + |---------------------+--------------------------------+--------+-------------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~ao_num~ | ~int64_t~ | in | Number of AOs | + | ~mo_num~ | ~int64_t~ | in | Number of MOs | + | ~point_num~ | ~int64_t~ | in | Number of points | + | ~coef_normalized_t~ | ~double[mo_num][ao_num]~ | in | Transpose of the AO to MO transformation matrix | + | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | in | Value, gradients and Laplacian of the AOs | + | ~mo_vgl~ | ~double[point_num][5][mo_num]~ | out | Value, gradients and Laplacian of the MOs | + The matrix of AO values is very sparse, so we use a sparse-dense + matrix multiplication instead of a dgemm, as exposed in + https://dx.doi.org/10.1007/978-3-642-38718-0_14. + + + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_mo_basis_vgl_f(context, & +integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, & ao_num, mo_num, point_num, & - coef_normalized, ao_vgl, mo_vgl) & + coef_normalized_t, ao_vgl, mo_vgl) & result(info) use qmckl implicit none @@ -514,55 +633,69 @@ integer function qmckl_compute_mo_basis_vgl_f(context, & integer*8 , intent(in) :: ao_num, mo_num integer*8 , intent(in) :: point_num double precision , intent(in) :: ao_vgl(ao_num,5,point_num) - double precision , intent(in) :: coef_normalized(ao_num,mo_num) + double precision , intent(in) :: coef_normalized_t(ao_num,mo_num) double precision , intent(out) :: mo_vgl(mo_num,5,point_num) - character :: TransA, TransB - double precision :: alpha, beta - integer*8 :: M, N, K, LDA, LDB, LDC, i,j + integer :: i,j,k + double precision :: c1, c2, c3, c4, c5 - TransA = 'T' - TransB = 'N' - M = mo_num - N = point_num*5_8 - K = int(ao_num,8) - alpha = 1.d0 - beta = 0.d0 - LDA = size(coef_normalized,1) - LDB = size(ao_vgl,1) - LDC = size(mo_vgl,1) + do j=1,point_num + mo_vgl(:,:,j) = 0.d0 + do k=1,ao_num + if (ao_vgl(k,1,j) /= 0.d0) then + c1 = ao_vgl(k,1,j) + c2 = ao_vgl(k,2,j) + c3 = ao_vgl(k,3,j) + c4 = ao_vgl(k,4,j) + c5 = ao_vgl(k,5,j) + do i=1,mo_num + mo_vgl(i,1,j) = mo_vgl(i,1,j) + coef_normalized_t(i,k) * c1 + mo_vgl(i,2,j) = mo_vgl(i,2,j) + coef_normalized_t(i,k) * c2 + mo_vgl(i,3,j) = mo_vgl(i,3,j) + coef_normalized_t(i,k) * c3 + mo_vgl(i,4,j) = mo_vgl(i,4,j) + coef_normalized_t(i,k) * c4 + mo_vgl(i,5,j) = mo_vgl(i,5,j) + coef_normalized_t(i,k) * c5 + end do + end if + end do + end do - info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, & - coef_normalized, int(size(coef_normalized,1),8), & - ao_vgl, int(size(ao_vgl,1),8), beta, & - mo_vgl,LDC) - - info = QMCKL_SUCCESS - -end function qmckl_compute_mo_basis_vgl_f +end function qmckl_compute_mo_basis_mo_vgl_doc_f #+end_src - #+CALL: generate_c_header(table=qmckl_mo_basis_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_vgl")) + #+CALL: generate_c_header(table=qmckl_mo_basis_mo_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl")) #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_compute_mo_basis_vgl ( - const qmckl_context context, - const int64_t ao_num, - const int64_t mo_num, - const int64_t point_num, - const double* coef_normalized, - const double* ao_vgl, - double* const mo_vgl ); + qmckl_exit_code qmckl_compute_mo_basis_mo_vgl ( + const qmckl_context context, + const int64_t ao_num, + const int64_t mo_num, + const int64_t point_num, + const double* coef_normalized_t, + const double* ao_vgl, + double* const mo_vgl ); #+end_src + #+CALL: generate_c_header(table=qmckl_mo_basis_mo_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl_doc")) - #+CALL: generate_c_interface(table=qmckl_mo_basis_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_vgl")) + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_mo_basis_mo_vgl_doc ( + const qmckl_context context, + const int64_t ao_num, + const int64_t mo_num, + const int64_t point_num, + const double* coef_normalized_t, + const double* ao_vgl, + double* const mo_vgl ); + #+end_src + #+CALL: generate_c_interface(table=qmckl_mo_basis_mo_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl_doc")) + #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_mo_basis_vgl & - (context, ao_num, mo_num, point_num, coef_normalized, ao_vgl, mo_vgl) & - bind(C) result(info) + integer(c_int32_t) function qmckl_compute_mo_basis_mo_vgl_doc & + (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -571,15 +704,66 @@ end function qmckl_compute_mo_basis_vgl_f integer (c_int64_t) , intent(in) , value :: ao_num integer (c_int64_t) , intent(in) , value :: mo_num integer (c_int64_t) , intent(in) , value :: point_num - real (c_double ) , intent(in) :: coef_normalized(ao_num,mo_num) + real (c_double ) , intent(in) :: coef_normalized_t(ao_num,mo_num) real (c_double ) , intent(in) :: ao_vgl(ao_num,5,point_num) real (c_double ) , intent(out) :: mo_vgl(mo_num,5,point_num) - integer(c_int32_t), external :: qmckl_compute_mo_basis_vgl_f - info = qmckl_compute_mo_basis_vgl_f & - (context, ao_num, mo_num, point_num, coef_normalized, ao_vgl, mo_vgl) + integer(c_int32_t), external :: qmckl_compute_mo_basis_mo_vgl_doc_f + info = qmckl_compute_mo_basis_mo_vgl_doc_f & + (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl) - end function qmckl_compute_mo_basis_vgl + end function qmckl_compute_mo_basis_mo_vgl_doc + #+end_src + + #+begin_src c :tangle (eval c) :comments org +qmckl_exit_code +qmckl_compute_mo_basis_mo_vgl (const qmckl_context context, + const int64_t ao_num, + const int64_t mo_num, + const int64_t point_num, + const double* coef_normalized_t, + const double* ao_vgl, + double* const mo_vgl ) +{ +#ifdef HAVE_HPC + return qmckl_compute_mo_basis_mo_vgl_hpc (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl); +#else + return qmckl_compute_mo_basis_mo_vgl_doc (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl); +#endif +} + #+end_src + + +*** HPC version + + + #+begin_src c :tangle (eval h_func) :comments org +#ifdef HAVE_HPC +qmckl_exit_code +qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, + const int64_t ao_num, + const int64_t mo_num, + const int64_t point_num, + const double* coef_normalized_t, + const double* ao_vgl, + double* const mo_vgl ); +#endif + #+end_src + + #+begin_src c :tangle (eval c) :comments org +#ifdef HAVE_HPC +qmckl_exit_code +qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, + const int64_t ao_num, + const int64_t mo_num, + const int64_t point_num, + const double* restrict coef_normalized_t, + const double* restrict ao_vgl, + double* restrict const mo_vgl ) +{ + return qmckl_compute_mo_basis_mo_vgl_doc (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl); +} +#endif #+end_src *** Test @@ -772,7 +956,7 @@ assert (rc == QMCKL_SUCCESS); assert(qmckl_mo_basis_provided(context)); double mo_vgl[walk_num*elec_num][5][chbrclf_mo_num]; -rc = qmckl_get_mo_basis_vgl(context, &(mo_vgl[0][0][0])); +rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), walk_num*elec_num*5*chbrclf_mo_num); assert (rc == QMCKL_SUCCESS); // Test overlap of MO @@ -807,7 +991,7 @@ assert (rc == QMCKL_SUCCESS); // // // Calculate value of MO (1st electron) // double mo_vgl[5][walk_num][elec_num][chbrclf_mo_num]; -// rc = qmckl_get_mo_basis_vgl(context, &(mo_vgl[0][0][0][0])); +// rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0][0])); // assert (rc == QMCKL_SUCCESS); // ovlmo1 += mo_vgl[0][0][0][0]*mo_vgl[0][0][0][0]*dr3; // } From 1b0bfd40be0972142b55eed705a35315fbc8d883 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 28 Mar 2022 17:37:50 +0200 Subject: [PATCH 08/15] HPC version of AO->MO transformation --- org/qmckl_mo.org | 68 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index f374e57..baeaef3 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -621,7 +621,6 @@ qmckl_exit_code qmckl_provide_mo_vgl(qmckl_context context) - #+begin_src f90 :comments org :tangle (eval f) :noweb yes integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, & ao_num, mo_num, point_num, & @@ -761,7 +760,72 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, const double* restrict ao_vgl, double* restrict const mo_vgl ) { - return qmckl_compute_mo_basis_mo_vgl_doc (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl); +#ifdef HAVE_OPENMP + #pragma omp parallel for +#endif + for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) { + double* restrict const vgl1 = &(mo_vgl[ipoint*5*mo_num]); + const double* restrict avgl1 = &(ao_vgl[ipoint*5*ao_num]); + double* restrict const vgl2 = vgl1 + mo_num; + double* restrict const vgl3 = vgl1 + (mo_num << 1); + double* restrict const vgl4 = vgl1 + (mo_num << 1) + mo_num; + double* restrict const vgl5 = vgl1 + (mo_num << 2); + const double* restrict avgl2 = avgl1 + ao_num; + const double* restrict avgl3 = avgl1 + (ao_num << 1); + const double* restrict avgl4 = avgl1 + (ao_num << 1) + ao_num; + const double* restrict avgl5 = avgl1 + (ao_num << 2); + + for (int64_t i=0 ; i Date: Mon, 28 Mar 2022 17:58:03 +0200 Subject: [PATCH 09/15] Accelerated HPC AO->MO transformation --- org/qmckl_mo.org | 75 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index baeaef3..a9a2047 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -803,25 +803,70 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, } } - for (int64_t n=0 ; n < nidx ; ++n) { + int64_t n; + for (n=0 ; n < nidx-4 ; n+=4) { int64_t k = idx[n]; - const double* restrict ck1 = coef_normalized_t + k*mo_num; - if (avgl1[n] != 0.) { - const double a1 = av1[n]; - const double a2 = av2[n]; - const double a3 = av3[n]; - const double a4 = av4[n]; - const double a5 = av5[n]; + const double* restrict ck1 = coef_normalized_t + idx[n ]*mo_num; + const double* restrict ck2 = coef_normalized_t + idx[n+1]*mo_num; + const double* restrict ck3 = coef_normalized_t + idx[n+2]*mo_num; + const double* restrict ck4 = coef_normalized_t + idx[n+3]*mo_num; + + const double a11 = av1[n ]; + const double a21 = av1[n+1]; + const double a31 = av1[n+2]; + const double a41 = av1[n+3]; + + const double a12 = av2[n ]; + const double a22 = av2[n+1]; + const double a32 = av2[n+2]; + const double a42 = av2[n+3]; + + const double a13 = av3[n ]; + const double a23 = av3[n+1]; + const double a33 = av3[n+2]; + const double a43 = av3[n+3]; + + const double a14 = av4[n ]; + const double a24 = av4[n+1]; + const double a34 = av4[n+2]; + const double a44 = av4[n+3]; + + const double a15 = av5[n ]; + const double a25 = av5[n+1]; + const double a35 = av5[n+2]; + const double a45 = av5[n+3]; + +#ifdef HAVE_OPENMP +#pragma omp simd +#endif + for (int64_t i=0 ; i Date: Mon, 28 Mar 2022 18:26:20 +0200 Subject: [PATCH 10/15] Fix type error --- org/qmckl_mo.org | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index a9a2047..eb37368 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -425,7 +425,7 @@ qmckl_get_mo_basis_mo_vgl(qmckl_context context, qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; assert (ctx != NULL); - size_t sze = ctx->point.num * 5 * ctx->mo_basis.mo_num; + const int64_t sze = ctx->point.num * 5 * ctx->mo_basis.mo_num; if (size_max < sze) { return qmckl_failwith( context, QMCKL_INVALID_ARG_3, @@ -481,7 +481,7 @@ qmckl_get_mo_basis_mo_vgl_inplace (qmckl_context context, qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; assert (ctx != NULL); - int64_t sze = ctx->mo_basis.mo_num * 5 * ctx->point.num; + const int64_t sze = ctx->mo_basis.mo_num * 5 * ctx->point.num; if (size_max < sze) { return qmckl_failwith( context, QMCKL_INVALID_ARG_3, From 91811079d30a7d9d2ab82ff8ad9879e4bdce1fdb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 28 Mar 2022 18:29:29 +0200 Subject: [PATCH 11/15] Fixed bugs. Travis OK. --- org/qmckl_mo.org | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index eb37368..4f8bb57 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -632,9 +632,9 @@ integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, & integer*8 , intent(in) :: ao_num, mo_num integer*8 , intent(in) :: point_num double precision , intent(in) :: ao_vgl(ao_num,5,point_num) - double precision , intent(in) :: coef_normalized_t(ao_num,mo_num) + double precision , intent(in) :: coef_normalized_t(mo_num,ao_num) double precision , intent(out) :: mo_vgl(mo_num,5,point_num) - integer :: i,j,k + integer*8 :: i,j,k double precision :: c1, c2, c3, c4, c5 do j=1,point_num From bac1eb33f0fd836ac37d0598ed061967f8f3c498 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 4 Apr 2022 12:11:26 +0200 Subject: [PATCH 12/15] Fixed configure for Nvidian compilers --- configure.ac | 18 ++++++++++++------ org/qmckl_context.org | 16 ++++++++-------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/configure.ac b/configure.ac index 56f0ed1..fdae5dc 100644 --- a/configure.ac +++ b/configure.ac @@ -200,16 +200,22 @@ AS_IF([test "$BLAS_LIBS" == "$LAPACK_LIBS"], [BLAS_LIBS=""]) # Specific options required with some compilers case $FC in - ifort*) + *ifort*) FCFLAGS="$FCFLAGS -nofor-main" ;; -#gfortran*) -# # Order is important here -# FCFLAGS="-cpp $FCFLAGS" -# ;; + *nvfortran*) + FCFLAGS="$FCFLAGS -fPIC -Mnomain" + ;; + esac +case $CC in + + *nvc*) + CFLAGS="$CFLAGS -fPIC" + ;; +esac # Options. @@ -358,7 +364,7 @@ CC..............: ${CC} CPPFLAGS........: ${CPPFLAGS} CFLAGS..........: ${CFLAGS} FC..............: ${FC} -FCLAGS..........: ${FCFLAGS} +FCFLAGS.........: ${FCFLAGS} LDFLAGS:........: ${LDFLAGS} LIBS............: ${LIBS} USE CHAMELEON...: ${with_chameleon} diff --git a/org/qmckl_context.org b/org/qmckl_context.org index 6a35789..ac389f0 100644 --- a/org/qmckl_context.org +++ b/org/qmckl_context.org @@ -169,7 +169,7 @@ qmckl_context qmckl_context_check(const qmckl_context context) { if (context == QMCKL_NULL_CONTEXT) return QMCKL_NULL_CONTEXT; - const qmckl_context_struct* const ctx = (const qmckl_context_struct* const) context; + const qmckl_context_struct* const ctx = (const qmckl_context_struct*) context; /* Try to access memory */ if (ctx->tag != VALID_TAG) { @@ -209,7 +209,7 @@ qmckl_context_touch(const qmckl_context context) NULL); } - qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; + qmckl_context_struct* const ctx = (qmckl_context_struct*) context; ctx->date += 1UL; ctx->point.date += 1UL; @@ -234,7 +234,7 @@ qmckl_context qmckl_context_create(); qmckl_context qmckl_context_create() { qmckl_context_struct* const ctx = - (qmckl_context_struct* const) malloc (sizeof(qmckl_context_struct)); + (qmckl_context_struct*) malloc (sizeof(qmckl_context_struct)); if (ctx == NULL) { return QMCKL_NULL_CONTEXT; @@ -350,7 +350,7 @@ void qmckl_unlock(qmckl_context context); void qmckl_lock(qmckl_context context) { if (context == QMCKL_NULL_CONTEXT) return ; - qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; + qmckl_context_struct* const ctx = (qmckl_context_struct*) context; errno = 0; int rc = pthread_mutex_lock( &(ctx->mutex) ); if (rc != 0) { @@ -362,7 +362,7 @@ void qmckl_lock(qmckl_context context) { } void qmckl_unlock(const qmckl_context context) { - qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; + qmckl_context_struct* const ctx = (qmckl_context_struct*) context; int rc = pthread_mutex_unlock( &(ctx->mutex) ); if (rc != 0) { fprintf(stderr, "DEBUG qmckl_unlock:%s\n", strerror(rc) ); @@ -401,10 +401,10 @@ qmckl_context qmckl_context_copy(const qmckl_context context) { { const qmckl_context_struct* const old_ctx = - (qmckl_context_struct* const) checked_context; + (qmckl_context_struct*) checked_context; qmckl_context_struct* const new_ctx = - (qmckl_context_struct* const) malloc (context, sizeof(qmckl_context_struct)); + (qmckl_context_struct*) malloc (context, sizeof(qmckl_context_struct)); if (new_ctx == NULL) { qmckl_unlock(context); @@ -467,7 +467,7 @@ qmckl_context_destroy (const qmckl_context context) const qmckl_context checked_context = qmckl_context_check(context); if (checked_context == QMCKL_NULL_CONTEXT) return QMCKL_INVALID_CONTEXT; - qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; + qmckl_context_struct* const ctx = (qmckl_context_struct*) context; assert (ctx != NULL); /* Shouldn't be possible because the context is valid */ qmckl_lock(context); From e995d81b7e193253084a9f6fe45d9bcc1cca6e47 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 20 Apr 2022 15:55:59 +0200 Subject: [PATCH 13/15] Add Fortran interfaces in MOs --- org/qmckl_mo.org | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 4f8bb57..84fe3ef 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -257,6 +257,35 @@ bool qmckl_mo_basis_provided(const qmckl_context context) { #+end_src + +*** Fortran interfaces + + #+begin_src f90 :tangle (eval fh_func) :comments org +interface + integer(c_int32_t) function qmckl_get_mo_basis_mo_num (context, & + mo_num) bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(out) :: mo_num + end function qmckl_get_mo_basis_mo_num +end interface + +interface + integer(c_int32_t) function qmckl_get_mo_basis_coefficient(context, & + coefficient, size_max) bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + integer (c_int64_t) , intent(in) , value :: context + double precision, intent(out) :: coefficient(*) + integer (c_int64_t) , intent(int), value :: size_max + end function qmckl_get_mo_basis_coefficient +end interface + + #+end_src + ** Initialization functions To set the basis set, all the following functions need to be From 5c5c13f5b3ea32138a6da96401afa6038e5da15a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 20 Apr 2022 16:11:06 +0200 Subject: [PATCH 14/15] Fix previous commit --- org/qmckl_mo.org | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 84fe3ef..88fe69c 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -257,10 +257,10 @@ bool qmckl_mo_basis_provided(const qmckl_context context) { #+end_src - + *** Fortran interfaces - #+begin_src f90 :tangle (eval fh_func) :comments org + #+begin_src f90 :tangle (eval fh_func) :comments org interface integer(c_int32_t) function qmckl_get_mo_basis_mo_num (context, & mo_num) bind(C) @@ -280,7 +280,7 @@ interface implicit none integer (c_int64_t) , intent(in) , value :: context double precision, intent(out) :: coefficient(*) - integer (c_int64_t) , intent(int), value :: size_max + integer (c_int64_t) , intent(in) , value :: size_max end function qmckl_get_mo_basis_coefficient end interface @@ -397,7 +397,7 @@ qmckl_exit_code qmckl_finalize_mo_basis(qmckl_context context) { } assert (ctx->mo_basis.coefficient != NULL); - + if (ctx->mo_basis.coefficient_t != NULL) { qmckl_exit_code rc = qmckl_free(context, ctx->mo_basis.coefficient); if (rc != QMCKL_SUCCESS) { @@ -474,7 +474,7 @@ qmckl_get_mo_basis_mo_vgl(qmckl_context context, use, intrinsic :: iso_c_binding import implicit none - + integer (c_int64_t) , intent(in) , value :: context double precision, intent(out) :: mo_vgl(*) integer (c_int64_t) , intent(in) , value :: size_max @@ -648,8 +648,8 @@ qmckl_exit_code qmckl_provide_mo_vgl(qmckl_context context) matrix multiplication instead of a dgemm, as exposed in https://dx.doi.org/10.1007/978-3-642-38718-0_14. - - + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, & ao_num, mo_num, point_num, & @@ -700,7 +700,7 @@ end function qmckl_compute_mo_basis_mo_vgl_doc_f const int64_t point_num, const double* coef_normalized_t, const double* ao_vgl, - double* const mo_vgl ); + double* const mo_vgl ); #+end_src #+CALL: generate_c_header(table=qmckl_mo_basis_mo_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl_doc")) @@ -714,11 +714,11 @@ end function qmckl_compute_mo_basis_mo_vgl_doc_f const int64_t point_num, const double* coef_normalized_t, const double* ao_vgl, - double* const mo_vgl ); + double* const mo_vgl ); #+end_src #+CALL: generate_c_interface(table=qmckl_mo_basis_mo_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl_doc")) - + #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_mo_basis_mo_vgl_doc & @@ -743,7 +743,7 @@ end function qmckl_compute_mo_basis_mo_vgl_doc_f end function qmckl_compute_mo_basis_mo_vgl_doc #+end_src - #+begin_src c :tangle (eval c) :comments org + #+begin_src c :tangle (eval c) :comments org qmckl_exit_code qmckl_compute_mo_basis_mo_vgl (const qmckl_context context, const int64_t ao_num, @@ -760,12 +760,12 @@ qmckl_compute_mo_basis_mo_vgl (const qmckl_context context, #endif } #+end_src - - -*** HPC version - - #+begin_src c :tangle (eval h_func) :comments org + +*** HPC version + + + #+begin_src c :tangle (eval h_func) :comments org #ifdef HAVE_HPC qmckl_exit_code qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, @@ -778,7 +778,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, #endif #+end_src - #+begin_src c :tangle (eval c) :comments org + #+begin_src c :tangle (eval c) :comments org #ifdef HAVE_HPC qmckl_exit_code qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, @@ -844,22 +844,22 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, const double a21 = av1[n+1]; const double a31 = av1[n+2]; const double a41 = av1[n+3]; - + const double a12 = av2[n ]; const double a22 = av2[n+1]; const double a32 = av2[n+2]; const double a42 = av2[n+3]; - + const double a13 = av3[n ]; const double a23 = av3[n+1]; const double a33 = av3[n+2]; const double a43 = av3[n+3]; - + const double a14 = av4[n ]; const double a24 = av4[n+1]; const double a34 = av4[n+2]; const double a44 = av4[n+3]; - + const double a15 = av5[n ]; const double a25 = av5[n+1]; const double a35 = av5[n+2]; From 4367d03353ced5f5fcb2b7b758070e20051653ae Mon Sep 17 00:00:00 2001 From: q-posev Date: Mon, 2 May 2022 16:37:15 +0200 Subject: [PATCH 15/15] Fix typos in function names --- org/qmckl_numprec.org | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/org/qmckl_numprec.org b/org/qmckl_numprec.org index 3b73dc3..7c5f26f 100644 --- a/org/qmckl_numprec.org +++ b/org/qmckl_numprec.org @@ -250,19 +250,19 @@ qmckl_exit_code qmckl_set_numprec_range(const qmckl_context context, const int r # Fortran interface #+begin_src f90 :tangle (eval fh_func) interface - integer (qmckl_exit_code) function qmckl_numprec_set_range(context, range) bind(C) + integer (qmckl_exit_code) function qmckl_set_numprec_range(context, range) bind(C) use, intrinsic :: iso_c_binding import integer (qmckl_context), intent(in), value :: context integer (c_int32_t), intent(in), value :: range - end function qmckl_numprec_set_range + end function qmckl_set_numprec_range end interface #+end_src ~qmckl_get_numprec_range~ returns the value of the numerical range in the context. #+begin_src c :comments org :tangle (eval h_func) :exports none -int32_t qmckl_get_numprec_get_range(const qmckl_context context); +int32_t qmckl_get_numprec_range(const qmckl_context context); #+end_src # Source