linking with quack-cuda: OK

2025-04-02 06:51:37 +02:00 · 2024-11-26 19:31:44 +01:00 · 2024-11-26 19:31:44 +01:00 · 9c42437746
commit 9c42437746
parent 41dd532d0e
6 changed files with 83 additions and 6 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,6 @@
 *.slurm
 *.mod
 *.so
 *.o
 *.
 __pycache__
--- a/quack.rc
+++ b/quack.rc
@ -13,3 +13,5 @@ esac
 # Resolve QUACK_ROOT to its physical (symlink-free) absolute path.
 # The inner expansion is quoted so that paths containing whitespace survive.
 export QUACK_ROOT="$( cd "$QUACK_ROOT"; pwd -P )"
 export PATH="${QUACK_ROOT}/bin:$PATH"
 # Prepend the CUDA build directory. The ':' separator is only emitted when
 # LD_LIBRARY_PATH already has content: a trailing empty entry would make the
 # dynamic loader search the current working directory.
 export LD_LIBRARY_PATH="${QUACK_ROOT}/src/cuda/build${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
--- a/src/RPA/phRRPA.f90
+++ b/src/RPA/phRRPA.f90
@ -1,5 +1,7 @@
 subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
  use cu_quack_module
 ! Perform a direct random phase approximation calculation
  implicit none
@ -37,6 +39,8 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
  double precision,allocatable  :: Om(:)
  double precision,allocatable  :: XpY(:,:)
  double precision,allocatable  :: XmY(:,:)
  ! DEBUG
  double precision, allocatable :: XpY_gpu(:,:), XmY_gpu(:,:), Om_gpu(:)
  double precision              :: EcRPA(nspin)
@ -74,6 +78,13 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
    call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,eHF,ERI,Aph)
    if(.not.TDA) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,ERI,Bph)
    ! DEBUG
    allocate(Om_gpu(nS), XpY_gpu(nS,nS), XmY_gpu(nS,nS))
    call ph_drpa(nO, nBas, eHF(1), ERI(1,1,1,1), Om_gpu(1), XpY_gpu(1,1), XmY_gpu(1,1))
    print *, ' CPU:', Aph(1,1)
    print *, ' GPU:', XpY_gpu(1,1)
    stop
    call phLR(TDA,nS,Aph,Bph,EcRPA(ispin),Om,XpY,XmY)
    call print_excitation_energies('phRPA@RHF','singlet',nS,Om)
    call phLR_transition_vectors(.true.,nBas,nC,nO,nV,nR,nS,dipole_int,Om,XpY,XmY)
--- a/src/cuda/src/ph_drpa.c
+++ b/src/cuda/src/ph_drpa.c
@ -7,7 +7,8 @@
 #include "ph_drpa.h"
-int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI) {
+int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI,
            double *h_Omega, double *h_XpY, double *h_XmY) {
    double *d_eps;
    double *d_ERI;
@ -16,6 +17,17 @@ int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI) {
    int nBas4 = nBas2 * nBas2;
    int ia, jb;
    int nS = nO * (nBas - nO);
    /* Zero the host-side outputs before any device work.
     * h_XpY and h_XmY are nS x nS blocks of doubles stored contiguously
     * (nS * nS doubles are copied into h_XpY further down), so consecutive
     * ia slices are nS elements apart. Using nO * nBas as the stride would
     * write past the end of the nS * nS buffers. */
    for (ia = 0; ia < nS; ia++) {
        h_Omega[ia] = 0.0;
        for (jb = 0; jb < nS; jb++) {
            h_XmY[jb + nS * ia] = 0.0;
            h_XpY[jb + nS * ia] = 0.0;
        }
    }
    check_Cuda_Errors(cudaMalloc((void**)&d_eps, nO * sizeof(double)),
        "cudaMalloc", __FILE__, __LINE__);
    check_Cuda_Errors(cudaMalloc((void**)&d_ERI, nBas4 * sizeof(double)),
@ -28,13 +40,15 @@ int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI) {
        "cudaMemcpy", __FILE__, __LINE__);
    // construct A matrix
    int nS = nO * (nBas * nO);
    double *d_A;
    check_Cuda_Errors(cudaMalloc((void**)&d_A, nS * nS * sizeof(double)), "cudaMalloc", __FILE__, __LINE__);
-    phLR_dRPA_A_sing(nO, nBas, d_eps, d_ERI, d_A);
+//    phLR_dRPA_A_sing(nO, nBas, d_eps, d_ERI, d_A);
-    check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
+//    check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
    check_Cuda_Errors(cudaMemcpy(h_XpY, d_A, nS * nS * sizeof(double), cudaMemcpyDeviceToHost), 
        "cudaMemcpy", __FILE__, __LINE__);
    check_Cuda_Errors(cudaFree(d_eps), "cudaFree", __FILE__, __LINE__);
    check_Cuda_Errors(cudaFree(d_ERI), "cudaFree", __FILE__, __LINE__);
    check_Cuda_Errors(cudaFree(d_A), "cudaFree", __FILE__, __LINE__);
--- a/src/make_ninja.py
+++ b/src/make_ninja.py
@ -109,6 +109,15 @@ else:
    print("Unknown platform. Only Linux and Darwin are supported.")
    sys.exit(-1)
 # Pick the compiler definition used for linking executables.
 if not USE_GPU:
    # CPU-only build: executables use the compiler definition unchanged.
    compiler_exe = compiler
 else:
    # GPU build: append the CUDA link flags to the first line of the
    # (multi-line) compiler definition and leave the other lines untouched.
    compiler_tmp = compiler.strip().split('\n')
    gpu_link_flags = " -L{}/src/cuda/build -lcuquack -lcudart -lcublas".format(QUACK_ROOT)
    compiler_tmp[0] = compiler_tmp[0] + gpu_link_flags
    compiler_exe = '\n'.join(compiler_tmp)
 header = """#
 # This file was automatically generated. Do not modify this file.
 # To change compiling options, make the modifications in 
@ -171,7 +180,7 @@ build_in_lib_dir = "\n".join([
 build_in_exe_dir = "\n".join([
 	header,
-	compiler,
+	compiler_exe,
 	rule_fortran,
 	rule_build_exe,
 ])
@ -191,7 +200,6 @@ if USE_GPU:
    lib_dirs[0], lib_dirs[i] = lib_dirs[i], lib_dirs[0]
 else:
    lib_dirs.remove("mod")
 print(lib_dirs)
 def create_ninja_in_libdir(directory):
    def write_rule(f, source_file, replace):
--- a/src/mod/cu_quack_module.f90
+++ b/src/mod/cu_quack_module.f90
@ -0,0 +1,39 @@
 module cu_quack_module
  ! Fortran-side bindings to the C/CUDA routines of the cuquack library.
  use, intrinsic :: iso_c_binding
  implicit none
  ! ---
  interface
    ! Binding to the C routine "ph_drpa".
    !
    !   nO, nBas : passed by value; the C side derives the number of single
    !              excitations as nS = nO*(nBas-nO)
    !   eps      : input array, declared with nBas entries
    !   ERI      : input array, declared with nBas**4 entries
    !   Omega    : output, nS entries
    !   XpY, XmY : outputs, nS x nS entries each
    !
    ! The output extents are nS rather than nO*nBas so that the dummy arrays
    ! are never declared larger than the nS-sized buffers callers allocate.
    !
    ! NOTE(review): the C function is declared as returning int while it is
    ! bound here as a subroutine, so the return value is dropped - confirm
    ! this is intended.
    subroutine ph_drpa(nO, nBas, eps, ERI, &
                       Omega, XpY, XmY) bind(C, name = "ph_drpa")
      import c_int, c_double
      integer(c_int), intent(in), value :: nO, nBas
      real(c_double), intent(in)        :: eps(nBas)
      real(c_double), intent(in)        :: ERI(nBas,nBas,nBas,nBas)
      real(c_double), intent(out)       :: Omega(nO*(nBas-nO))
      real(c_double), intent(out)       :: XpY(nO*(nBas-nO),nO*(nBas-nO))
      real(c_double), intent(out)       :: XmY(nO*(nBas-nO),nO*(nBas-nO))
    end subroutine ph_drpa
  end interface
  ! ---
  contains
    ! Prints a fixed greeting; takes no arguments and touches no state.
    subroutine cu_quack_module_test()
        implicit none
        print*, ' hello from mod_test'
    end subroutine cu_quack_module_test
  ! ---
 end module cu_quack_module