4
1
mirror of https://github.com/pfloos/quack synced 2024-12-22 12:23:50 +01:00

linking with quack-cuda: OK

This commit is contained in:
Abdallah Ammar 2024-11-26 19:31:44 +01:00
parent 41dd532d0e
commit 9c42437746
6 changed files with 83 additions and 6 deletions

3
.gitignore vendored
View File

@ -1,3 +1,6 @@
*.slurm
*.mod
*.so
*.o *.o
*. *.
__pycache__ __pycache__

View File

@ -13,3 +13,5 @@ esac
export QUACK_ROOT="$( cd $QUACK_ROOT; pwd -P )" export QUACK_ROOT="$( cd $QUACK_ROOT; pwd -P )"
export PATH="${QUACK_ROOT}/bin:$PATH" export PATH="${QUACK_ROOT}/bin:$PATH"
export LD_LIBRARY_PATH="${QUACK_ROOT}/src/cuda/build:$LD_LIBRARY_PATH"

View File

@ -1,5 +1,7 @@
subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF) subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
use cu_quack_module
! Perform a direct random phase approximation calculation ! Perform a direct random phase approximation calculation
implicit none implicit none
@ -37,6 +39,8 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
double precision,allocatable :: Om(:) double precision,allocatable :: Om(:)
double precision,allocatable :: XpY(:,:) double precision,allocatable :: XpY(:,:)
double precision,allocatable :: XmY(:,:) double precision,allocatable :: XmY(:,:)
! DEBUG
double precision, allocatable :: XpY_gpu(:,:), XmY_gpu(:,:), Om_gpu(:)
double precision :: EcRPA(nspin) double precision :: EcRPA(nspin)
@ -74,6 +78,13 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,eHF,ERI,Aph) call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,eHF,ERI,Aph)
if(.not.TDA) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,ERI,Bph) if(.not.TDA) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,ERI,Bph)
! DEBUG
allocate(Om_gpu(nS), XpY_gpu(nS,nS), XmY_gpu(nS,nS))
call ph_drpa(nO, nBas, eHF(1), ERI(1,1,1,1), Om_gpu(1), XpY_gpu(1,1), XmY_gpu(1,1))
print *, ' CPU:', Aph(1,1)
print *, ' GPU:', XpY_gpu(1,1)
stop
call phLR(TDA,nS,Aph,Bph,EcRPA(ispin),Om,XpY,XmY) call phLR(TDA,nS,Aph,Bph,EcRPA(ispin),Om,XpY,XmY)
call print_excitation_energies('phRPA@RHF','singlet',nS,Om) call print_excitation_energies('phRPA@RHF','singlet',nS,Om)
call phLR_transition_vectors(.true.,nBas,nC,nO,nV,nR,nS,dipole_int,Om,XpY,XmY) call phLR_transition_vectors(.true.,nBas,nC,nO,nV,nR,nS,dipole_int,Om,XpY,XmY)

View File

@ -7,7 +7,8 @@
#include "ph_drpa.h" #include "ph_drpa.h"
int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI) { int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI,
double *h_Omega, double *h_XpY, double *h_XmY) {
double *d_eps; double *d_eps;
double *d_ERI; double *d_ERI;
@ -16,6 +17,17 @@ int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI) {
int nBas4 = nBas2 * nBas2; int nBas4 = nBas2 * nBas2;
int ia, jb;
int nS = nO * (nBas - nO);
for (ia = 0; ia < nS; ia++) {
h_Omega[ia] = 0.0;
for (jb = 0; jb < nS; jb++) {
h_XmY[jb + nO * nBas * ia] = 0.0;
h_XpY[jb + nO * nBas * ia] = 0.0;
}
}
check_Cuda_Errors(cudaMalloc((void**)&d_eps, nO * sizeof(double)), check_Cuda_Errors(cudaMalloc((void**)&d_eps, nO * sizeof(double)),
"cudaMalloc", __FILE__, __LINE__); "cudaMalloc", __FILE__, __LINE__);
check_Cuda_Errors(cudaMalloc((void**)&d_ERI, nBas4 * sizeof(double)), check_Cuda_Errors(cudaMalloc((void**)&d_ERI, nBas4 * sizeof(double)),
@ -28,13 +40,15 @@ int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI) {
"cudaMemcpy", __FILE__, __LINE__); "cudaMemcpy", __FILE__, __LINE__);
// construct A matrix // construct A matrix
int nS = nO * (nBas * nO);
double *d_A; double *d_A;
check_Cuda_Errors(cudaMalloc((void**)&d_A, nS * nS * sizeof(double)), "cudaMalloc", __FILE__, __LINE__); check_Cuda_Errors(cudaMalloc((void**)&d_A, nS * nS * sizeof(double)), "cudaMalloc", __FILE__, __LINE__);
phLR_dRPA_A_sing(nO, nBas, d_eps, d_ERI, d_A); // phLR_dRPA_A_sing(nO, nBas, d_eps, d_ERI, d_A);
check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__); // check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
check_Cuda_Errors(cudaMemcpy(h_XpY, d_A, nS * nS * sizeof(double), cudaMemcpyDeviceToHost),
"cudaMemcpy", __FILE__, __LINE__);
check_Cuda_Errors(cudaFree(d_eps), "cudaFree", __FILE__, __LINE__); check_Cuda_Errors(cudaFree(d_eps), "cudaFree", __FILE__, __LINE__);
check_Cuda_Errors(cudaFree(d_ERI), "cudaFree", __FILE__, __LINE__); check_Cuda_Errors(cudaFree(d_ERI), "cudaFree", __FILE__, __LINE__);
check_Cuda_Errors(cudaFree(d_A), "cudaFree", __FILE__, __LINE__); check_Cuda_Errors(cudaFree(d_A), "cudaFree", __FILE__, __LINE__);

View File

@ -109,6 +109,15 @@ else:
print("Unknown platform. Only Linux and Darwin are supported.") print("Unknown platform. Only Linux and Darwin are supported.")
sys.exit(-1) sys.exit(-1)
if USE_GPU:
compiler_tmp = compiler.strip().split('\n')
compiler_tmp[0] += " -L{}/src/cuda/build -lcuquack -lcudart -lcublas".format(QUACK_ROOT)
compiler_exe = '\n'.join(compiler_tmp)
else:
compiler_exe = compiler
header = """# header = """#
# This file was automatically generated. Do not modify this file. # This file was automatically generated. Do not modify this file.
# To change compiling options, make the modifications in # To change compiling options, make the modifications in
@ -171,7 +180,7 @@ build_in_lib_dir = "\n".join([
build_in_exe_dir = "\n".join([ build_in_exe_dir = "\n".join([
header, header,
compiler, compiler_exe,
rule_fortran, rule_fortran,
rule_build_exe, rule_build_exe,
]) ])
@ -191,7 +200,6 @@ if USE_GPU:
lib_dirs[0], lib_dirs[i] = lib_dirs[i], lib_dirs[0] lib_dirs[0], lib_dirs[i] = lib_dirs[i], lib_dirs[0]
else: else:
lib_dirs.remove("mod") lib_dirs.remove("mod")
print(lib_dirs)
def create_ninja_in_libdir(directory): def create_ninja_in_libdir(directory):
def write_rule(f, source_file, replace): def write_rule(f, source_file, replace):

View File

@ -0,0 +1,39 @@
module cu_quack_module

  ! Fortran-side declarations for routines exported by the CUDA library.
  ! Currently exposes a single C-interoperable entry point (ph_drpa) and a
  ! trivial smoke-test subroutine.

  use, intrinsic :: iso_c_binding

  implicit none

  ! ---

  interface

    ! Explicit interface for the C symbol "ph_drpa".
    !
    !   nO, nBas : passed by value (C int).
    !   eps      : input array of length nBas
    !              (presumably orbital energies -- TODO confirm).
    !   ERI      : input rank-4 array, nBas in every dimension
    !              (presumably two-electron integrals -- TODO confirm).
    !   Omega    : output array of length nS.
    !   XpY, XmY : output nS x nS arrays.
    !
    ! with nS = nO*(nBas-nO). This extent mirrors the C implementation, which
    ! computes nS = nO * (nBas - nO) and copies nS*nS doubles into XpY. An
    ! explicit-shape dummy must not be larger than the actual argument, so the
    ! extents are declared as nS rather than nO*nBas.
    !
    ! NOTE(review): the C function is declared with an int return type while
    ! it is bound here as a subroutine; the return value is ignored. Confirm
    ! this is intended before relying on the status code.
    subroutine ph_drpa(nO, nBas, eps, ERI, &
                       Omega, XpY, XmY) bind(C, name = "ph_drpa")

      import c_int, c_double

      integer(c_int), intent(in), value :: nO, nBas
      real(c_double), intent(in)        :: eps(nBas)
      real(c_double), intent(in)        :: ERI(nBas,nBas,nBas,nBas)
      real(c_double), intent(out)       :: Omega(nO*(nBas-nO))
      real(c_double), intent(out)       :: XpY(nO*(nBas-nO),nO*(nBas-nO))
      real(c_double), intent(out)       :: XmY(nO*(nBas-nO),nO*(nBas-nO))

    end subroutine ph_drpa

  end interface

  ! ---

contains

  ! Smoke test: prints a fixed greeting so a caller can check that the module
  ! is linked and callable. Takes no arguments and has no other effect.
  subroutine cu_quack_module_test()

    implicit none

    print*, ' hello from mod_test'

  end subroutine cu_quack_module_test

  ! ---

end module cu_quack_module