Patch summary (text before the first "diff --git" header is ignored by patch tools):
  - exposes the C routine ph_drpa to Fortran through a bind(C) interface module;
  - calls it from phRRPA as a temporary DEBUG comparison against the CPU matrix;
  - links executables against libcuquack/cudart/cublas when USE_GPU is set.
The output arrays Omega/XpY/XmY are sized with nS = nO*(nBas-nO) on both sides.

diff --git a/.gitignore b/.gitignore
index 899b091..4a6ab19 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+*.slurm
+*.mod
+*.so
 *.o
 *.
 __pycache__
diff --git a/quack.rc b/quack.rc
index 77ca7ff..bede54b 100644
--- a/quack.rc
+++ b/quack.rc
@@ -13,3 +13,5 @@ esac
 
 export QUACK_ROOT="$( cd $QUACK_ROOT; pwd -P )"
 export PATH="${QUACK_ROOT}/bin:$PATH"
+# Runtime search path for libcuquack; no trailing ':' is left when the variable was unset
+export LD_LIBRARY_PATH="${QUACK_ROOT}/src/cuda/build${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
diff --git a/src/RPA/phRRPA.f90 b/src/RPA/phRRPA.f90
index 7a13d8f..94c6576 100644
--- a/src/RPA/phRRPA.f90
+++ b/src/RPA/phRRPA.f90
@@ -1,5 +1,7 @@
 subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
 
+  use cu_quack_module
+
 ! Perform a direct random phase approximation calculation
 
   implicit none
@@ -37,6 +39,8 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
   double precision,allocatable  :: Om(:)
   double precision,allocatable  :: XpY(:,:)
   double precision,allocatable  :: XmY(:,:)
+  ! DEBUG: host buffers that receive the results of the GPU routine ph_drpa
+  double precision, allocatable :: XpY_gpu(:,:), XmY_gpu(:,:), Om_gpu(:)
 
   double precision              :: EcRPA(nspin)
 
@@ -74,6 +78,13 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
     call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,eHF,ERI,Aph)
     if(.not.TDA) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,ERI,Bph)
 
+    ! DEBUG: run the GPU routine, print one element next to the CPU Aph, then halt
+    allocate(Om_gpu(nS), XpY_gpu(nS,nS), XmY_gpu(nS,nS))
+    call ph_drpa(nO, nBas, eHF(1), ERI(1,1,1,1), Om_gpu(1), XpY_gpu(1,1), XmY_gpu(1,1))
+    print *, ' CPU:', Aph(1,1)
+    print *, ' GPU:', XpY_gpu(1,1)
+    stop
+
     call phLR(TDA,nS,Aph,Bph,EcRPA(ispin),Om,XpY,XmY)
     call print_excitation_energies('phRPA@RHF','singlet',nS,Om)
     call phLR_transition_vectors(.true.,nBas,nC,nO,nV,nR,nS,dipole_int,Om,XpY,XmY)
diff --git a/src/cuda/src/ph_drpa.c b/src/cuda/src/ph_drpa.c
index aa8d357..61afcbe 100644
--- a/src/cuda/src/ph_drpa.c
+++ b/src/cuda/src/ph_drpa.c
@@ -7,7 +7,8 @@
 
 #include "ph_drpa.h"
 
-int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI) {
+int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI,
+            double *h_Omega, double *h_XpY, double *h_XmY) {
 
     double *d_eps;
     double *d_ERI;
@@ -16,6 +17,17 @@ int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI) {
     int nBas4 = nBas2 * nBas2;
 
 
+    int ia, jb;
+    int nS = nO * (nBas - nO);
+    // Zero the host outputs; each matrix is nS x nS, so the column stride is nS
+    for (ia = 0; ia < nS; ia++) {
+        h_Omega[ia] = 0.0;
+        for (jb = 0; jb < nS; jb++) {
+            h_XmY[jb + nS * ia] = 0.0;
+            h_XpY[jb + nS * ia] = 0.0;
+        }
+    }
+
     check_Cuda_Errors(cudaMalloc((void**)&d_eps, nO * sizeof(double)),
         "cudaMalloc", __FILE__, __LINE__);
     check_Cuda_Errors(cudaMalloc((void**)&d_ERI, nBas4 * sizeof(double)),
@@ -28,13 +40,16 @@ int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI) {
         "cudaMemcpy", __FILE__, __LINE__);
 
     // construct A matrix
-    int nS = nO * (nBas * nO);
     double *d_A;
     check_Cuda_Errors(cudaMalloc((void**)&d_A, nS * nS * sizeof(double)),
         "cudaMalloc", __FILE__, __LINE__);
 
-    phLR_dRPA_A_sing(nO, nBas, d_eps, d_ERI, d_A);
-    check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
+//    phLR_dRPA_A_sing(nO, nBas, d_eps, d_ERI, d_A);
+//    check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
+    // DEBUG: with the kernel disabled nothing has written d_A, so h_XpY receives unspecified data
+    check_Cuda_Errors(cudaMemcpy(h_XpY, d_A, nS * nS * sizeof(double), cudaMemcpyDeviceToHost),
+        "cudaMemcpy", __FILE__, __LINE__);
+
     check_Cuda_Errors(cudaFree(d_eps), "cudaFree", __FILE__, __LINE__);
     check_Cuda_Errors(cudaFree(d_ERI), "cudaFree", __FILE__, __LINE__);
     check_Cuda_Errors(cudaFree(d_A), "cudaFree", __FILE__, __LINE__);
diff --git a/src/make_ninja.py b/src/make_ninja.py
index c78bc7d..3d558d3 100755
--- a/src/make_ninja.py
+++ b/src/make_ninja.py
@@ -109,6 +109,15 @@ else:
     print("Unknown platform. Only Linux and Darwin are supported.")
     sys.exit(-1)
 
+# GPU builds: append the cuquack/cudart/cublas link flags to the first line of the compiler block
+if USE_GPU:
+    compiler_tmp = compiler.strip().split('\n')
+    compiler_tmp[0] += " -L{}/src/cuda/build -lcuquack -lcudart -lcublas".format(QUACK_ROOT)
+    compiler_exe = '\n'.join(compiler_tmp)
+else:
+    compiler_exe = compiler
+
+
 header = """#
 # This file was automatically generated. Do not modify this file.
 # To change compiling options, make the modifications in
@@ -171,7 +180,7 @@ build_in_lib_dir = "\n".join([
 
 build_in_exe_dir = "\n".join([
   header,
-  compiler,
+  compiler_exe,
   rule_fortran,
   rule_build_exe,
 ])
@@ -191,7 +200,6 @@ if USE_GPU:
             lib_dirs[0], lib_dirs[i] = lib_dirs[i], lib_dirs[0]
 else:
     lib_dirs.remove("mod")
-print(lib_dirs)
 
 def create_ninja_in_libdir(directory):
     def write_rule(f, source_file, replace):
diff --git a/src/mod/cu_quack_module.f90 b/src/mod/cu_quack_module.f90
new file mode 100644
index 0000000..f73cc6c
--- /dev/null
+++ b/src/mod/cu_quack_module.f90
@@ -0,0 +1,39 @@
+module cu_quack_module
+
+  use, intrinsic :: iso_c_binding
+
+  implicit none
+
+  ! ---
+
+  interface
+
+    ! C routine ph_drpa; output extents use nS = nO*(nBas-nO), matching the C code and the caller
+    subroutine ph_drpa(nO, nBas, eps, ERI, &
+                       Omega, XpY, XmY) bind(C, name = "ph_drpa")
+
+      import c_int, c_double
+      integer(c_int), intent(in), value :: nO, nBas
+      real(c_double), intent(in)        :: eps(nBas)
+      real(c_double), intent(in)        :: ERI(nBas,nBas,nBas,nBas)
+      real(c_double), intent(out)       :: Omega(nO*(nBas-nO))
+      real(c_double), intent(out)       :: XpY(nO*(nBas-nO),nO*(nBas-nO))
+      real(c_double), intent(out)       :: XmY(nO*(nBas-nO),nO*(nBas-nO))
+
+    end subroutine ph_drpa
+
+  end interface
+
+  ! ---
+
+  contains
+
+    subroutine cu_quack_module_test()
+      implicit none
+      print*, ' hello from mod_test'
+    end subroutine cu_quack_module_test
+
+  ! ---
+
+end module cu_quack_module
+