mirror of
https://github.com/pfloos/quack
synced 2024-12-22 04:13:52 +01:00
linking with quack-cuda: OK
This commit is contained in:
parent
41dd532d0e
commit
9c42437746
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,3 +1,6 @@
|
||||
*.slurm
|
||||
*.mod
|
||||
*.so
|
||||
*.o
|
||||
*.
|
||||
__pycache__
|
||||
|
2
quack.rc
2
quack.rc
@ -13,3 +13,5 @@ esac
|
||||
export QUACK_ROOT="$( cd $QUACK_ROOT; pwd -P )"
|
||||
|
||||
export PATH="${QUACK_ROOT}/bin:$PATH"
|
||||
export LD_LIBRARY_PATH="${QUACK_ROOT}/src/cuda/build:$LD_LIBRARY_PATH"
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
|
||||
|
||||
use cu_quack_module
|
||||
|
||||
! Perform a direct random phase approximation calculation
|
||||
|
||||
implicit none
|
||||
@ -37,6 +39,8 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
|
||||
double precision,allocatable :: Om(:)
|
||||
double precision,allocatable :: XpY(:,:)
|
||||
double precision,allocatable :: XmY(:,:)
|
||||
! DEBUG
|
||||
double precision, allocatable :: XpY_gpu(:,:), XmY_gpu(:,:), Om_gpu(:)
|
||||
|
||||
double precision :: EcRPA(nspin)
|
||||
|
||||
@ -74,6 +78,13 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
|
||||
call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,eHF,ERI,Aph)
|
||||
if(.not.TDA) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,ERI,Bph)
|
||||
|
||||
! DEBUG
|
||||
allocate(Om_gpu(nS), XpY_gpu(nS,nS), XmY_gpu(nS,nS))
|
||||
call ph_drpa(nO, nBas, eHF(1), ERI(1,1,1,1), Om_gpu(1), XpY_gpu(1,1), XmY_gpu(1,1))
|
||||
print *, ' CPU:', Aph(1,1)
|
||||
print *, ' GPU:', XpY_gpu(1,1)
|
||||
stop
|
||||
|
||||
call phLR(TDA,nS,Aph,Bph,EcRPA(ispin),Om,XpY,XmY)
|
||||
call print_excitation_energies('phRPA@RHF','singlet',nS,Om)
|
||||
call phLR_transition_vectors(.true.,nBas,nC,nO,nV,nR,nS,dipole_int,Om,XpY,XmY)
|
||||
|
@ -7,7 +7,8 @@
|
||||
|
||||
#include "ph_drpa.h"
|
||||
|
||||
int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI) {
|
||||
int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI,
|
||||
double *h_Omega, double *h_XpY, double *h_XmY) {
|
||||
|
||||
double *d_eps;
|
||||
double *d_ERI;
|
||||
@ -16,6 +17,17 @@ int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI) {
|
||||
int nBas4 = nBas2 * nBas2;
|
||||
|
||||
|
||||
int ia, jb;
|
||||
int nS = nO * (nBas - nO);
|
||||
for (ia = 0; ia < nS; ia++) {
|
||||
h_Omega[ia] = 0.0;
|
||||
for (jb = 0; jb < nS; jb++) {
|
||||
h_XmY[jb + nO * nBas * ia] = 0.0;
|
||||
h_XpY[jb + nO * nBas * ia] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
check_Cuda_Errors(cudaMalloc((void**)&d_eps, nO * sizeof(double)),
|
||||
"cudaMalloc", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaMalloc((void**)&d_ERI, nBas4 * sizeof(double)),
|
||||
@ -28,13 +40,15 @@ int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI) {
|
||||
"cudaMemcpy", __FILE__, __LINE__);
|
||||
|
||||
// construct A matrix
|
||||
int nS = nO * (nBas * nO);
|
||||
double *d_A;
|
||||
check_Cuda_Errors(cudaMalloc((void**)&d_A, nS * nS * sizeof(double)), "cudaMalloc", __FILE__, __LINE__);
|
||||
phLR_dRPA_A_sing(nO, nBas, d_eps, d_ERI, d_A);
|
||||
check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
|
||||
// phLR_dRPA_A_sing(nO, nBas, d_eps, d_ERI, d_A);
|
||||
// check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
|
||||
|
||||
|
||||
check_Cuda_Errors(cudaMemcpy(h_XpY, d_A, nS * nS * sizeof(double), cudaMemcpyDeviceToHost),
|
||||
"cudaMemcpy", __FILE__, __LINE__);
|
||||
|
||||
check_Cuda_Errors(cudaFree(d_eps), "cudaFree", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaFree(d_ERI), "cudaFree", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaFree(d_A), "cudaFree", __FILE__, __LINE__);
|
||||
|
@ -109,6 +109,15 @@ else:
|
||||
print("Unknown platform. Only Linux and Darwin are supported.")
|
||||
sys.exit(-1)
|
||||
|
||||
if USE_GPU:
|
||||
compiler_tmp = compiler.strip().split('\n')
|
||||
compiler_tmp[0] += " -L{}/src/cuda/build -lcuquack -lcudart -lcublas".format(QUACK_ROOT)
|
||||
compiler_exe = '\n'.join(compiler_tmp)
|
||||
else:
|
||||
compiler_exe = compiler
|
||||
|
||||
|
||||
|
||||
header = """#
|
||||
# This file was automatically generated. Do not modify this file.
|
||||
# To change compiling options, make the modifications in
|
||||
@ -171,7 +180,7 @@ build_in_lib_dir = "\n".join([
|
||||
|
||||
build_in_exe_dir = "\n".join([
|
||||
header,
|
||||
compiler,
|
||||
compiler_exe,
|
||||
rule_fortran,
|
||||
rule_build_exe,
|
||||
])
|
||||
@ -191,7 +200,6 @@ if USE_GPU:
|
||||
lib_dirs[0], lib_dirs[i] = lib_dirs[i], lib_dirs[0]
|
||||
else:
|
||||
lib_dirs.remove("mod")
|
||||
print(lib_dirs)
|
||||
|
||||
def create_ninja_in_libdir(directory):
|
||||
def write_rule(f, source_file, replace):
|
||||
|
39
src/mod/cu_quack_module.f90
Normal file
39
src/mod/cu_quack_module.f90
Normal file
@ -0,0 +1,39 @@
|
||||
module cu_quack_module

  ! Fortran-side declarations for routines provided by the CUDA library
  ! (the build script links executables with -lcuquack when USE_GPU is set).
  ! Host code does "use cu_quack_module" to get explicit, C-interoperable
  ! interfaces to those routines.

  use, intrinsic :: iso_c_binding

  implicit none

  ! ---

  interface

    ! Binding to the C entry point "ph_drpa".
    !
    !   nO, nBas : integers passed by value to the C side.
    !   eps      : input array of length nBas
    !              (presumably orbital energies; the caller passes eHF).
    !   ERI      : input rank-4 array, nBas in every dimension.
    !   Omega    : output, length nS.
    !   XpY, XmY : output, nS x nS each.
    !
    ! with nS = nO*(nBas-nO), which is the size the C implementation uses for
    ! these buffers. The caller passes the first element of nS-sized arrays,
    ! so the dummy arrays declared here must not be larger than nS.
    ! NOTE(review): the caller's nS may be smaller than nO*(nBas-nO) when
    ! orbitals are frozen (nC, nR non-zero) -- confirm before enabling that case.
    !
    ! NOTE(review): the C function is declared as returning int, whereas a
    ! bind(C) subroutine corresponds to a C function returning void. The call
    ! syntax is kept as a subroutine so existing "call ph_drpa(...)" sites
    ! keep compiling; consider making the C side return void.

    subroutine ph_drpa(nO, nBas, eps, ERI, &
                       Omega, XpY, XmY) bind(C, name = "ph_drpa")

      import c_int, c_double

      integer(c_int), intent(in), value :: nO, nBas
      real(c_double), intent(in)        :: eps(nBas)
      real(c_double), intent(in)        :: ERI(nBas,nBas,nBas,nBas)
      real(c_double), intent(out)       :: Omega(nO*(nBas-nO))
      real(c_double), intent(out)       :: XpY(nO*(nBas-nO),nO*(nBas-nO))
      real(c_double), intent(out)       :: XmY(nO*(nBas-nO),nO*(nBas-nO))

    end subroutine ph_drpa

  end interface

  ! ---

  contains

  ! Smoke test: prints a fixed message so a caller can check that the module
  ! was compiled and linked.
  subroutine cu_quack_module_test()

    implicit none

    print*, ' hello from mod_test'

  end subroutine cu_quack_module_test

  ! ---

end module cu_quack_module
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user