From cff0232eb59cc54262370f7d6751518958a67cd2 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Tue, 26 Nov 2024 09:37:46 +0100 Subject: [PATCH] workin on dRPA with CUDA --- src/cuda/Makefile | 63 ++++++++++++++++++++++++ src/cuda/include/ph_drpa.h | 10 ++++ src/cuda/include/utils.cuh | 9 ++++ src/cuda/src/ph_drpa.c | 47 ++++++++++++++++++ src/cuda/src/phlr_drpa_a_sing.cu | 82 ++++++++++++++++++++++++++++++++ src/cuda/src/utils.cu | 53 +++++++++++++++++++++ 6 files changed, 264 insertions(+) create mode 100644 src/cuda/Makefile create mode 100644 src/cuda/include/ph_drpa.h create mode 100644 src/cuda/include/utils.cuh create mode 100644 src/cuda/src/ph_drpa.c create mode 100644 src/cuda/src/phlr_drpa_a_sing.cu create mode 100644 src/cuda/src/utils.cu diff --git a/src/cuda/Makefile b/src/cuda/Makefile new file mode 100644 index 0000000..d76e96d --- /dev/null +++ b/src/cuda/Makefile @@ -0,0 +1,63 @@ +NVCC = nvcc +NFLAGS = -O2 --compiler-options '-fPIC' +NDFLAGS = --shared + +CC = gcc +CFLAGS = -fPIC -O2 -Wall -g + +FC = gfortran +FFLAGS = -O2 -Wall -g + +SRC_DIR = src +INC_DIR = include + +BIN_DIR = bin +BLD_DIR = build +$(shell mkdir -p $(BIN_DIR)) +$(shell mkdir -p $(BLD_DIR)) + +CU_SRC = $(wildcard $(SRC_DIR)/*.cu) +CU_OBJ = $(CU_SRC:$(SRC_DIR)/%.cu=$(BLD_DIR)/%.o) + +C_SRC = $(SRC_DIR)/ph_drpa.c +C_OBJ = $(BLD_DIR)/ph_drpa.o + +F_SRC = $(SRC_DIR)/cu_quack_module.f90 +F_OBJ = $(BLD_DIR)/cu_quack_module.f90 + +MAIN_SRC = $(SRC_DIR)/cu_quack.f90 +MAIN_OBJ = $(BLD_DIR)/cu_quack.o + +OUTPUT_LIB = $(BLD_DIR)/libcuquack.so + +CUDA_LIBS = -lcudart -lcublas + + +all: $(OUTPUT_LIB) + +$(OUTPUT_LIB): $(CU_OBJ) $(C_OBJ) + $(NVCC) $(NFLAGS) $(NLDFLAGS) $^ -o $@ $(CUDA_LIBS) -I$(INC_DIR) + +$(BLD_DIR)/%.o: $(SRC_DIR)/%.cu + $(NVCC) $(NFLAGS) -c $< -o $@ -I$(INC_DIR) + +$(C_OBJ): $(C_SRC) + @for src in $(C_SRC); do \ + obj=$(BLD_DIR)/$$(basename $${src} .c).o; \ + echo "$(CC) $(CFLAGS) -c $$src -o $$obj -I$(INC_DIR)"; \ + $(CC) $(CFLAGS) -c $$src -o $$obj -I$(INC_DIR); \ + done + +$(F_OBJ): $(F_SRC) + $(FC) $(FFLAGS) -c $< -o $@ -J$(BLD_DIR) + +$(MAIN_OBJ): $(MAIN_SRC) + $(FC) $(FFLAGS) -c $< -o $@ -J$(BLD_DIR) + + + +.PHONY: clean +clean: + rm -f $(BLD_DIR)/*.o $(BLD_DIR)/*.so $(BLD_DIR)/*.mod $(BIN_DIR)/* + + diff --git a/src/cuda/include/ph_drpa.h b/src/cuda/include/ph_drpa.h new file mode 100644 index 0000000..6f1ed07 --- /dev/null +++ b/src/cuda/include/ph_drpa.h @@ -0,0 +1,10 @@ +#ifndef PH_DRPA + +#define PH_DRPA + +extern void check_Cuda_Errors(cudaError_t err, const char * msg, const char * file, int line); +extern void check_Cublas_Errors(cublasStatus_t status, const char * msg, const char * file, int line); + +extern void phLR_dRPA_A_sing(int nO, int nBas, double *eps, double *ERI, double *A); + +#endif diff --git a/src/cuda/include/utils.cuh b/src/cuda/include/utils.cuh new file mode 100644 index 0000000..1a91732 --- /dev/null +++ b/src/cuda/include/utils.cuh @@ -0,0 +1,9 @@ +#ifndef UTILS +#define UTILS + +extern "C" void check_Cuda_Errors(cudaError_t err, const char* msg, const char* file, int line); + +extern "C" void check_Cublas_Errors(cublasStatus_t status, const char* msg, const char* file, int line); + + +#endif diff --git a/src/cuda/src/ph_drpa.c b/src/cuda/src/ph_drpa.c new file mode 100644 index 0000000..888abaa --- /dev/null +++ b/src/cuda/src/ph_drpa.c @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include +#include + +#include "ph_drpa.h" + +int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI) { + + + + double *d_eps; + double *d_ERI; + + int nBas2 = nBas * nBas; + int nBas4 = nBas2 * nBas2; + + + check_Cuda_Errors(cudaMalloc((void**)&d_eps, nO * sizeof(double)), + "cudaMalloc", __FILE__, __LINE__); + check_Cuda_Errors(cudaMalloc((void**)&d_ERI, nBas4 * sizeof(double)), + "cudaMalloc", __FILE__, __LINE__); + + + check_Cuda_Errors(cudaMemcpy(d_eps, h_eps, nO * sizeof(double), cudaMemcpyHostToDevice), + "cudaMemcpy", __FILE__, __LINE__); + check_Cuda_Errors(cudaMemcpy(d_ERI, h_ERI, nBas4 * sizeof(double), cudaMemcpyHostToDevice), + "cudaMemcpy", __FILE__, __LINE__); + + // construct A matrix + int nS = nO * (nBas * nO); + double *d_A; + check_Cuda_Errors(cudaMalloc((void**)&d_A, nS * nS * sizeof(double)), "cudaMalloc", __FILE__, __LINE__); + phLR_dRPA_A_sing(nO, nBas, d_eps, d_ERI, d_A); + check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__); + + + check_Cuda_Errors(cudaFree(d_eps), "cudaFree", __FILE__, __LINE__); + check_Cuda_Errors(cudaFree(d_ERI), "cudaFree", __FILE__, __LINE__); + check_Cuda_Errors(cudaFree(d_A), "cudaFree", __FILE__, __LINE__); + + + return 0; +} + diff --git a/src/cuda/src/phlr_drpa_a_sing.cu b/src/cuda/src/phlr_drpa_a_sing.cu new file mode 100644 index 0000000..3cbc556 --- /dev/null +++ b/src/cuda/src/phlr_drpa_a_sing.cu @@ -0,0 +1,82 @@ +#include + +__global__ void phLR_dRPA_A_sing_kernel(int nO, int nBas, double *eps, double *ERI, double *A) { + + + int i, j, a, b; + int ia, jb, jb_off; + + int ij_off0, ij_off; + + int aa_max = nBas - nO; + int ia_max = aa_max * nO; + + int nBas2 = nBas * nBas; + int nBas3 = nBas2 * nBas; + + int aa = blockIdx.x * blockDim.x + threadIdx.x; + int bb = blockIdx.y * blockDim.y + threadIdx.y; + + while(aa < aa_max) { + a = aa + nO; + + ij_off0 = a * nBas2; + + while(bb < aa_max) { + b = bb + nO; + + ij_off = ij_off0 + b * nBas; + + while(i < nO) { + ia = i * aa_max + aa; + jb_off = ia * ia_max; + + while(j < nO) { + jb = j * aa_max + bb; + + A[jb + jb_off] = 2.0 * ERI[i + j * nBas3 + ij_off]; + if(a==b && i==j) { + A[jb + jb_off] += eps[a] - eps[i]; + } + + j ++; + } // j + + i ++; + } // i + + bb += blockDim.y * gridDim.y; + } // bb + + aa += blockDim.x * gridDim.x; + } // aa + +} + + + + + +extern "C" void phLR_dRPA_A_sing(int nO, int nBas, double *eps, double *ERI, double *A) { + + + int size = nBas - nO; + + int sBlocks = 32; + int nBlocks = (size + sBlocks - 1) / sBlocks; + + dim3 dimGrid(nBlocks, nBlocks, 1); + dim3 dimBlock(sBlocks, sBlocks, 1); + + + printf("lunching phLR_dRPA_A_sing_kernel with %dx%d blocks and %dx%d threads/block\n", + nBlocks, nBlocks, sBlocks, sBlocks); + + + phLR_dRPA_A_sing_kernel<<>>(nO, nBas, eps, ERI, A); + +} + + + + diff --git a/src/cuda/src/utils.cu b/src/cuda/src/utils.cu new file mode 100644 index 0000000..b20c52d --- /dev/null +++ b/src/cuda/src/utils.cu @@ -0,0 +1,53 @@ +#include +#include +#include +#include + + +extern "C" void check_Cuda_Errors(cudaError_t err, const char* msg, const char* file, int line) { + if (err != cudaSuccess) { + printf("CUDA Error in %s at line %d\n", file, line); + printf("%s - %s\n", msg, cudaGetErrorString(err)); + exit(0); + } +} + + +const char* cublasGetErrorString(cublasStatus_t status) { + switch (status) { + case CUBLAS_STATUS_SUCCESS: + return "CUBLAS_STATUS_SUCCESS"; + case CUBLAS_STATUS_NOT_INITIALIZED: + return "CUBLAS_STATUS_NOT_INITIALIZED"; + case CUBLAS_STATUS_ALLOC_FAILED: + return "CUBLAS_STATUS_ALLOC_FAILED"; + case CUBLAS_STATUS_INVALID_VALUE: + return "CUBLAS_STATUS_INVALID_VALUE"; + case CUBLAS_STATUS_ARCH_MISMATCH: + return "CUBLAS_STATUS_ARCH_MISMATCH"; + case CUBLAS_STATUS_MAPPING_ERROR: + return "CUBLAS_STATUS_MAPPING_ERROR"; + case CUBLAS_STATUS_EXECUTION_FAILED: + return "CUBLAS_STATUS_EXECUTION_FAILED"; + case CUBLAS_STATUS_INTERNAL_ERROR: + return "CUBLAS_STATUS_INTERNAL_ERROR"; + case CUBLAS_STATUS_NOT_SUPPORTED: + return "CUBLAS_STATUS_NOT_SUPPORTED"; + case CUBLAS_STATUS_LICENSE_ERROR: + return "CUBLAS_STATUS_LICENSE_ERROR"; + } + return "UNKNOWN CUBLAS ERROR"; +} + +extern "C" void check_Cublas_Errors(cublasStatus_t status, const char* msg, const char* file, int line) { + + const char* err = cublasGetErrorString(status); + + if (err != "CUBLAS_STATUS_SUCCESS") { + printf("CUBLAS Error in %s at line %d\n", file, line); + printf("%s - %s\n", msg, err); + exit(0); + } +} + +