4
1
mirror of https://github.com/pfloos/quack synced 2025-01-03 01:56:09 +01:00

added cuda kernel for A-trip-dRPA

This commit is contained in:
Abdallah Ammar 2024-11-27 08:51:04 +01:00
parent 66566a8ce7
commit b2e82bff2e
4 changed files with 95 additions and 6 deletions

View File

@ -5,6 +5,6 @@
extern void check_Cuda_Errors(cudaError_t err, const char * msg, const char * file, int line);
extern void check_Cublas_Errors(cublasStatus_t status, const char * msg, const char * file, int line);
extern void phLR_dRPA_A_sing(int nO, int nBas, double *eps, double *ERI, double *A);
extern void ph_dRPA_A_sing(int nO, int nBas, double *eps, double *ERI, double *A);
#endif

View File

@ -40,7 +40,7 @@ int ph_drpa(int nO, int nBas, int nS, double *h_eps, double *h_ERI,
// construct A matrix
double *d_A;
check_Cuda_Errors(cudaMalloc((void**)&d_A, nS * nS * sizeof(double)), "cudaMalloc", __FILE__, __LINE__);
phLR_dRPA_A_sing(nO, nBas, d_eps, d_ERI, d_A);
ph_dRPA_A_sing(nO, nBas, d_eps, d_ERI, d_A);
check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);

View File

@ -1,6 +1,6 @@
#include <stdio.h>
__global__ void phLR_dRPA_A_sing_kernel(int nO, int nBas, double *eps, double *ERI, double *A) {
__global__ void ph_dRPA_A_sing_kernel(int nO, int nBas, double *eps, double *ERI, double *A) {
int i, j, a, b;
@ -64,7 +64,7 @@ __global__ void phLR_dRPA_A_sing_kernel(int nO, int nBas, double *eps, double *E
extern "C" void phLR_dRPA_A_sing(int nO, int nBas, double *eps, double *ERI, double *A) {
extern "C" void ph_dRPA_A_sing(int nO, int nBas, double *eps, double *ERI, double *A) {
int size = nBas - nO;
@ -76,11 +76,11 @@ extern "C" void phLR_dRPA_A_sing(int nO, int nBas, double *eps, double *ERI, dou
dim3 dimBlock(sBlocks, sBlocks, 1);
printf("lunching phLR_dRPA_A_sing_kernel with %dx%d blocks and %dx%d threads/block\n",
printf("lunching ph_dRPA_A_sing_kernel with %dx%d blocks and %dx%d threads/block\n",
nBlocks, nBlocks, sBlocks, sBlocks);
phLR_dRPA_A_sing_kernel<<<dimGrid, dimBlock>>>(nO, nBas, eps, ERI, A);
ph_dRPA_A_sing_kernel<<<dimGrid, dimBlock>>>(nO, nBas, eps, ERI, A);
}

View File

@ -0,0 +1,89 @@
#include <stdio.h>
/*
 * Fill the matrix A with the diagonal orbital-energy differences
 *
 *     A[i, aa, j, bb] = (a == b && i == j) ? eps[a] - eps[i] : 0.0
 *
 * with a = aa + nO, b = bb + nO, 0 <= i, j < nO and 0 <= aa, bb < nV,
 * where nV = nBas - nO and nS = nO * nV.
 *
 * Storage: the element (i, aa, j, bb) is written at the linear offset
 *     i * (nV * nS) + aa * nS + j * nV + bb
 * so A must hold at least nS * nS doubles and eps at least nBas doubles.
 * Both pointers are dereferenced on the device.
 *
 * Launch layout: the x dimension of the grid walks aa and the y dimension
 * walks bb. Both loops advance by the total number of threads in their
 * dimension, so any non-empty 2D launch configuration fills the whole
 * matrix.
 *
 * NOTE(review): threads that differ only in threadIdx.x write addresses
 * that are nS elements apart (aa is the slow index of the pair); mapping
 * bb to the x dimension would likely give contiguous stores - confirm with
 * a profiler before changing the mapping.
 */
__global__ void ph_dRPA_A_trip_kernel(int nO, int nBas, double *eps, double *A) {

    const int nV = nBas - nO;

    // Offsets into A are kept in size_t: nS * nS does not fit in a 32-bit
    // int once nS exceeds ~46k.
    const size_t nS  = (size_t) nO * (size_t) nV;
    const size_t nVS = (size_t) nV * nS;

    const int stride_a = blockDim.x * gridDim.x;
    const int stride_b = blockDim.y * gridDim.y;

    const int aa_start = blockIdx.x * blockDim.x + threadIdx.x;
    const int bb_start = blockIdx.y * blockDim.y + threadIdx.y;

    for (int aa = aa_start; aa < nV; aa += stride_a) {

        const int    a    = aa + nO;
        const size_t i_A0 = (size_t) aa * nS;

        // bb restarts from this thread's own start for every aa; without
        // the reset, only the first aa handled by a thread would be filled.
        for (int bb = bb_start; bb < nV; bb += stride_b) {

            const int    b    = bb + nO;
            const size_t i_A1 = i_A0 + (size_t) bb;

            for (int i = 0; i < nO; i++) {

                const size_t i_A2 = i_A1 + (size_t) i * nVS;

                for (int j = 0; j < nO; j++) {

                    // Build the value in a register and store it once
                    // instead of zeroing A and then updating it in place.
                    double val = 0.0;
                    if ((a == b) && (i == j)) {
                        val += eps[a] - eps[i];
                    }

                    A[i_A2 + (size_t) j * (size_t) nV] = val;

                } // j
            } // i
        } // bb
    } // aa
}
/*
 * Host-side launcher for ph_dRPA_A_trip_kernel.
 *
 * nO   : first virtual index offset passed through to the kernel
 * nBas : total number of orbitals; nBas - nO sizes the 2D grid
 * eps  : passed unchanged to the kernel (dereferenced on the device)
 * A    : passed unchanged to the kernel (dereferenced on the device)
 *
 * The launch uses sBlocks x sBlocks threads per block and enough blocks in
 * each dimension to cover nBas - nO indices. The launch is not checked or
 * synchronised here; error handling is left to the caller.
 */
extern "C" void ph_dRPA_A_trip(int nO, int nBas, double *eps, double *A) {

    int size = nBas - nO;

    // Nothing to fill when the range is empty. A grid with a zero (or
    // negative) dimension is an invalid launch configuration, so skip the
    // launch instead of raising a spurious CUDA error.
    if (size <= 0) {
        return;
    }

    int sBlocks = 32;
    int nBlocks = (size + sBlocks - 1) / sBlocks; // ceil(size / sBlocks)

    dim3 dimGrid(nBlocks, nBlocks, 1);
    dim3 dimBlock(sBlocks, sBlocks, 1);

    printf("lunching ph_dRPA_A_trip_kernel with %dx%d blocks and %dx%d threads/block\n",
        nBlocks, nBlocks, sBlocks, sBlocks);

    ph_dRPA_A_trip_kernel<<<dimGrid, dimBlock>>>(nO, nBas, eps, A);
}