From a7fa8fd49a2defa72b3a7c86b12ceb16a8e0c454 Mon Sep 17 00:00:00 2001
From: Abdallah Ammar <abd.ammar.phys@gmail.com>
Date: Wed, 27 Nov 2024 09:03:33 +0100
Subject: [PATCH] added cuda kernel for B-sing-dRPA

---
 src/cuda/src/ph_drpa_b_sing.cu | 82 ++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 src/cuda/src/ph_drpa_b_sing.cu

diff --git a/src/cuda/src/ph_drpa_b_sing.cu b/src/cuda/src/ph_drpa_b_sing.cu
new file mode 100644
index 0000000..2a59142
--- /dev/null
+++ b/src/cuda/src/ph_drpa_b_sing.cu
@@ -0,0 +1,82 @@
+#include <stdio.h>
+
+__global__ void ph_dRPA_B_sing_kernel(int nO, int nV, int nBas, int nS, double *ERI, double *B) {
+
+
+    int i, j, a, b;
+    int aa, bb;
+    int nVS;
+    int nBas2, nBas3;
+    int i_B0, i_B1, i_B2;
+    int i_I0, i_I1, i_I2;
+
+    nVS = nV * nS;
+
+    nBas2 = nBas * nBas;
+    nBas3 = nBas2 * nBas;
+
+    aa = blockIdx.x * blockDim.x + threadIdx.x;
+    bb = blockIdx.y * blockDim.y + threadIdx.y;
+
+    while(aa < nV) {
+        a = aa + nO;
+
+        i_B0 = aa * nS;
+        i_I0 = a * nBas2;
+
+        while(bb < nV) {
+            b = bb + nO;
+
+            i_B1 = i_B0 + bb;
+            i_I1 = i_I0 + b * nBas3;
+
+            i = 0;
+            while(i < nO) {
+
+                i_B2 = i_B1 + i * nVS;
+                i_I2 = i_I1 + i;
+ 
+                j = 0;
+                while(j < nO) {
+
+                    B[i_B2 + j * nV] = 2.0 * ERI[i_I2 + j * nBas];
+
+                    j ++;
+	        } // j
+
+                i ++;
+            } // i
+
+            bb += blockDim.y * gridDim.y;
+        } // bb
+
+        aa += blockDim.x * gridDim.x;
+    } // aa
+
+}
+
+
+
+
+
+extern "C" void ph_dRPA_B_sing(int nO, int nV, int nBas, int nS, double *ERI, double *B) {
+
+
+    int sBlocks = 32;
+    int nBlocks = (nV + sBlocks - 1) / sBlocks;
+
+    dim3 dimGrid(nBlocks, nBlocks, 1);
+    dim3 dimBlock(sBlocks, sBlocks, 1);
+
+
+    printf("lunching ph_dRPA_B_sing_kernel with %dx%d blocks and %dx%d threads/block\n",
+        nBlocks, nBlocks, sBlocks, sBlocks);
+
+
+    ph_dRPA_B_sing_kernel<<<dimGrid, dimBlock>>>(nO, nV, nBas, nS, ERI, B);
+
+}
+
+
+
+