10
1
mirror of https://github.com/pfloos/quack synced 2025-01-03 01:55:57 +01:00

added Aph_dRPA_sing kernel

This commit is contained in:
Abdallah Ammar 2024-11-26 23:00:15 +01:00
parent 9c42437746
commit 66566a8ce7
4 changed files with 42 additions and 36 deletions

View File

@ -80,9 +80,10 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
! DEBUG
allocate(Om_gpu(nS), XpY_gpu(nS,nS), XmY_gpu(nS,nS))
call ph_drpa(nO, nBas, eHF(1), ERI(1,1,1,1), Om_gpu(1), XpY_gpu(1,1), XmY_gpu(1,1))
call ph_drpa(nO, nBas, nS, eHF(1), ERI(1,1,1,1), Om_gpu(1), XpY_gpu(1,1), XmY_gpu(1,1))
print *, ' CPU:', Aph(1,1)
print *, ' GPU:', XpY_gpu(1,1)
print *, ' GPU:', XmY_gpu(1,1)
stop
call phLR(TDA,nS,Aph,Bph,EcRPA(ispin),Om,XpY,XmY)

View File

@ -7,7 +7,7 @@
#include "ph_drpa.h"
int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI,
int ph_drpa(int nO, int nBas, int nS, double *h_eps, double *h_ERI,
double *h_Omega, double *h_XpY, double *h_XmY) {
double *d_eps;
@ -16,25 +16,23 @@ int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI,
int nBas2 = nBas * nBas;
int nBas4 = nBas2 * nBas2;
int ia, jb;
int nS = nO * (nBas - nO);
for (ia = 0; ia < nS; ia++) {
h_Omega[ia] = 0.0;
for (jb = 0; jb < nS; jb++) {
h_XmY[jb + nO * nBas * ia] = 0.0;
h_XpY[jb + nO * nBas * ia] = 0.0;
h_XmY[jb + ia * nS] = 4.0;
//h_XpY[jb + ia * nS] = 5.0;
}
}
check_Cuda_Errors(cudaMalloc((void**)&d_eps, nO * sizeof(double)),
check_Cuda_Errors(cudaMalloc((void**)&d_eps, nBas * sizeof(double)),
"cudaMalloc", __FILE__, __LINE__);
check_Cuda_Errors(cudaMalloc((void**)&d_ERI, nBas4 * sizeof(double)),
"cudaMalloc", __FILE__, __LINE__);
check_Cuda_Errors(cudaMemcpy(d_eps, h_eps, nO * sizeof(double), cudaMemcpyHostToDevice),
check_Cuda_Errors(cudaMemcpy(d_eps, h_eps, nBas * sizeof(double), cudaMemcpyHostToDevice),
"cudaMemcpy", __FILE__, __LINE__);
check_Cuda_Errors(cudaMemcpy(d_ERI, h_ERI, nBas4 * sizeof(double), cudaMemcpyHostToDevice),
"cudaMemcpy", __FILE__, __LINE__);
@ -42,8 +40,8 @@ int ph_drpa(int nO, int nBas, double *h_eps, double *h_ERI,
// construct A matrix
double *d_A;
check_Cuda_Errors(cudaMalloc((void**)&d_A, nS * nS * sizeof(double)), "cudaMalloc", __FILE__, __LINE__);
// phLR_dRPA_A_sing(nO, nBas, d_eps, d_ERI, d_A);
// check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
phLR_dRPA_A_sing(nO, nBas, d_eps, d_ERI, d_A);
check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
check_Cuda_Errors(cudaMemcpy(h_XpY, d_A, nS * nS * sizeof(double), cudaMemcpyDeviceToHost),

View File

@ -4,39 +4,46 @@ __global__ void phLR_dRPA_A_sing_kernel(int nO, int nBas, double *eps, double *E
int i, j, a, b;
int ia, jb, jb_off;
int aa, bb;
int nV, nS, nVS;
int nBas2, nBas3;
int i_A0, i_A1, i_A2;
int i_I0, i_I1, i_I2;
int ij_off0, ij_off;
nV = nBas - nO;
nS = nO * nV;
nVS = nV * nS;
int aa_max = nBas - nO;
int ia_max = aa_max * nO;
nBas2 = nBas * nBas;
nBas3 = nBas2 * nBas;
int nBas2 = nBas * nBas;
int nBas3 = nBas2 * nBas;
aa = blockIdx.x * blockDim.x + threadIdx.x;
bb = blockIdx.y * blockDim.y + threadIdx.y;
int aa = blockIdx.x * blockDim.x + threadIdx.x;
int bb = blockIdx.y * blockDim.y + threadIdx.y;
while(aa < aa_max) {
while(aa < nV) {
a = aa + nO;
ij_off0 = a * nBas2;
i_A0 = aa * nS;
i_I0 = a * nBas2;
while(bb < aa_max) {
while(bb < nV) {
b = bb + nO;
ij_off = ij_off0 + b * nBas;
i_A1 = i_A0 + bb;
i_I1 = i_I0 + b * nBas;
i = 0;
while(i < nO) {
ia = i * aa_max + aa;
jb_off = ia * ia_max;
while(j < nO) {
jb = j * aa_max + bb;
A[jb + jb_off] = 2.0 * ERI[i + j * nBas3 + ij_off];
if(a==b && i==j) {
A[jb + jb_off] += eps[a] - eps[i];
i_A2 = i_A1 + i * nVS;
i_I2 = i_I1 + i;
j = 0;
while(j < nO) {
A[i_A2 + j * nV] = 2.0 * ERI[i_I2 + j * nBas3];
if((a==b) && (i==j)) {
A[i_A2 + j * nV] += eps[a] - eps[i];
}
j ++;

View File

@ -8,16 +8,16 @@ module cu_quack_module
interface
subroutine ph_drpa(nO, nBas, eps, ERI, &
subroutine ph_drpa(nO, nBas, nS, eps, ERI, &
Omega, XpY, XmY) bind(C, name = "ph_drpa")
import c_int, c_double
integer(c_int), intent(in), value :: nO, nBas
integer(c_int), intent(in), value :: nO, nBas, nS
real(c_double), intent(in) :: eps(nBas)
real(c_double), intent(in) :: ERI(nBas,nBas,nBas,nBas)
real(c_double), intent(out) :: Omega(nO*nBas)
real(c_double), intent(out) :: XpY(nO*nBas,nO*nBas)
real(c_double), intent(out) :: XmY(nO*nBas,nO*nBas)
real(c_double), intent(out) :: Omega(nS)
real(c_double), intent(out) :: XpY(nS,nS)
real(c_double), intent(out) :: XmY(nS,nS)
end subroutine ph_drpa