mirror of
https://github.com/pfloos/quack
synced 2024-12-22 12:23:50 +01:00
added & tested diagonalyzation with Cusolver!
This commit is contained in:
parent
0df9203eb3
commit
10cb7f931d
@ -31,6 +31,7 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
|
||||
|
||||
! Local variables
|
||||
|
||||
integer :: i
|
||||
integer :: ispin
|
||||
logical :: dRPA
|
||||
double precision :: lambda
|
||||
@ -40,7 +41,7 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
|
||||
double precision,allocatable :: XpY(:,:)
|
||||
double precision,allocatable :: XmY(:,:)
|
||||
! DEBUG
|
||||
double precision, allocatable :: XpY_gpu(:,:), XmY_gpu(:,:), Om_gpu(:)
|
||||
!double precision, allocatable :: XpY_gpu(:,:), XmY_gpu(:,:), Om_gpu(:)
|
||||
|
||||
double precision :: EcRPA(nspin)
|
||||
|
||||
@ -78,15 +79,21 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
|
||||
call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,eHF,ERI,Aph)
|
||||
if(.not.TDA) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,ERI,Bph)
|
||||
|
||||
! DEBUG
|
||||
allocate(Om_gpu(nS), XpY_gpu(nS,nS), XmY_gpu(nS,nS))
|
||||
call ph_drpa(nO, nBas, nS, eHF(1), ERI(1,1,1,1), Om_gpu(1), XpY_gpu(1,1), XmY_gpu(1,1))
|
||||
print *, ' CPU:', Aph(1,1)
|
||||
print *, ' GPU:', XpY_gpu(1,1)
|
||||
print *, ' GPU:', XmY_gpu(1,1)
|
||||
stop
|
||||
|
||||
call phLR(TDA,nS,Aph,Bph,EcRPA(ispin),Om,XpY,XmY)
|
||||
|
||||
!! DEBUG
|
||||
!allocate(Om_gpu(nS), XpY_gpu(nS,nS), XmY_gpu(nS,nS))
|
||||
!call ph_drpa_tda(nO, nBas, nS, eHF(1), ERI(1,1,1,1), Om_gpu(1), XpY_gpu(1,1))
|
||||
!do i = 1, nS
|
||||
! print *, i, Om(i), Om_gpu(i)
|
||||
! if(dabs(Om(i) - Om_gpu(i)) .gt. 1d-13) then
|
||||
! print *, 'GPU FAILED!'
|
||||
! stop
|
||||
! endif
|
||||
!enddo
|
||||
!print *, 'GPU DONE!'
|
||||
!stop
|
||||
|
||||
call print_excitation_energies('phRPA@RHF','singlet',nS,Om)
|
||||
call phLR_transition_vectors(.true.,nBas,nC,nO,nV,nR,nS,dipole_int,Om,XpY,XmY)
|
||||
|
||||
|
@ -1,10 +0,0 @@
|
||||
#ifndef PH_DRPA
|
||||
|
||||
#define PH_DRPA
|
||||
|
||||
extern void check_Cuda_Errors(cudaError_t err, const char * msg, const char * file, int line);
|
||||
extern void check_Cublas_Errors(cublasStatus_t status, const char * msg, const char * file, int line);
|
||||
|
||||
extern void ph_dRPA_A_sing(int nO, int nV, int nBas, int nS, double *eps, double *ERI, double *A);
|
||||
|
||||
#endif
|
10
src/cuda/include/ph_rpa.h
Normal file
10
src/cuda/include/ph_rpa.h
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef PH_RPA
|
||||
|
||||
#define PH_RPA
|
||||
|
||||
extern void ph_dRPA_A_sing(int nO, int nV, int nBas, int nS, double *eps, double *ERI, double *A);
|
||||
extern void ph_dRPA_B_sing(int nO, int nV, int nBas, int nS, double *ERI, double *B);
|
||||
|
||||
extern void diag_dn_dsyevd(int n, int *info, double *W, double *A);
|
||||
|
||||
#endif
|
@ -1,9 +0,0 @@
|
||||
#ifndef UTILS
|
||||
#define UTILS
|
||||
|
||||
extern "C" void check_Cuda_Errors(cudaError_t err, const char* msg, const char* file, int line);
|
||||
|
||||
extern "C" void check_Cublas_Errors(cublasStatus_t status, const char* msg, const char* file, int line);
|
||||
|
||||
|
||||
#endif
|
9
src/cuda/include/utils.h
Normal file
9
src/cuda/include/utils.h
Normal file
@ -0,0 +1,9 @@
|
||||
#ifndef UTILS
|
||||
|
||||
#define UTILS
|
||||
|
||||
extern void check_Cuda_Errors(cudaError_t err, const char *msg, const char *file, int line);
|
||||
extern void check_Cublas_Errors(cublasStatus_t status, const char *msg, const char *file, int line);
|
||||
extern void check_Cusolver_Errors(cusolverStatus_t status, const char *msg, const char *file, int line);
|
||||
|
||||
#endif
|
41
src/cuda/src/diag_dense_dsy_mat.cu
Normal file
41
src/cuda/src/diag_dense_dsy_mat.cu
Normal file
@ -0,0 +1,41 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cusolverDn.h>
|
||||
|
||||
|
||||
|
||||
extern "C" void diag_dn_dsyevd(int n, int *info, double *W, double *A) {
|
||||
|
||||
cusolverDnHandle_t cusolverH = NULL;
|
||||
cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR; // Compute eigenvalues and eigenvectors
|
||||
cublasFillMode_t uplo = CUBLAS_FILL_MODE_UPPER; // Upper triangular part of the matrix is stored
|
||||
|
||||
int lwork = 0;
|
||||
double *work = NULL;
|
||||
|
||||
//check_Cusolver_Errors(cusolverDnCreate(&cusolverH), "cusolverDnCreate", __FILE__, __LINE__);
|
||||
cusolverDnCreate(&cusolverH);
|
||||
|
||||
// Query workspace size
|
||||
//check_Cusolver_Errors(cusolverDnDsyevd_bufferSize(cusolverH, jobz, uplo, n, A, n, W, &lwork),
|
||||
// "cusolverDnDsyevd_bufferSize", __FILE__, __LINE__);
|
||||
//check_Cuda_Errors(cudaMalloc((void**)&work, sizeof(double) * lwork),
|
||||
// "cudaMemcpy", __FILE__, __LINE__);
|
||||
cusolverDnDsyevd_bufferSize(cusolverH, jobz, uplo, n, A, n, W, &lwork);
|
||||
cudaMalloc((void**)&work, sizeof(double) * lwork);
|
||||
|
||||
// Compute eigenvalues and eigenvectors
|
||||
//check_Cusolver_Errors(cusolverDnDsyevd(cusolverH, jobz, uplo, n, A, n, W, work, lwork, info),
|
||||
// "cusolverDnDsyevd", __FILE__, __LINE__);
|
||||
cusolverDnDsyevd(cusolverH, jobz, uplo, n, A, n, W, work, lwork, info);
|
||||
|
||||
// Clean up
|
||||
//check_Cuda_Errors(cudaFree(work), "cudaFree", __FILE__, __LINE__);
|
||||
//check_Cusolver_Errors(cusolverDnDestroy(cusolverH), "cusolverDnDestroy", __FILE__, __LINE__);
|
||||
|
||||
cudaFree(work);
|
||||
cusolverDnDestroy(cusolverH);
|
||||
|
||||
}
|
||||
|
@ -1,59 +0,0 @@
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <cublas_v2.h>
|
||||
|
||||
#include "ph_drpa.h"
|
||||
|
||||
int ph_drpa(int nO, int nBas, int nS, double *h_eps, double *h_ERI,
|
||||
double *h_Omega, double *h_XpY, double *h_XmY) {
|
||||
|
||||
double *d_eps;
|
||||
double *d_ERI;
|
||||
|
||||
int nV = nBas - nO;
|
||||
|
||||
int nBas2 = nBas * nBas;
|
||||
int nBas4 = nBas2 * nBas2;
|
||||
|
||||
int ia, jb;
|
||||
for (ia = 0; ia < nS; ia++) {
|
||||
h_Omega[ia] = 0.0;
|
||||
for (jb = 0; jb < nS; jb++) {
|
||||
h_XmY[jb + ia * nS] = 4.0;
|
||||
//h_XpY[jb + ia * nS] = 5.0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
check_Cuda_Errors(cudaMalloc((void**)&d_eps, nBas * sizeof(double)),
|
||||
"cudaMalloc", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaMalloc((void**)&d_ERI, nBas4 * sizeof(double)),
|
||||
"cudaMalloc", __FILE__, __LINE__);
|
||||
|
||||
|
||||
check_Cuda_Errors(cudaMemcpy(d_eps, h_eps, nBas * sizeof(double), cudaMemcpyHostToDevice),
|
||||
"cudaMemcpy", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaMemcpy(d_ERI, h_ERI, nBas4 * sizeof(double), cudaMemcpyHostToDevice),
|
||||
"cudaMemcpy", __FILE__, __LINE__);
|
||||
|
||||
// construct A matrix
|
||||
double *d_A;
|
||||
check_Cuda_Errors(cudaMalloc((void**)&d_A, nS * nS * sizeof(double)), "cudaMalloc", __FILE__, __LINE__);
|
||||
ph_dRPA_A_sing(nO, nV, nBas, nS, d_eps, d_ERI, d_A);
|
||||
check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
|
||||
|
||||
|
||||
check_Cuda_Errors(cudaMemcpy(h_XpY, d_A, nS * nS * sizeof(double), cudaMemcpyDeviceToHost),
|
||||
"cudaMemcpy", __FILE__, __LINE__);
|
||||
|
||||
check_Cuda_Errors(cudaFree(d_eps), "cudaFree", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaFree(d_ERI), "cudaFree", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaFree(d_A), "cudaFree", __FILE__, __LINE__);
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
76
src/cuda/src/ph_drpa_tda.c
Normal file
76
src/cuda/src/ph_drpa_tda.c
Normal file
@ -0,0 +1,76 @@
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <cublas_v2.h>
|
||||
#include <cusolverDn.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "ph_rpa.h"
|
||||
|
||||
void ph_drpa_tda(int nO, int nBas, int nS, double *h_eps, double *h_ERI,
|
||||
double *h_Omega, double *h_X) {
|
||||
|
||||
double *d_eps = NULL;
|
||||
double *d_ERI = NULL;
|
||||
|
||||
int nV = nBas - nO;
|
||||
|
||||
int nBas2 = nBas * nBas;
|
||||
int nBas4 = nBas2 * nBas2;
|
||||
|
||||
|
||||
check_Cuda_Errors(cudaMalloc((void**)&d_eps, nBas * sizeof(double)),
|
||||
"cudaMalloc", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaMalloc((void**)&d_ERI, nBas4 * sizeof(double)),
|
||||
"cudaMalloc", __FILE__, __LINE__);
|
||||
|
||||
check_Cuda_Errors(cudaMemcpy(d_eps, h_eps, nBas * sizeof(double), cudaMemcpyHostToDevice),
|
||||
"cudaMemcpy", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaMemcpy(d_ERI, h_ERI, nBas4 * sizeof(double), cudaMemcpyHostToDevice),
|
||||
"cudaMemcpy", __FILE__, __LINE__);
|
||||
|
||||
// construct A
|
||||
double *d_A = NULL;
|
||||
check_Cuda_Errors(cudaMalloc((void**)&d_A, nS * nS * sizeof(double)), "cudaMalloc", __FILE__, __LINE__);
|
||||
|
||||
ph_dRPA_A_sing(nO, nV, nBas, nS, d_eps, d_ERI, d_A);
|
||||
check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
|
||||
|
||||
|
||||
// diagonalize A
|
||||
int *d_info = NULL;
|
||||
double *d_Omega = NULL;
|
||||
check_Cuda_Errors(cudaMalloc((void**)&d_info, sizeof(int)),
|
||||
"cudaMalloc", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaMalloc((void**)&d_Omega, nS * sizeof(double)),
|
||||
"cudaMalloc", __FILE__, __LINE__);
|
||||
|
||||
diag_dn_dsyevd(nS, d_info, d_Omega, d_A);
|
||||
check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
|
||||
|
||||
int info_gpu = 0;
|
||||
check_Cuda_Errors(cudaMemcpy(&info_gpu, d_info, sizeof(int), cudaMemcpyDeviceToHost),
|
||||
"cudaMemcpy", __FILE__, __LINE__);
|
||||
if (info_gpu != 0) {
|
||||
printf("Error: diag_dn_dsyevd returned error code %d\n", info_gpu);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
|
||||
check_Cuda_Errors(cudaMemcpy(h_X, d_A, nS * nS * sizeof(double), cudaMemcpyDeviceToHost),
|
||||
"cudaMemcpy", __FILE__, __LINE__);
|
||||
|
||||
check_Cuda_Errors(cudaMemcpy(h_Omega, d_Omega, nS * sizeof(double), cudaMemcpyDeviceToHost),
|
||||
"cudaMemcpy", __FILE__, __LINE__);
|
||||
|
||||
check_Cuda_Errors(cudaFree(d_info), "cudaFree", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaFree(d_eps), "cudaFree", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaFree(d_ERI), "cudaFree", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaFree(d_A), "cudaFree", __FILE__, __LINE__);
|
||||
check_Cuda_Errors(cudaFree(d_Omega), "cudaFree", __FILE__, __LINE__);
|
||||
|
||||
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <stdio.h>
|
||||
#include <cublas_v2.h>
|
||||
#include <cstring>
|
||||
#include <cusolverDn.h>
|
||||
|
||||
|
||||
|
||||
@ -53,3 +54,72 @@ extern "C" void check_Cublas_Errors(cublasStatus_t status, const char* msg, cons
|
||||
}
|
||||
|
||||
|
||||
|
||||
const char* cusolver_Get_Error_String(cusolverStatus_t status) {
|
||||
switch (status) {
|
||||
case CUSOLVER_STATUS_SUCCESS:
|
||||
return "CUSOLVER_STATUS_SUCCESS";
|
||||
case CUSOLVER_STATUS_NOT_INITIALIZED:
|
||||
return "CUSOLVER_STATUS_NOT_INITIALIZED";
|
||||
case CUSOLVER_STATUS_ALLOC_FAILED:
|
||||
return "CUSOLVER_STATUS_ALLOC_FAILED";
|
||||
case CUSOLVER_STATUS_INVALID_VALUE:
|
||||
return "CUSOLVER_STATUS_INVALID_VALUE";
|
||||
case CUSOLVER_STATUS_ARCH_MISMATCH:
|
||||
return "CUSOLVER_STATUS_ARCH_MISMATCH";
|
||||
case CUSOLVER_STATUS_MAPPING_ERROR:
|
||||
return "CUSOLVER_STATUS_MAPPING_ERROR";
|
||||
case CUSOLVER_STATUS_EXECUTION_FAILED:
|
||||
return "CUSOLVER_STATUS_EXECUTION_FAILED";
|
||||
case CUSOLVER_STATUS_INTERNAL_ERROR:
|
||||
return "CUSOLVER_STATUS_INTERNAL_ERROR";
|
||||
case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
|
||||
return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
|
||||
case CUSOLVER_STATUS_NOT_SUPPORTED:
|
||||
return "CUSOLVER_STATUS_NOT_SUPPORTED";
|
||||
case CUSOLVER_STATUS_ZERO_PIVOT:
|
||||
return "CUSOLVER_STATUS_ZERO_PIVOT";
|
||||
case CUSOLVER_STATUS_INVALID_LICENSE:
|
||||
return "CUSOLVER_STATUS_INVALID_LICENSE";
|
||||
case CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED:
|
||||
return "CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED";
|
||||
case CUSOLVER_STATUS_IRS_PARAMS_INVALID:
|
||||
return "CUSOLVER_STATUS_IRS_PARAMS_INVALID";
|
||||
case CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC:
|
||||
return "CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC";
|
||||
case CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE:
|
||||
return "CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE";
|
||||
case CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER:
|
||||
return "CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER";
|
||||
case CUSOLVER_STATUS_IRS_INTERNAL_ERROR:
|
||||
return "CUSOLVER_STATUS_IRS_INTERNAL_ERROR";
|
||||
case CUSOLVER_STATUS_IRS_NOT_SUPPORTED:
|
||||
return "CUSOLVER_STATUS_IRS_NOT_SUPPORTED";
|
||||
case CUSOLVER_STATUS_IRS_OUT_OF_RANGE:
|
||||
return "CUSOLVER_STATUS_IRS_OUT_OF_RANGE";
|
||||
case CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES:
|
||||
return "CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES";
|
||||
case CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED:
|
||||
return "CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED";
|
||||
case CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED:
|
||||
return "CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED";
|
||||
case CUSOLVER_STATUS_IRS_MATRIX_SINGULAR:
|
||||
return "CUSOLVER_STATUS_IRS_MATRIX_SINGULAR";
|
||||
case CUSOLVER_STATUS_INVALID_WORKSPACE:
|
||||
return "CUSOLVER_STATUS_INVALID_WORKSPACE";
|
||||
default:
|
||||
return "UNKNOWN CUSOLVER ERROR";
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void check_Cusolver_Errors(cusolverStatus_t status, const char* msg, const char* file, int line) {
|
||||
|
||||
const char* err = cusolver_Get_Error_String(status);
|
||||
|
||||
if (status != CUSOLVER_STATUS_SUCCESS) {
|
||||
printf("CUSOLVER Error in %s at line %d\n", file, line);
|
||||
printf("%s - %s\n", msg, err);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -111,7 +111,7 @@ else:
|
||||
|
||||
if USE_GPU:
|
||||
compiler_tmp = compiler.strip().split('\n')
|
||||
compiler_tmp[0] += " -L{}/src/cuda/build -lcuquack -lcudart -lcublas".format(QUACK_ROOT)
|
||||
compiler_tmp[0] += " -L{}/src/cuda/build -lcuquack -lcudart -lcublas -lcusolver".format(QUACK_ROOT)
|
||||
compiler_exe = '\n'.join(compiler_tmp)
|
||||
else:
|
||||
compiler_exe = compiler
|
||||
|
@ -8,18 +8,17 @@ module cu_quack_module
|
||||
|
||||
interface
|
||||
|
||||
subroutine ph_drpa(nO, nBas, nS, eps, ERI, &
|
||||
Omega, XpY, XmY) bind(C, name = "ph_drpa")
|
||||
subroutine ph_drpa_tda(nO, nBas, nS, eps, ERI, &
|
||||
Omega, X) bind(C, name = "ph_drpa_tda")
|
||||
|
||||
import c_int, c_double
|
||||
integer(c_int), intent(in), value :: nO, nBas, nS
|
||||
real(c_double), intent(in) :: eps(nBas)
|
||||
real(c_double), intent(in) :: ERI(nBas,nBas,nBas,nBas)
|
||||
real(c_double), intent(out) :: Omega(nS)
|
||||
real(c_double), intent(out) :: XpY(nS,nS)
|
||||
real(c_double), intent(out) :: XmY(nS,nS)
|
||||
real(c_double), intent(out) :: X(nS,nS)
|
||||
|
||||
end subroutine ph_drpa
|
||||
end subroutine ph_drpa_tda
|
||||
|
||||
end interface
|
||||
|
||||
@ -29,7 +28,7 @@ module cu_quack_module
|
||||
|
||||
subroutine cu_quack_module_test()
|
||||
implicit none
|
||||
print*, ' hello from mod_test'
|
||||
print*, ' hello from cu_quack_module'
|
||||
end subroutine cu_quack_module_test
|
||||
|
||||
! ---
|
||||
|
Loading…
Reference in New Issue
Block a user