mirror of
https://github.com/pfloos/quack
synced 2025-01-03 01:56:09 +01:00
refact in GPU direct & fixed bug in read dipole integrals
This commit is contained in:
parent
6ff3fc2905
commit
e43a56e042
156
src/GPU/phRRPA_GPU.f90
Normal file
156
src/GPU/phRRPA_GPU.f90
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
subroutine phRRPA_GPU(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
|
||||||
|
|
||||||
|
use cu_quack_module
|
||||||
|
|
||||||
|
! Perform a direct random phase approximation calculation
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
include 'parameters.h'
|
||||||
|
include 'quadrature.h'
|
||||||
|
|
||||||
|
! Input variables
|
||||||
|
|
||||||
|
logical,intent(in) :: dotest
|
||||||
|
|
||||||
|
logical,intent(in) :: TDA
|
||||||
|
logical,intent(in) :: doACFDT
|
||||||
|
logical,intent(in) :: exchange_kernel
|
||||||
|
logical,intent(in) :: singlet
|
||||||
|
logical,intent(in) :: triplet
|
||||||
|
integer,intent(in) :: nBas
|
||||||
|
integer,intent(in) :: nC
|
||||||
|
integer,intent(in) :: nO
|
||||||
|
integer,intent(in) :: nV
|
||||||
|
integer,intent(in) :: nR
|
||||||
|
integer,intent(in) :: nS
|
||||||
|
double precision,intent(in) :: ENuc
|
||||||
|
double precision,intent(in) :: ERHF
|
||||||
|
double precision,intent(in) :: eHF(nBas)
|
||||||
|
double precision,intent(in) :: ERI(nBas,nBas,nBas,nBas)
|
||||||
|
double precision,intent(in) :: dipole_int(nBas,nBas,ncart)
|
||||||
|
|
||||||
|
! Local variables
|
||||||
|
|
||||||
|
integer :: i
|
||||||
|
integer :: ispin
|
||||||
|
logical :: dRPA
|
||||||
|
double precision :: t1, t2
|
||||||
|
double precision :: lambda
|
||||||
|
double precision,allocatable :: Aph(:,:)
|
||||||
|
double precision,allocatable :: Bph(:,:)
|
||||||
|
double precision,allocatable :: Om(:)
|
||||||
|
double precision,allocatable :: XpY(:,:)
|
||||||
|
double precision,allocatable :: XmY(:,:)
|
||||||
|
! DEBUG
|
||||||
|
!double precision, allocatable :: XpY_gpu(:,:), XmY_gpu(:,:), Om_gpu(:)
|
||||||
|
|
||||||
|
double precision :: EcRPA(nspin)
|
||||||
|
|
||||||
|
! Hello world
|
||||||
|
|
||||||
|
write(*,*)
|
||||||
|
write(*,*)'*********************************'
|
||||||
|
write(*,*)'* Restricted ph-RPA Calculation *'
|
||||||
|
write(*,*)'*********************************'
|
||||||
|
write(*,*)
|
||||||
|
|
||||||
|
! TDA
|
||||||
|
|
||||||
|
if(TDA) then
|
||||||
|
write(*,*) 'Tamm-Dancoff approximation activated!'
|
||||||
|
write(*,*)
|
||||||
|
end if
|
||||||
|
|
||||||
|
! Initialization
|
||||||
|
|
||||||
|
dRPA = .true.
|
||||||
|
EcRPA(:) = 0d0
|
||||||
|
lambda = 1d0
|
||||||
|
|
||||||
|
! Memory allocation
|
||||||
|
|
||||||
|
allocate(Om(nS),XpY(nS,nS),XmY(nS,nS),Aph(nS,nS),Bph(nS,nS))
|
||||||
|
|
||||||
|
! Singlet manifold
|
||||||
|
|
||||||
|
if(singlet) then
|
||||||
|
|
||||||
|
if(TDA) then
|
||||||
|
|
||||||
|
call wall_time(t1)
|
||||||
|
call ph_drpa_tda_sing(nO, nBas, nS, eHF(1), ERI(1,1,1,1), Om(1), XpY(1,1))
|
||||||
|
call wall_time(t2)
|
||||||
|
print*, 'diag time on GPU (sec):', t2 - t1
|
||||||
|
stop
|
||||||
|
XmY(:,:) = XpY(:,:)
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
! TODO
|
||||||
|
!call ph_drpa_sing(nO, nBas, nS, eHF(1), ERI(1,1,1,1), Om(1), XpY(1,1))
|
||||||
|
!XmY(:,:) = XpY(:,:)
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
call print_excitation_energies('phRPA@RHF','singlet',nS,Om)
|
||||||
|
call phLR_transition_vectors(.true.,nBas,nC,nO,nV,nR,nS,dipole_int,Om,XpY,XmY)
|
||||||
|
|
||||||
|
end if
|
||||||
|
|
||||||
|
! Triplet manifold
|
||||||
|
|
||||||
|
if(triplet) then
|
||||||
|
|
||||||
|
ispin = 2
|
||||||
|
|
||||||
|
call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,eHF,ERI,Aph)
|
||||||
|
if(.not.TDA) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,ERI,Bph)
|
||||||
|
|
||||||
|
call phLR(TDA,nS,Aph,Bph,EcRPA(ispin),Om,XpY,XmY)
|
||||||
|
call print_excitation_energies('phRPA@RHF','triplet',nS,Om)
|
||||||
|
call phLR_transition_vectors(.false.,nBas,nC,nO,nV,nR,nS,dipole_int,Om,XpY,XmY)
|
||||||
|
|
||||||
|
end if
|
||||||
|
|
||||||
|
if(exchange_kernel) then
|
||||||
|
|
||||||
|
EcRPA(1) = 0.5d0*EcRPA(1)
|
||||||
|
EcRPA(2) = 1.5d0*EcRPA(2)
|
||||||
|
|
||||||
|
end if
|
||||||
|
|
||||||
|
write(*,*)
|
||||||
|
write(*,*)'-------------------------------------------------------------------------------'
|
||||||
|
write(*,'(2X,A50,F20.10,A3)') 'Tr@phRPA@RHF correlation energy (singlet) = ',EcRPA(1),' au'
|
||||||
|
write(*,'(2X,A50,F20.10,A3)') 'Tr@phRPA@RHF correlation energy (triplet) = ',EcRPA(2),' au'
|
||||||
|
write(*,'(2X,A50,F20.10,A3)') 'Tr@phRPA@RHF correlation energy = ',sum(EcRPA),' au'
|
||||||
|
write(*,'(2X,A50,F20.10,A3)') 'Tr@phRPA@RHF total energy = ',ENuc + ERHF + sum(EcRPA),' au'
|
||||||
|
write(*,*)'-------------------------------------------------------------------------------'
|
||||||
|
write(*,*)
|
||||||
|
|
||||||
|
deallocate(Om,XpY,XmY,Aph,Bph)
|
||||||
|
|
||||||
|
! Compute the correlation energy via the adiabatic connection
|
||||||
|
|
||||||
|
if(doACFDT) then
|
||||||
|
|
||||||
|
call phACFDT(exchange_kernel,dRPA,TDA,singlet,triplet,nBas,nC,nO,nV,nR,nS,ERI,eHF,EcRPA)
|
||||||
|
|
||||||
|
write(*,*)
|
||||||
|
write(*,*)'-------------------------------------------------------------------------------'
|
||||||
|
write(*,'(2X,A50,F20.10,A3)') 'AC@phRPA@RHF correlation energy (singlet) = ',EcRPA(1),' au'
|
||||||
|
write(*,'(2X,A50,F20.10,A3)') 'AC@phRPA@RHF correlation energy (triplet) = ',EcRPA(2),' au'
|
||||||
|
write(*,'(2X,A50,F20.10,A3)') 'AC@phRPA@RHF correlation energy = ',sum(EcRPA),' au'
|
||||||
|
write(*,'(2X,A50,F20.10,A3)') 'AC@phRPA@RHF total energy = ',ENuc + ERHF + sum(EcRPA),' au'
|
||||||
|
write(*,*)'-------------------------------------------------------------------------------'
|
||||||
|
write(*,*)
|
||||||
|
|
||||||
|
end if
|
||||||
|
|
||||||
|
if(dotest) then
|
||||||
|
|
||||||
|
call dump_test_value('R','phRPA correlation energy',sum(EcRPA))
|
||||||
|
|
||||||
|
end if
|
||||||
|
|
||||||
|
end subroutine
|
@ -15,6 +15,7 @@ subroutine phLR(TDA,nS,Aph,Bph,EcRPA,Om,XpY,XmY)
|
|||||||
! Local variables
|
! Local variables
|
||||||
|
|
||||||
double precision :: trace_matrix
|
double precision :: trace_matrix
|
||||||
|
double precision :: t1, t2
|
||||||
double precision,allocatable :: ApB(:,:)
|
double precision,allocatable :: ApB(:,:)
|
||||||
double precision,allocatable :: AmB(:,:)
|
double precision,allocatable :: AmB(:,:)
|
||||||
double precision,allocatable :: AmBSq(:,:)
|
double precision,allocatable :: AmBSq(:,:)
|
||||||
@ -38,7 +39,10 @@ subroutine phLR(TDA,nS,Aph,Bph,EcRPA,Om,XpY,XmY)
|
|||||||
if(TDA) then
|
if(TDA) then
|
||||||
|
|
||||||
XpY(:,:) = Aph(:,:)
|
XpY(:,:) = Aph(:,:)
|
||||||
|
!call wall_time(t1)
|
||||||
call diagonalize_matrix(nS,XpY,Om)
|
call diagonalize_matrix(nS,XpY,Om)
|
||||||
|
!call wall_time(t2)
|
||||||
|
!print*, 'diag time on CPU (sec):', t2 - t1
|
||||||
XpY(:,:) = transpose(XpY(:,:))
|
XpY(:,:) = transpose(XpY(:,:))
|
||||||
XmY(:,:) = XpY(:,:)
|
XmY(:,:) = XpY(:,:)
|
||||||
|
|
||||||
|
@ -177,7 +177,6 @@ program QuAcK
|
|||||||
|
|
||||||
call read_integrals(working_dir,nBas,S,T,V,Hc,ERI_AO)
|
call read_integrals(working_dir,nBas,S,T,V,Hc,ERI_AO)
|
||||||
call read_dipole_integrals(working_dir,nBas,dipole_int_AO)
|
call read_dipole_integrals(working_dir,nBas,dipole_int_AO)
|
||||||
|
|
||||||
call wall_time(end_int)
|
call wall_time(end_int)
|
||||||
|
|
||||||
t_int = end_int - start_int
|
t_int = end_int - start_int
|
||||||
|
@ -50,6 +50,8 @@ subroutine RRPA(dotest,dophRPA,dophRPAx,docrRPA,doppRPA,TDA,doACFDT,exchange_ker
|
|||||||
write(*,'(A65,1X,F9.3,A8)') 'Total wall time for RPA = ',t_RPA,' seconds'
|
write(*,'(A65,1X,F9.3,A8)') 'Total wall time for RPA = ',t_RPA,' seconds'
|
||||||
write(*,*)
|
write(*,*)
|
||||||
|
|
||||||
|
!call phRRPA_GPU(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
|
||||||
|
|
||||||
end if
|
end if
|
||||||
|
|
||||||
!------------------------------------------------------------------------
|
!------------------------------------------------------------------------
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
|
subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
|
||||||
|
|
||||||
! use cu_quack_module
|
|
||||||
|
|
||||||
! Perform a direct random phase approximation calculation
|
! Perform a direct random phase approximation calculation
|
||||||
|
|
||||||
implicit none
|
implicit none
|
||||||
@ -40,8 +38,6 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
|
|||||||
double precision,allocatable :: Om(:)
|
double precision,allocatable :: Om(:)
|
||||||
double precision,allocatable :: XpY(:,:)
|
double precision,allocatable :: XpY(:,:)
|
||||||
double precision,allocatable :: XmY(:,:)
|
double precision,allocatable :: XmY(:,:)
|
||||||
! DEBUG
|
|
||||||
!double precision, allocatable :: XpY_gpu(:,:), XmY_gpu(:,:), Om_gpu(:)
|
|
||||||
|
|
||||||
double precision :: EcRPA(nspin)
|
double precision :: EcRPA(nspin)
|
||||||
|
|
||||||
@ -80,20 +76,6 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
|
|||||||
if(.not.TDA) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,ERI,Bph)
|
if(.not.TDA) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,lambda,ERI,Bph)
|
||||||
|
|
||||||
call phLR(TDA,nS,Aph,Bph,EcRPA(ispin),Om,XpY,XmY)
|
call phLR(TDA,nS,Aph,Bph,EcRPA(ispin),Om,XpY,XmY)
|
||||||
|
|
||||||
!! DEBUG
|
|
||||||
!allocate(Om_gpu(nS), XpY_gpu(nS,nS), XmY_gpu(nS,nS))
|
|
||||||
!call ph_drpa_tda(nO, nBas, nS, eHF(1), ERI(1,1,1,1), Om_gpu(1), XpY_gpu(1,1))
|
|
||||||
!do i = 1, nS
|
|
||||||
! print *, i, Om(i), Om_gpu(i)
|
|
||||||
! if(dabs(Om(i) - Om_gpu(i)) .gt. 1d-13) then
|
|
||||||
! print *, 'GPU FAILED!'
|
|
||||||
! stop
|
|
||||||
! endif
|
|
||||||
!enddo
|
|
||||||
!print *, 'GPU DONE!'
|
|
||||||
!stop
|
|
||||||
|
|
||||||
call print_excitation_energies('phRPA@RHF','singlet',nS,Om)
|
call print_excitation_energies('phRPA@RHF','singlet',nS,Om)
|
||||||
call phLR_transition_vectors(.true.,nBas,nC,nO,nV,nR,nS,dipole_int,Om,XpY,XmY)
|
call phLR_transition_vectors(.true.,nBas,nC,nO,nV,nR,nS,dipole_int,Om,XpY,XmY)
|
||||||
|
|
||||||
|
@ -10,6 +10,8 @@ __global__ void ph_dRPA_A_sing_kernel(int nO, int nV, int nBas, int nS, double *
|
|||||||
int i_A0, i_A1, i_A2;
|
int i_A0, i_A1, i_A2;
|
||||||
int i_I0, i_I1, i_I2;
|
int i_I0, i_I1, i_I2;
|
||||||
|
|
||||||
|
bool a_eq_b;
|
||||||
|
|
||||||
nVS = nV * nS;
|
nVS = nV * nS;
|
||||||
|
|
||||||
nBas2 = nBas * nBas;
|
nBas2 = nBas * nBas;
|
||||||
@ -27,6 +29,8 @@ __global__ void ph_dRPA_A_sing_kernel(int nO, int nV, int nBas, int nS, double *
|
|||||||
while(bb < nV) {
|
while(bb < nV) {
|
||||||
b = bb + nO;
|
b = bb + nO;
|
||||||
|
|
||||||
|
a_eq_b = a == b;
|
||||||
|
|
||||||
i_A1 = i_A0 + bb;
|
i_A1 = i_A0 + bb;
|
||||||
i_I1 = i_I0 + b * nBas;
|
i_I1 = i_I0 + b * nBas;
|
||||||
|
|
||||||
@ -40,7 +44,7 @@ __global__ void ph_dRPA_A_sing_kernel(int nO, int nV, int nBas, int nS, double *
|
|||||||
while(j < nO) {
|
while(j < nO) {
|
||||||
|
|
||||||
A[i_A2 + j * nV] = 2.0 * ERI[i_I2 + j * nBas3];
|
A[i_A2 + j * nV] = 2.0 * ERI[i_I2 + j * nBas3];
|
||||||
if((a==b) && (i==j)) {
|
if(a_eq_b && (i==j)) {
|
||||||
A[i_A2 + j * nV] += eps[a] - eps[i];
|
A[i_A2 + j * nV] += eps[a] - eps[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,8 +9,8 @@
|
|||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "ph_rpa.h"
|
#include "ph_rpa.h"
|
||||||
|
|
||||||
void ph_drpa_tda(int nO, int nBas, int nS, double *h_eps, double *h_ERI,
|
void ph_drpa_tda_sing(int nO, int nBas, int nS, double *h_eps, double *h_ERI,
|
||||||
double *h_Omega, double *h_X) {
|
double *h_Omega, double *h_X) {
|
||||||
|
|
||||||
double *d_eps = NULL;
|
double *d_eps = NULL;
|
||||||
double *d_ERI = NULL;
|
double *d_ERI = NULL;
|
||||||
@ -20,23 +20,39 @@ void ph_drpa_tda(int nO, int nBas, int nS, double *h_eps, double *h_ERI,
|
|||||||
int nBas2 = nBas * nBas;
|
int nBas2 = nBas * nBas;
|
||||||
int nBas4 = nBas2 * nBas2;
|
int nBas4 = nBas2 * nBas2;
|
||||||
|
|
||||||
|
float elapsedTime;
|
||||||
|
cudaEvent_t start, stop;
|
||||||
|
cudaEventCreate(&start);
|
||||||
|
cudaEventCreate(&stop);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
check_Cuda_Errors(cudaMalloc((void**)&d_eps, nBas * sizeof(double)),
|
check_Cuda_Errors(cudaMalloc((void**)&d_eps, nBas * sizeof(double)),
|
||||||
"cudaMalloc", __FILE__, __LINE__);
|
"cudaMalloc", __FILE__, __LINE__);
|
||||||
check_Cuda_Errors(cudaMalloc((void**)&d_ERI, nBas4 * sizeof(double)),
|
check_Cuda_Errors(cudaMalloc((void**)&d_ERI, nBas4 * sizeof(double)),
|
||||||
"cudaMalloc", __FILE__, __LINE__);
|
"cudaMalloc", __FILE__, __LINE__);
|
||||||
|
|
||||||
|
cudaEventRecord(start, 0);
|
||||||
check_Cuda_Errors(cudaMemcpy(d_eps, h_eps, nBas * sizeof(double), cudaMemcpyHostToDevice),
|
check_Cuda_Errors(cudaMemcpy(d_eps, h_eps, nBas * sizeof(double), cudaMemcpyHostToDevice),
|
||||||
"cudaMemcpy", __FILE__, __LINE__);
|
"cudaMemcpy", __FILE__, __LINE__);
|
||||||
check_Cuda_Errors(cudaMemcpy(d_ERI, h_ERI, nBas4 * sizeof(double), cudaMemcpyHostToDevice),
|
check_Cuda_Errors(cudaMemcpy(d_ERI, h_ERI, nBas4 * sizeof(double), cudaMemcpyHostToDevice),
|
||||||
"cudaMemcpy", __FILE__, __LINE__);
|
"cudaMemcpy", __FILE__, __LINE__);
|
||||||
|
cudaEventRecord(stop, 0);
|
||||||
|
cudaEventSynchronize(stop);
|
||||||
|
cudaEventElapsedTime(&elapsedTime, start, stop);
|
||||||
|
printf("Time elapsed on CPU->GPU transfer = %f msec\n", elapsedTime);
|
||||||
|
|
||||||
// construct A
|
// construct A
|
||||||
double *d_A = NULL;
|
double *d_A = NULL;
|
||||||
check_Cuda_Errors(cudaMalloc((void**)&d_A, nS * nS * sizeof(double)), "cudaMalloc", __FILE__, __LINE__);
|
check_Cuda_Errors(cudaMalloc((void**)&d_A, nS * nS * sizeof(double)), "cudaMalloc", __FILE__, __LINE__);
|
||||||
|
|
||||||
|
cudaEventRecord(start, 0);
|
||||||
ph_dRPA_A_sing(nO, nV, nBas, nS, d_eps, d_ERI, d_A);
|
ph_dRPA_A_sing(nO, nV, nBas, nS, d_eps, d_ERI, d_A);
|
||||||
check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
|
check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
|
||||||
|
cudaEventRecord(stop, 0);
|
||||||
|
cudaEventSynchronize(stop);
|
||||||
|
cudaEventElapsedTime(&elapsedTime, start, stop);
|
||||||
|
printf("Time elapsed on A kernel = %f msec\n", elapsedTime);
|
||||||
|
|
||||||
|
|
||||||
// diagonalize A
|
// diagonalize A
|
||||||
@ -47,24 +63,35 @@ void ph_drpa_tda(int nO, int nBas, int nS, double *h_eps, double *h_ERI,
|
|||||||
check_Cuda_Errors(cudaMalloc((void**)&d_Omega, nS * sizeof(double)),
|
check_Cuda_Errors(cudaMalloc((void**)&d_Omega, nS * sizeof(double)),
|
||||||
"cudaMalloc", __FILE__, __LINE__);
|
"cudaMalloc", __FILE__, __LINE__);
|
||||||
|
|
||||||
|
cudaEventRecord(start, 0);
|
||||||
diag_dn_dsyevd(nS, d_info, d_Omega, d_A);
|
diag_dn_dsyevd(nS, d_info, d_Omega, d_A);
|
||||||
check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
|
check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
|
||||||
|
cudaEventRecord(stop, 0);
|
||||||
|
cudaEventSynchronize(stop);
|
||||||
|
cudaEventElapsedTime(&elapsedTime, start, stop);
|
||||||
|
printf("Time elapsed on diagonalization = %f msec\n", elapsedTime);
|
||||||
|
|
||||||
int info_gpu = 0;
|
//int info_gpu = 0;
|
||||||
check_Cuda_Errors(cudaMemcpy(&info_gpu, d_info, sizeof(int), cudaMemcpyDeviceToHost),
|
cudaEventRecord(start, 0);
|
||||||
"cudaMemcpy", __FILE__, __LINE__);
|
//check_Cuda_Errors(cudaMemcpy(&info_gpu, d_info, sizeof(int), cudaMemcpyDeviceToHost),
|
||||||
if (info_gpu != 0) {
|
// "cudaMemcpy", __FILE__, __LINE__);
|
||||||
printf("Error: diag_dn_dsyevd returned error code %d\n", info_gpu);
|
//if (info_gpu != 0) {
|
||||||
exit(EXIT_FAILURE);
|
// printf("Error: diag_dn_dsyevd returned error code %d\n", info_gpu);
|
||||||
}
|
// exit(EXIT_FAILURE);
|
||||||
|
//}
|
||||||
|
|
||||||
check_Cuda_Errors(cudaMemcpy(h_X, d_A, nS * nS * sizeof(double), cudaMemcpyDeviceToHost),
|
check_Cuda_Errors(cudaMemcpy(h_X, d_A, nS * nS * sizeof(double), cudaMemcpyDeviceToHost),
|
||||||
"cudaMemcpy", __FILE__, __LINE__);
|
"cudaMemcpy", __FILE__, __LINE__);
|
||||||
|
|
||||||
check_Cuda_Errors(cudaMemcpy(h_Omega, d_Omega, nS * sizeof(double), cudaMemcpyDeviceToHost),
|
check_Cuda_Errors(cudaMemcpy(h_Omega, d_Omega, nS * sizeof(double), cudaMemcpyDeviceToHost),
|
||||||
"cudaMemcpy", __FILE__, __LINE__);
|
"cudaMemcpy", __FILE__, __LINE__);
|
||||||
|
|
||||||
|
cudaEventRecord(start, 0);
|
||||||
|
diag_dn_dsyevd(nS, d_info, d_Omega, d_A);
|
||||||
|
check_Cuda_Errors(cudaGetLastError(), "cudaGetLastError", __FILE__, __LINE__);
|
||||||
|
cudaEventRecord(stop, 0);
|
||||||
|
cudaEventSynchronize(stop);
|
||||||
|
cudaEventElapsedTime(&elapsedTime, start, stop);
|
||||||
|
printf("Time elapsed on GPU -> CPU transfer = %f msec\n", elapsedTime);
|
||||||
|
|
||||||
check_Cuda_Errors(cudaFree(d_info), "cudaFree", __FILE__, __LINE__);
|
check_Cuda_Errors(cudaFree(d_info), "cudaFree", __FILE__, __LINE__);
|
||||||
check_Cuda_Errors(cudaFree(d_eps), "cudaFree", __FILE__, __LINE__);
|
check_Cuda_Errors(cudaFree(d_eps), "cudaFree", __FILE__, __LINE__);
|
||||||
check_Cuda_Errors(cudaFree(d_ERI), "cudaFree", __FILE__, __LINE__);
|
check_Cuda_Errors(cudaFree(d_ERI), "cudaFree", __FILE__, __LINE__);
|
@ -197,6 +197,8 @@ lib_dirs = list(filter(lambda x: os.path.isdir(x) and \
|
|||||||
x not in exe_dirs, os.listdir(".")))
|
x not in exe_dirs, os.listdir(".")))
|
||||||
i = lib_dirs.index("mod")
|
i = lib_dirs.index("mod")
|
||||||
lib_dirs[0], lib_dirs[i] = lib_dirs[i], lib_dirs[0]
|
lib_dirs[0], lib_dirs[i] = lib_dirs[i], lib_dirs[0]
|
||||||
|
if not USE_GPU:
|
||||||
|
lib_dirs.remove("GPU")
|
||||||
|
|
||||||
def create_ninja_in_libdir(directory):
|
def create_ninja_in_libdir(directory):
|
||||||
def write_rule(f, source_file, replace):
|
def write_rule(f, source_file, replace):
|
||||||
|
@ -8,8 +8,8 @@ module cu_quack_module
|
|||||||
|
|
||||||
interface
|
interface
|
||||||
|
|
||||||
subroutine ph_drpa_tda(nO, nBas, nS, eps, ERI, &
|
subroutine ph_drpa_tda_sing(nO, nBas, nS, eps, ERI, &
|
||||||
Omega, X) bind(C, name = "ph_drpa_tda")
|
Omega, X) bind(C, name = "ph_drpa_tda_sing")
|
||||||
|
|
||||||
import c_int, c_double
|
import c_int, c_double
|
||||||
integer(c_int), intent(in), value :: nO, nBas, nS
|
integer(c_int), intent(in), value :: nO, nBas, nS
|
||||||
@ -18,7 +18,51 @@ module cu_quack_module
|
|||||||
real(c_double), intent(out) :: Omega(nS)
|
real(c_double), intent(out) :: Omega(nS)
|
||||||
real(c_double), intent(out) :: X(nS,nS)
|
real(c_double), intent(out) :: X(nS,nS)
|
||||||
|
|
||||||
end subroutine ph_drpa_tda
|
end subroutine ph_drpa_tda_sing
|
||||||
|
|
||||||
|
! ---
|
||||||
|
|
||||||
|
subroutine ph_drpa_tda_trip(nO, nBas, nS, eps, ERI, &
|
||||||
|
Omega, X) bind(C, name = "ph_drpa_tda_trip")
|
||||||
|
|
||||||
|
import c_int, c_double
|
||||||
|
integer(c_int), intent(in), value :: nO, nBas, nS
|
||||||
|
real(c_double), intent(in) :: eps(nBas)
|
||||||
|
real(c_double), intent(in) :: ERI(nBas,nBas,nBas,nBas)
|
||||||
|
real(c_double), intent(out) :: Omega(nS)
|
||||||
|
real(c_double), intent(out) :: X(nS,nS)
|
||||||
|
|
||||||
|
end subroutine ph_drpa_tda_trip
|
||||||
|
|
||||||
|
! ---
|
||||||
|
|
||||||
|
subroutine ph_drpa_sing(nO, nBas, nS, eps, ERI, &
|
||||||
|
Omega, X) bind(C, name = "ph_drpa_sing")
|
||||||
|
|
||||||
|
import c_int, c_double
|
||||||
|
integer(c_int), intent(in), value :: nO, nBas, nS
|
||||||
|
real(c_double), intent(in) :: eps(nBas)
|
||||||
|
real(c_double), intent(in) :: ERI(nBas,nBas,nBas,nBas)
|
||||||
|
real(c_double), intent(out) :: Omega(nS)
|
||||||
|
real(c_double), intent(out) :: X(nS,nS)
|
||||||
|
|
||||||
|
end subroutine ph_drpa_sing
|
||||||
|
|
||||||
|
! ---
|
||||||
|
|
||||||
|
subroutine ph_drpa_trip(nO, nBas, nS, eps, ERI, &
|
||||||
|
Omega, X) bind(C, name = "ph_drpa_trip")
|
||||||
|
|
||||||
|
import c_int, c_double
|
||||||
|
integer(c_int), intent(in), value :: nO, nBas, nS
|
||||||
|
real(c_double), intent(in) :: eps(nBas)
|
||||||
|
real(c_double), intent(in) :: ERI(nBas,nBas,nBas,nBas)
|
||||||
|
real(c_double), intent(out) :: Omega(nS)
|
||||||
|
real(c_double), intent(out) :: X(nS,nS)
|
||||||
|
|
||||||
|
end subroutine ph_drpa_trip
|
||||||
|
|
||||||
|
! ---
|
||||||
|
|
||||||
end interface
|
end interface
|
||||||
|
|
||||||
|
@ -39,7 +39,7 @@ subroutine read_dipole_integrals(working_dir,nBas,R)
|
|||||||
else
|
else
|
||||||
|
|
||||||
do
|
do
|
||||||
read(21,*,iostat=ios) mu,nu,Dip
|
read(21, '(I5, I5, E25.17)', iostat=ios) mu, nu, Dip
|
||||||
if(ios /= 0) exit
|
if(ios /= 0) exit
|
||||||
R(mu,nu,1) = Dip
|
R(mu,nu,1) = Dip
|
||||||
R(nu,mu,1) = Dip
|
R(nu,mu,1) = Dip
|
||||||
@ -62,7 +62,7 @@ subroutine read_dipole_integrals(working_dir,nBas,R)
|
|||||||
else
|
else
|
||||||
|
|
||||||
do
|
do
|
||||||
read(22,*,iostat=ios) mu,nu,Dip
|
read(22, '(I5, I5, E25.17)', iostat=ios) mu, nu, Dip
|
||||||
if(ios /= 0) exit
|
if(ios /= 0) exit
|
||||||
R(mu,nu,2) = Dip
|
R(mu,nu,2) = Dip
|
||||||
R(nu,mu,2) = Dip
|
R(nu,mu,2) = Dip
|
||||||
@ -85,7 +85,7 @@ subroutine read_dipole_integrals(working_dir,nBas,R)
|
|||||||
else
|
else
|
||||||
|
|
||||||
do
|
do
|
||||||
read(23,*,iostat=ios) mu,nu,Dip
|
read(23, '(I5, I5, E25.17)', iostat=ios) mu, nu, Dip
|
||||||
if(ios /= 0) exit
|
if(ios /= 0) exit
|
||||||
R(mu,nu,3) = Dip
|
R(mu,nu,3) = Dip
|
||||||
R(nu,mu,3) = Dip
|
R(nu,mu,3) = Dip
|
||||||
|
Loading…
Reference in New Issue
Block a user