mirror of
https://github.com/pfloos/quack
synced 2025-01-03 01:56:09 +01:00
refact for GPU
This commit is contained in:
parent
1a091a0707
commit
1235823334
2
input/hardware
Normal file
2
input/hardware
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
# if True (T), use GPU
|
||||||
|
F
|
53
src/GPU/cu_quack_module.f90
Normal file
53
src/GPU/cu_quack_module.f90
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
module cu_quack_module

  ! Explicit Fortran interfaces to the C-callable GPU routines of QuAcK.
  !
  ! The interface is declared unconditionally: no USE_GPU preprocessor guard
  ! is active in this module, so any unit that calls the routine below needs
  ! the matching C symbol to be available at link time.

  use, intrinsic :: iso_c_binding

  implicit none

  interface

    ! Binding to the C function "ph_drpa_tda_sing".
    !
    !   nO, nBas, nS : passed by value as C ints
    !   eps          : input array of length nBas
    !   ERI          : input rank-4 array, nBas along each dimension
    !   Omega        : output array of length nS
    !   X            : output nS x nS array
    !
    ! NOTE(review): Omega and X presumably receive the eigenvalues and
    ! eigenvectors of the singlet ph-dRPA TDA problem -- confirm against
    ! the CUDA implementation.
    subroutine ph_drpa_tda_sing(nO, nBas, nS, eps, ERI, Omega, X) &
               bind(C, name = "ph_drpa_tda_sing")

      import :: c_int, c_double

      integer(c_int), value, intent(in) :: nO
      integer(c_int), value, intent(in) :: nBas
      integer(c_int), value, intent(in) :: nS

      real(c_double), intent(in)  :: eps(nBas)
      real(c_double), intent(in)  :: ERI(nBas,nBas,nBas,nBas)

      real(c_double), intent(out) :: Omega(nS)
      real(c_double), intent(out) :: X(nS,nS)

    end subroutine ph_drpa_tda_sing

  end interface

end module cu_quack_module
|
||||||
|
|
||||||
|
|
@ -39,10 +39,7 @@ subroutine phLR(TDA,nS,Aph,Bph,EcRPA,Om,XpY,XmY)
|
|||||||
if(TDA) then
|
if(TDA) then
|
||||||
|
|
||||||
XpY(:,:) = Aph(:,:)
|
XpY(:,:) = Aph(:,:)
|
||||||
!call wall_time(t1)
|
|
||||||
call diagonalize_matrix(nS,XpY,Om)
|
call diagonalize_matrix(nS,XpY,Om)
|
||||||
!call wall_time(t2)
|
|
||||||
!print*, 'diag time on CPU (sec):', t2 - t1
|
|
||||||
XpY(:,:) = transpose(XpY(:,:))
|
XpY(:,:) = transpose(XpY(:,:))
|
||||||
XmY(:,:) = XpY(:,:)
|
XmY(:,:) = XpY(:,:)
|
||||||
|
|
||||||
|
@ -44,6 +44,8 @@ program QuAcK
|
|||||||
|
|
||||||
logical :: reg_MP
|
logical :: reg_MP
|
||||||
|
|
||||||
|
logical :: use_gpu
|
||||||
|
|
||||||
integer :: maxSCF_CC,max_diis_CC
|
integer :: maxSCF_CC,max_diis_CC
|
||||||
double precision :: thresh_CC
|
double precision :: thresh_CC
|
||||||
|
|
||||||
@ -134,6 +136,12 @@ program QuAcK
|
|||||||
doACFDT,exchange_kernel,doXBS, &
|
doACFDT,exchange_kernel,doXBS, &
|
||||||
dophBSE,dophBSE2,doppBSE,dBSE,dTDA)
|
dophBSE,dophBSE2,doppBSE,dBSE,dTDA)
|
||||||
|
|
||||||
|
!------------------!
|
||||||
|
! Hardware !
|
||||||
|
!------------------!
|
||||||
|
|
||||||
|
call read_hardware(working_dir,use_gpu)
|
||||||
|
|
||||||
!------------------------------------!
|
!------------------------------------!
|
||||||
! Read input information !
|
! Read input information !
|
||||||
!------------------------------------!
|
!------------------------------------!
|
||||||
@ -218,7 +226,7 @@ program QuAcK
|
|||||||
!-------------------------!
|
!-------------------------!
|
||||||
|
|
||||||
if(doRQuAcK) &
|
if(doRQuAcK) &
|
||||||
call RQuAcK(doRtest,doRHF,doROHF,dostab,dosearch,doMP2,doMP3,doCCD,dopCCD,doDCD,doCCSD,doCCSDT, &
|
call RQuAcK(use_gpu,doRtest,doRHF,doROHF,dostab,dosearch,doMP2,doMP3,doCCD,dopCCD,doDCD,doCCSD,doCCSDT, &
|
||||||
dodrCCD,dorCCD,docrCCD,dolCCD,doCIS,doCIS_D,doCID,doCISD,doFCI,dophRPA,dophRPAx,docrRPA,doppRPA, &
|
dodrCCD,dorCCD,docrCCD,dolCCD,doCIS,doCIS_D,doCID,doCISD,doFCI,dophRPA,dophRPAx,docrRPA,doppRPA, &
|
||||||
doG0F2,doevGF2,doqsGF2,doufG0F02,doG0F3,doevGF3,doG0W0,doevGW,doqsGW,doufG0W0,doufGW, &
|
doG0F2,doevGF2,doqsGF2,doufG0F02,doG0F3,doevGF3,doG0W0,doevGW,doqsGW,doufG0W0,doufGW, &
|
||||||
doG0T0pp,doevGTpp,doqsGTpp,doufG0T0pp,doG0T0eh,doevGTeh,doqsGTeh, &
|
doG0T0pp,doevGTpp,doqsGTpp,doufG0T0pp,doG0T0eh,doevGTeh,doqsGTeh, &
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
subroutine RQuAcK(dotest,doRHF,doROHF,dostab,dosearch,doMP2,doMP3,doCCD,dopCCD,doDCD,doCCSD,doCCSDT, &
|
subroutine RQuAcK(use_gpu,dotest,doRHF,doROHF,dostab,dosearch,doMP2,doMP3,doCCD,dopCCD,doDCD,doCCSD,doCCSDT, &
|
||||||
dodrCCD,dorCCD,docrCCD,dolCCD,doCIS,doCIS_D,doCID,doCISD,doFCI,dophRPA,dophRPAx,docrRPA,doppRPA, &
|
dodrCCD,dorCCD,docrCCD,dolCCD,doCIS,doCIS_D,doCID,doCISD,doFCI,dophRPA,dophRPAx,docrRPA,doppRPA, &
|
||||||
doG0F2,doevGF2,doqsGF2,doufG0F02,doG0F3,doevGF3,doG0W0,doevGW,doqsGW,doufG0W0,doufGW, &
|
doG0F2,doevGF2,doqsGF2,doufG0F02,doG0F3,doevGF3,doG0W0,doevGW,doqsGW,doufG0W0,doufGW, &
|
||||||
doG0T0pp,doevGTpp,doqsGTpp,doufG0T0pp,doG0T0eh,doevGTeh,doqsGTeh, &
|
doG0T0pp,doevGTpp,doqsGTpp,doufG0T0pp,doG0T0eh,doevGTeh,doqsGTeh, &
|
||||||
@ -14,6 +14,8 @@ subroutine RQuAcK(dotest,doRHF,doROHF,dostab,dosearch,doMP2,doMP3,doCCD,dopCCD,d
|
|||||||
implicit none
|
implicit none
|
||||||
include 'parameters.h'
|
include 'parameters.h'
|
||||||
|
|
||||||
|
logical,intent(in) :: use_gpu
|
||||||
|
|
||||||
logical,intent(in) :: dotest
|
logical,intent(in) :: dotest
|
||||||
|
|
||||||
logical,intent(in) :: doRHF,doROHF
|
logical,intent(in) :: doRHF,doROHF
|
||||||
@ -274,7 +276,7 @@ subroutine RQuAcK(dotest,doRHF,doROHF,dostab,dosearch,doMP2,doMP3,doCCD,dopCCD,d
|
|||||||
if(doRPA) then
|
if(doRPA) then
|
||||||
|
|
||||||
call wall_time(start_RPA)
|
call wall_time(start_RPA)
|
||||||
call RRPA(dotest,dophRPA,dophRPAx,docrRPA,doppRPA,TDA,doACFDT,exchange_kernel,singlet,triplet, &
|
call RRPA(use_gpu,dotest,dophRPA,dophRPAx,docrRPA,doppRPA,TDA,doACFDT,exchange_kernel,singlet,triplet, &
|
||||||
nOrb,nC,nO,nV,nR,nS,ENuc,ERHF,ERI_MO,dipole_int_MO,eHF)
|
nOrb,nC,nO,nV,nR,nS,ENuc,ERHF,ERI_MO,dipole_int_MO,eHF)
|
||||||
call wall_time(end_RPA)
|
call wall_time(end_RPA)
|
||||||
|
|
||||||
|
45
src/QuAcK/read_hardware.f90
Normal file
45
src/QuAcK/read_hardware.f90
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
subroutine read_hardware(working_dir,use_gpu)

! Read the hardware options from <working_dir>/input/hardware
!
! The first record of the file is skipped and the first character of the
! second record selects the GPU: 'T' (or 't') enables it, anything else
! disables it.
!
! use_gpu is .false. whenever the file cannot be opened or does not contain
! the two expected records, so a missing or truncated file falls back to the
! CPU instead of aborting the run.

  implicit none

! Input variables

  character(len=256),intent(in) :: working_dir

! Output variables

  logical,intent(out)           :: use_gpu

! Local variables

  integer,parameter             :: iunit = 1
  character(len=1)              :: ans
  integer                       :: ios

  ! Longer than working_dir so that appending '/input/hardware' cannot truncate
  character(len=512)            :: file_path

! CPU is the default; it is only overridden by a well-formed file

  use_gpu = .false.

! Open file with hardware specification

  file_path = trim(working_dir) // '/input/hardware'
  open(unit=iunit, file=trim(file_path), status='old', action='read', iostat=ios)

  ! Nothing was opened, hence nothing to read or close
  if(ios /= 0) return

! Skip the first record, then read the answer; any read error keeps the default

  read(iunit,*,iostat=ios)
  if(ios == 0) read(iunit,*,iostat=ios) ans
  if(ios == 0) use_gpu = (ans == 'T' .or. ans == 't')

! Close file with options

  close(unit=iunit)

end subroutine read_hardware
|
@ -1,4 +1,4 @@
|
|||||||
subroutine RRPA(dotest,dophRPA,dophRPAx,docrRPA,doppRPA,TDA,doACFDT,exchange_kernel,singlet,triplet, &
|
subroutine RRPA(use_gpu,dotest,dophRPA,dophRPAx,docrRPA,doppRPA,TDA,doACFDT,exchange_kernel,singlet,triplet, &
|
||||||
nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
|
nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
|
||||||
|
|
||||||
! Random-phase approximation module
|
! Random-phase approximation module
|
||||||
@ -8,6 +8,8 @@ subroutine RRPA(dotest,dophRPA,dophRPAx,docrRPA,doppRPA,TDA,doACFDT,exchange_ker
|
|||||||
|
|
||||||
! Input variables
|
! Input variables
|
||||||
|
|
||||||
|
logical,intent(in) :: use_gpu
|
||||||
|
|
||||||
logical,intent(in) :: dotest
|
logical,intent(in) :: dotest
|
||||||
|
|
||||||
logical,intent(in) :: dophRPA
|
logical,intent(in) :: dophRPA
|
||||||
@ -43,15 +45,17 @@ subroutine RRPA(dotest,dophRPA,dophRPAx,docrRPA,doppRPA,TDA,doACFDT,exchange_ker
|
|||||||
if(dophRPA) then
|
if(dophRPA) then
|
||||||
|
|
||||||
call wall_time(start_RPA)
|
call wall_time(start_RPA)
|
||||||
call phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
|
if (use_gpu) then
|
||||||
|
call phRRPA_GPU(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
|
||||||
|
else
|
||||||
|
call phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
|
||||||
|
endif
|
||||||
call wall_time(end_RPA)
|
call wall_time(end_RPA)
|
||||||
|
|
||||||
t_RPA = end_RPA - start_RPA
|
t_RPA = end_RPA - start_RPA
|
||||||
write(*,'(A65,1X,F9.3,A8)') 'Total wall time for RPA = ',t_RPA,' seconds'
|
write(*,'(A65,1X,F9.3,A8)') 'Total wall time for RPA = ',t_RPA,' seconds'
|
||||||
write(*,*)
|
write(*,*)
|
||||||
|
|
||||||
!call phRRPA_GPU(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
|
|
||||||
|
|
||||||
end if
|
end if
|
||||||
|
|
||||||
!------------------------------------------------------------------------
|
!------------------------------------------------------------------------
|
||||||
|
@ -83,6 +83,7 @@ subroutine phRRPA(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,
|
|||||||
call phLR(TDA,nS,Aph,Bph,EcRPA(ispin),Om,XpY,XmY)
|
call phLR(TDA,nS,Aph,Bph,EcRPA(ispin),Om,XpY,XmY)
|
||||||
!call wall_time(t2)
|
!call wall_time(t2)
|
||||||
!print *, "wall time diag A on CPU (sec) = ", t2 - t1
|
!print *, "wall time diag A on CPU (sec) = ", t2 - t1
|
||||||
|
!stop
|
||||||
call print_excitation_energies('phRPA@RHF','singlet',nS,Om)
|
call print_excitation_energies('phRPA@RHF','singlet',nS,Om)
|
||||||
call phLR_transition_vectors(.true.,nBas,nC,nO,nV,nR,nS,dipole_int,Om,XpY,XmY)
|
call phLR_transition_vectors(.true.,nBas,nC,nO,nV,nR,nS,dipole_int,Om,XpY,XmY)
|
||||||
|
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
#ifdef USE_GPU
|
||||||
|
|
||||||
subroutine phRRPA_GPU(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
|
subroutine phRRPA_GPU(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)
|
||||||
|
|
||||||
use cu_quack_module
|
use cu_quack_module
|
||||||
@ -69,7 +71,8 @@ subroutine phRRPA_GPU(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC
|
|||||||
|
|
||||||
! Memory allocation
|
! Memory allocation
|
||||||
|
|
||||||
allocate(Om(nS),XpY(nS,nS),XmY(nS,nS),Aph(nS,nS),Bph(nS,nS))
|
allocate(Om(nS),XpY(nS,nS),XmY(nS,nS))
|
||||||
|
!allocate(Aph(nS,nS),Bph(nS,nS))
|
||||||
|
|
||||||
! Singlet manifold
|
! Singlet manifold
|
||||||
|
|
||||||
@ -77,6 +80,7 @@ subroutine phRRPA_GPU(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC
|
|||||||
|
|
||||||
if(TDA) then
|
if(TDA) then
|
||||||
|
|
||||||
|
print*, 'start diag on GPU:'
|
||||||
call wall_time(t1)
|
call wall_time(t1)
|
||||||
call ph_drpa_tda_sing(nO, nBas, nS, eHF(1), ERI(1,1,1,1), Om(1), XpY(1,1))
|
call ph_drpa_tda_sing(nO, nBas, nS, eHF(1), ERI(1,1,1,1), Om(1), XpY(1,1))
|
||||||
call wall_time(t2)
|
call wall_time(t2)
|
||||||
@ -154,3 +158,34 @@ subroutine phRRPA_GPU(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC
|
|||||||
end if
|
end if
|
||||||
|
|
||||||
end subroutine
|
end subroutine
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
subroutine phRRPA_GPU(dotest,TDA,doACFDT,exchange_kernel,singlet,triplet,nBas,nC,nO,nV,nR,nS,ENuc,ERHF,ERI,dipole_int,eHF)

! Fallback version of phRRPA_GPU, compiled when USE_GPU is not defined.
!
! It keeps the same argument list as the GPU implementation so that callers
! compile and link unchanged, but performs no calculation: it prints a message
! asking for a USE_GPU build and terminates the program with a non-zero stop
! code, so that calling scripts can detect the failure.

  implicit none
  include 'parameters.h'
  include 'quadrature.h'

! Input variables (declared only to match the GPU routine; none is used)

  logical,intent(in)            :: dotest
  logical,intent(in)            :: TDA
  logical,intent(in)            :: doACFDT
  logical,intent(in)            :: exchange_kernel
  logical,intent(in)            :: singlet
  logical,intent(in)            :: triplet
  integer,intent(in)            :: nBas
  integer,intent(in)            :: nC
  integer,intent(in)            :: nO
  integer,intent(in)            :: nV
  integer,intent(in)            :: nR
  integer,intent(in)            :: nS
  double precision,intent(in)   :: ENuc
  double precision,intent(in)   :: ERHF
  double precision,intent(in)   :: eHF(nBas)
  double precision,intent(in)   :: ERI(nBas,nBas,nBas,nBas)

  ! ncart is not declared here; presumably provided by parameters.h -- TODO confirm
  double precision,intent(in)   :: dipole_int(nBas,nBas,ncart)

! The GPU path was requested but this executable was built without it

  print*, "compile with USE_GPU FLAG!"
  stop 1

end subroutine phRRPA_GPU
|
||||||
|
|
||||||
|
#endif
|
@ -75,6 +75,8 @@ extern "C" void ph_dRPA_A_sing(int nO, int nV, int nBas, int nS, double *eps, do
|
|||||||
dim3 dimGrid(nBlocks, nBlocks, 1);
|
dim3 dimGrid(nBlocks, nBlocks, 1);
|
||||||
dim3 dimBlock(sBlocks, sBlocks, 1);
|
dim3 dimBlock(sBlocks, sBlocks, 1);
|
||||||
|
|
||||||
|
//dim3 dimGrid(nBlocks, 1, 1);
|
||||||
|
//dim3 dimBlock(sBlocks, 1, 1);
|
||||||
|
|
||||||
printf("lunching ph_dRPA_A_sing_kernel with %dx%d blocks and %dx%d threads/block\n",
|
printf("lunching ph_dRPA_A_sing_kernel with %dx%d blocks and %dx%d threads/block\n",
|
||||||
nBlocks, nBlocks, sBlocks, sBlocks);
|
nBlocks, nBlocks, sBlocks, sBlocks);
|
||||||
|
@ -17,14 +17,16 @@ void ph_drpa_tda_sing(int nO, int nBas, int nS, double *h_eps, double *h_ERI,
|
|||||||
|
|
||||||
int nV = nBas - nO;
|
int nV = nBas - nO;
|
||||||
|
|
||||||
int nBas2 = nBas * nBas;
|
long long nBas_long = (long long) nBas;
|
||||||
int nBas4 = nBas2 * nBas2;
|
long long nBas4 = nBas_long * nBas_long * nBas_long * nBas_long;
|
||||||
|
|
||||||
float elapsedTime;
|
float elapsedTime;
|
||||||
cudaEvent_t start, stop;
|
cudaEvent_t start, stop;
|
||||||
cudaEventCreate(&start);
|
cudaEventCreate(&start);
|
||||||
cudaEventCreate(&stop);
|
cudaEventCreate(&stop);
|
||||||
|
|
||||||
|
//printf("nO = %d, nBas = %d, nS = %d\n", nO, nBas, nS);
|
||||||
|
//printf("nBas4 = %lld\n", nBas4);
|
||||||
|
|
||||||
|
|
||||||
check_Cuda_Errors(cudaMalloc((void**)&d_eps, nBas * sizeof(double)),
|
check_Cuda_Errors(cudaMalloc((void**)&d_eps, nBas * sizeof(double)),
|
||||||
@ -32,6 +34,7 @@ void ph_drpa_tda_sing(int nO, int nBas, int nS, double *h_eps, double *h_ERI,
|
|||||||
check_Cuda_Errors(cudaMalloc((void**)&d_ERI, nBas4 * sizeof(double)),
|
check_Cuda_Errors(cudaMalloc((void**)&d_ERI, nBas4 * sizeof(double)),
|
||||||
"cudaMalloc", __FILE__, __LINE__);
|
"cudaMalloc", __FILE__, __LINE__);
|
||||||
|
|
||||||
|
printf("CPU->GPU transfer..\n");
|
||||||
cudaEventRecord(start, 0);
|
cudaEventRecord(start, 0);
|
||||||
check_Cuda_Errors(cudaMemcpy(d_eps, h_eps, nBas * sizeof(double), cudaMemcpyHostToDevice),
|
check_Cuda_Errors(cudaMemcpy(d_eps, h_eps, nBas * sizeof(double), cudaMemcpyHostToDevice),
|
||||||
"cudaMemcpy", __FILE__, __LINE__);
|
"cudaMemcpy", __FILE__, __LINE__);
|
||||||
@ -55,6 +58,10 @@ void ph_drpa_tda_sing(int nO, int nBas, int nS, double *h_eps, double *h_ERI,
|
|||||||
printf("Time elapsed on A kernel = %f msec\n", elapsedTime);
|
printf("Time elapsed on A kernel = %f msec\n", elapsedTime);
|
||||||
|
|
||||||
|
|
||||||
|
check_Cuda_Errors(cudaFree(d_eps), "cudaFree", __FILE__, __LINE__);
|
||||||
|
check_Cuda_Errors(cudaFree(d_ERI), "cudaFree", __FILE__, __LINE__);
|
||||||
|
|
||||||
|
|
||||||
// diagonalize A
|
// diagonalize A
|
||||||
int *d_info = NULL;
|
int *d_info = NULL;
|
||||||
double *d_Omega = NULL;
|
double *d_Omega = NULL;
|
||||||
@ -89,8 +96,6 @@ void ph_drpa_tda_sing(int nO, int nBas, int nS, double *h_eps, double *h_ERI,
|
|||||||
printf("Time elapsed on GPU -> CPU transfer = %f msec\n", elapsedTime);
|
printf("Time elapsed on GPU -> CPU transfer = %f msec\n", elapsedTime);
|
||||||
|
|
||||||
check_Cuda_Errors(cudaFree(d_info), "cudaFree", __FILE__, __LINE__);
|
check_Cuda_Errors(cudaFree(d_info), "cudaFree", __FILE__, __LINE__);
|
||||||
check_Cuda_Errors(cudaFree(d_eps), "cudaFree", __FILE__, __LINE__);
|
|
||||||
check_Cuda_Errors(cudaFree(d_ERI), "cudaFree", __FILE__, __LINE__);
|
|
||||||
check_Cuda_Errors(cudaFree(d_A), "cudaFree", __FILE__, __LINE__);
|
check_Cuda_Errors(cudaFree(d_A), "cudaFree", __FILE__, __LINE__);
|
||||||
check_Cuda_Errors(cudaFree(d_Omega), "cudaFree", __FILE__, __LINE__);
|
check_Cuda_Errors(cudaFree(d_Omega), "cudaFree", __FILE__, __LINE__);
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ def check_compiler_exists(compiler):
|
|||||||
compile_gfortran_mac = """
|
compile_gfortran_mac = """
|
||||||
FC = gfortran
|
FC = gfortran
|
||||||
AR = libtool -static -o
|
AR = libtool -static -o
|
||||||
FFLAGS = -I$IDIR -J$IDIR -fbacktrace -g -Wall -Wno-unused-variable -Wno-unused -Wno-unused-dummy-argument -Wuninitialized -Wmaybe-uninitialized -O3 -march=native
|
FFLAGS = -I$IDIR -J$IDIR -cpp -fbacktrace -g -Wall -Wno-unused-variable -Wno-unused -Wno-unused-dummy-argument -Wuninitialized -Wmaybe-uninitialized -O3 -march=native
|
||||||
CC = gcc
|
CC = gcc
|
||||||
CXX = g++
|
CXX = g++
|
||||||
LAPACK=-lblas -llapack
|
LAPACK=-lblas -llapack
|
||||||
@ -47,7 +47,7 @@ FIX_ORDER_OF_LIBS=
|
|||||||
compile_gfortran_mac_debug = """
|
compile_gfortran_mac_debug = """
|
||||||
FC = gfortran
|
FC = gfortran
|
||||||
AR = libtool -static -o
|
AR = libtool -static -o
|
||||||
FFLAGS = -I$IDIR -J$IDIR -fbacktrace -Wall -Wno-unused-variable -g -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant
|
FFLAGS = -I$IDIR -J$IDIR -cpp -fbacktrace -Wall -Wno-unused-variable -g -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant
|
||||||
CC = gcc
|
CC = gcc
|
||||||
CXX = g++
|
CXX = g++
|
||||||
LAPACK=-lblas -llapack
|
LAPACK=-lblas -llapack
|
||||||
@ -58,7 +58,7 @@ FIX_ORDER_OF_LIBS=
|
|||||||
compile_gfortran_linux_debug = """
|
compile_gfortran_linux_debug = """
|
||||||
FC = gfortran
|
FC = gfortran
|
||||||
AR = ar crs
|
AR = ar crs
|
||||||
FFLAGS = -I$IDIR -J$IDIR -fbacktrace -Wall -g -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant
|
FFLAGS = -I$IDIR -J$IDIR -cpp -fbacktrace -Wall -g -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant
|
||||||
CC = gcc
|
CC = gcc
|
||||||
CXX = g++
|
CXX = g++
|
||||||
LAPACK=-lblas -llapack
|
LAPACK=-lblas -llapack
|
||||||
@ -83,7 +83,7 @@ elif sys.platform.lower() == "linux" or os.path.exists('/proc/version'):
|
|||||||
compiler = """
|
compiler = """
|
||||||
FC = ifort -mkl=parallel -qopenmp
|
FC = ifort -mkl=parallel -qopenmp
|
||||||
AR = ar crs
|
AR = ar crs
|
||||||
FFLAGS = -I$IDIR -module $IDIR -traceback -g -Ofast -xHost
|
FFLAGS = -I$IDIR -module $IDIR -fpp -traceback -g -Ofast -xHost
|
||||||
CC = icc
|
CC = icc
|
||||||
CXX = icpc
|
CXX = icpc
|
||||||
LAPACK=
|
LAPACK=
|
||||||
@ -94,10 +94,12 @@ FIX_ORDER_OF_LIBS=-Wl,--start-group
|
|||||||
compiler = """
|
compiler = """
|
||||||
FC = gfortran -fopenmp
|
FC = gfortran -fopenmp
|
||||||
AR = ar crs
|
AR = ar crs
|
||||||
FFLAGS = -I$IDIR -J$IDIR -fbacktrace -g -Wall -Wno-unused-variable -Wno-unused -Wno-unused-dummy-argument -Wuninitialized -Wmaybe-uninitialized -O3 -march=native
|
FFLAGS = -I$IDIR -J$IDIR -cpp -fbacktrace -g -Wall -Wno-unused-variable -Wno-unused -Wno-unused-dummy-argument -Wuninitialized -Wmaybe-uninitialized -O3 -march=native
|
||||||
CC = gcc
|
CC = gcc
|
||||||
CXX = g++
|
CXX = g++
|
||||||
LAPACK=-lblas -llapack
|
LAPACK=-lblas -llapack
|
||||||
|
# uncomment for TURPAN
|
||||||
|
#LAPACK=-larmpl_lp64_mp
|
||||||
STDCXX=-lstdc++
|
STDCXX=-lstdc++
|
||||||
FIX_ORDER_OF_LIBS=-Wl,--start-group
|
FIX_ORDER_OF_LIBS=-Wl,--start-group
|
||||||
"""
|
"""
|
||||||
@ -113,8 +115,16 @@ if USE_GPU:
|
|||||||
compiler_tmp = compiler.strip().split('\n')
|
compiler_tmp = compiler.strip().split('\n')
|
||||||
compiler_tmp[0] += " -L{}/src/cuda/build -lcuquack -lcudart -lcublas -lcusolver".format(QUACK_ROOT)
|
compiler_tmp[0] += " -L{}/src/cuda/build -lcuquack -lcudart -lcublas -lcusolver".format(QUACK_ROOT)
|
||||||
compiler_exe = '\n'.join(compiler_tmp)
|
compiler_exe = '\n'.join(compiler_tmp)
|
||||||
|
|
||||||
|
compiler_tmp = compiler.strip().split('\n')
|
||||||
|
compiler_tmp[2] += " -DUSE_GPU"
|
||||||
|
compiler_lib = '\n'.join(compiler_tmp)
|
||||||
|
|
||||||
|
# build_main is assembled with "\n".join([...]), which needs a string; compiler_tmp
# is a list of lines, so use its already-joined form (same content, -DUSE_GPU included).
compiler_main = compiler_lib
|
||||||
else:
|
else:
|
||||||
compiler_exe = compiler
|
compiler_exe = compiler
|
||||||
|
compiler_lib = compiler
|
||||||
|
compiler_main = compiler
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -172,7 +182,7 @@ rule git_clone
|
|||||||
|
|
||||||
build_in_lib_dir = "\n".join([
|
build_in_lib_dir = "\n".join([
|
||||||
header,
|
header,
|
||||||
compiler,
|
compiler_lib,
|
||||||
rule_fortran,
|
rule_fortran,
|
||||||
rule_build_lib,
|
rule_build_lib,
|
||||||
])
|
])
|
||||||
@ -187,7 +197,7 @@ build_in_exe_dir = "\n".join([
|
|||||||
|
|
||||||
build_main = "\n".join([
|
build_main = "\n".join([
|
||||||
header,
|
header,
|
||||||
compiler,
|
compiler_main,
|
||||||
rule_git_clone,
|
rule_git_clone,
|
||||||
])
|
])
|
||||||
|
|
||||||
@ -195,9 +205,10 @@ exe_dirs = ["QuAcK"]
|
|||||||
lib_dirs = list(filter(lambda x: os.path.isdir(x) and \
|
lib_dirs = list(filter(lambda x: os.path.isdir(x) and \
|
||||||
x not in ["cuda"] and \
|
x not in ["cuda"] and \
|
||||||
x not in exe_dirs, os.listdir(".")))
|
x not in exe_dirs, os.listdir(".")))
|
||||||
i = lib_dirs.index("mod")
|
if(USE_GPU):
|
||||||
lib_dirs[0], lib_dirs[i] = lib_dirs[i], lib_dirs[0]
|
i = lib_dirs.index("GPU")
|
||||||
if not USE_GPU:
|
lib_dirs[0], lib_dirs[i] = lib_dirs[i], lib_dirs[0]
|
||||||
|
else:
|
||||||
lib_dirs.remove("GPU")
|
lib_dirs.remove("GPU")
|
||||||
|
|
||||||
def create_ninja_in_libdir(directory):
|
def create_ninja_in_libdir(directory):
|
||||||
|
@ -1,82 +0,0 @@
|
|||||||
module cu_quack_module
|
|
||||||
|
|
||||||
use, intrinsic :: iso_c_binding
|
|
||||||
|
|
||||||
implicit none
|
|
||||||
|
|
||||||
! ---
|
|
||||||
|
|
||||||
interface
|
|
||||||
|
|
||||||
subroutine ph_drpa_tda_sing(nO, nBas, nS, eps, ERI, &
|
|
||||||
Omega, X) bind(C, name = "ph_drpa_tda_sing")
|
|
||||||
|
|
||||||
import c_int, c_double
|
|
||||||
integer(c_int), intent(in), value :: nO, nBas, nS
|
|
||||||
real(c_double), intent(in) :: eps(nBas)
|
|
||||||
real(c_double), intent(in) :: ERI(nBas,nBas,nBas,nBas)
|
|
||||||
real(c_double), intent(out) :: Omega(nS)
|
|
||||||
real(c_double), intent(out) :: X(nS,nS)
|
|
||||||
|
|
||||||
end subroutine ph_drpa_tda_sing
|
|
||||||
|
|
||||||
! ---
|
|
||||||
|
|
||||||
subroutine ph_drpa_tda_trip(nO, nBas, nS, eps, ERI, &
|
|
||||||
Omega, X) bind(C, name = "ph_drpa_tda_trip")
|
|
||||||
|
|
||||||
import c_int, c_double
|
|
||||||
integer(c_int), intent(in), value :: nO, nBas, nS
|
|
||||||
real(c_double), intent(in) :: eps(nBas)
|
|
||||||
real(c_double), intent(in) :: ERI(nBas,nBas,nBas,nBas)
|
|
||||||
real(c_double), intent(out) :: Omega(nS)
|
|
||||||
real(c_double), intent(out) :: X(nS,nS)
|
|
||||||
|
|
||||||
end subroutine ph_drpa_tda_trip
|
|
||||||
|
|
||||||
! ---
|
|
||||||
|
|
||||||
subroutine ph_drpa_sing(nO, nBas, nS, eps, ERI, &
|
|
||||||
Omega, X) bind(C, name = "ph_drpa_sing")
|
|
||||||
|
|
||||||
import c_int, c_double
|
|
||||||
integer(c_int), intent(in), value :: nO, nBas, nS
|
|
||||||
real(c_double), intent(in) :: eps(nBas)
|
|
||||||
real(c_double), intent(in) :: ERI(nBas,nBas,nBas,nBas)
|
|
||||||
real(c_double), intent(out) :: Omega(nS)
|
|
||||||
real(c_double), intent(out) :: X(nS,nS)
|
|
||||||
|
|
||||||
end subroutine ph_drpa_sing
|
|
||||||
|
|
||||||
! ---
|
|
||||||
|
|
||||||
subroutine ph_drpa_trip(nO, nBas, nS, eps, ERI, &
|
|
||||||
Omega, X) bind(C, name = "ph_drpa_trip")
|
|
||||||
|
|
||||||
import c_int, c_double
|
|
||||||
integer(c_int), intent(in), value :: nO, nBas, nS
|
|
||||||
real(c_double), intent(in) :: eps(nBas)
|
|
||||||
real(c_double), intent(in) :: ERI(nBas,nBas,nBas,nBas)
|
|
||||||
real(c_double), intent(out) :: Omega(nS)
|
|
||||||
real(c_double), intent(out) :: X(nS,nS)
|
|
||||||
|
|
||||||
end subroutine ph_drpa_trip
|
|
||||||
|
|
||||||
! ---
|
|
||||||
|
|
||||||
end interface
|
|
||||||
|
|
||||||
! ---
|
|
||||||
|
|
||||||
contains
|
|
||||||
|
|
||||||
subroutine cu_quack_module_test()
|
|
||||||
implicit none
|
|
||||||
print*, ' hello from cu_quack_module'
|
|
||||||
end subroutine cu_quack_module_test
|
|
||||||
|
|
||||||
! ---
|
|
||||||
|
|
||||||
end module cu_quack_module
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user