Split the GPU bindings into an architecture-independent module (src/gpu)
and an architecture-specific backend that configure links as src/gpu_arch.
The CCSD driver now obtains H_oo, H_vv and H_vo through gpu_allocate_double.

Review notes for this revision of the patch
  * Line structure restored (one diff line per text line).  Blank context
    lines and the indentation of context/removed lines were reconstructed
    from the hunk line counts; verify against the tree or apply with
    whitespace tolerance (e.g. `git apply --ignore-whitespace`).
  * gpu_allocate_double: the byte count now uses product(s) instead of
    sum(s), so the buffer holds every element of the requested shape.
  * gpu_deallocate_double: now calls gpu_deallocate_c, the interface this
    patch declares for the C symbol gpu_deallocate.
  * NOTE(review), not changed here because the fix alters interfaces:
      - ptr is a scalar pointer in gpu_allocate_double/gpu_deallocate_double
        and H_oo/H_vv/H_vo are declared as scalar pointers, yet c_f_pointer
        is given a shape -- confirm the intended rank.
      - c_loc is applied to dummy arrays declared without the target
        attribute in gpu_upload_double/gpu_download_double/gpu_copy_double.
      - The destination buffers of those three routines are intent(in).
      - configure: the comment on the `amd)` branch still says "Nvidia".

diff --git a/configure b/configure
index 014275eb..db158966 100755
--- a/configure
+++ b/configure
@@ -115,19 +115,19 @@ while getopts "d:c:i:g:h" c ; do
 done
 
 # Handle GPU acceleration
-rm -f ${QP_ROOT}/src/gpu
+rm -f ${QP_ROOT}/src/gpu_arch
 case "$GPU" in
     amd) # Nvidia
         echo "Activating AMD GPU acceleration"
-        ln -s ${QP_ROOT}/src/gpu_amd ${QP_ROOT}/src/gpu
+        ln -s ${QP_ROOT}/src/gpu_amd ${QP_ROOT}/src/gpu_arch
         ;;
     nvidia) # Nvidia
         echo "Activating Nvidia GPU acceleration"
-        ln -s ${QP_ROOT}/src/gpu_nvidia ${QP_ROOT}/src/gpu
+        ln -s ${QP_ROOT}/src/gpu_nvidia ${QP_ROOT}/src/gpu_arch
         ;;
     *) # No Acceleration
         echo "Disabling GPU acceleration"
-        ln -s ${QP_ROOT}/src/gpu_x86 ${QP_ROOT}/src/gpu
+        ln -s ${QP_ROOT}/src/gpu_x86 ${QP_ROOT}/src/gpu_arch
         ;;
 esac
 
diff --git a/src/ccsd/NEED b/src/ccsd/NEED
index e6e6bc59..8298f28e 100644
--- a/src/ccsd/NEED
+++ b/src/ccsd/NEED
@@ -1,2 +1,3 @@
+gpu
 hartree_fock
 utils_cc
diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f
index 9d4ae7f9..84aab08a 100644
--- a/src/ccsd/ccsd_space_orb_sub.irp.f
+++ b/src/ccsd/ccsd_space_orb_sub.irp.f
@@ -1,4 +1,5 @@
 subroutine run_ccsd_space_orb
+  use gpu
   implicit none
 
 
@@ -11,7 +12,7 @@ subroutine run_ccsd_space_orb
 
   double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:)
   double precision, allocatable :: t1(:,:), r1(:,:)
-  double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:)
+  double precision, pointer :: H_oo, H_vv, H_vo
   double precision, allocatable :: all_err(:,:), all_t(:,:)
   integer, allocatable :: list_occ(:), list_vir(:)
 
@@ -55,7 +56,10 @@ subroutine run_ccsd_space_orb
   allocate(tau(nO,nO,nV,nV))
   allocate(tau_x(nO,nO,nV,nV))
   allocate(t1(nO,nV), r1(nO,nV))
-  allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO))
+
+  call gpu_allocate_double(H_oo, (/ nO, nO /) )
+  call gpu_allocate_double(H_vv, (/ nV, nV /) )
+  call gpu_allocate_double(H_vo, (/ nV, nO /) )
 
   if (cc_update_method == 'diis') then
     double precision :: rss, diis_mem, extra_mem
@@ -191,7 +195,11 @@ subroutine run_ccsd_space_orb
     deallocate(all_err,all_t)
   endif
 
-  deallocate(H_vv,H_oo,H_vo,r1,r2,tau)
+  call gpu_deallocate_double(H_oo)
+  call gpu_deallocate_double(H_vv)
+  call gpu_deallocate_double(H_vo)
+
+  deallocate(r1,r2,tau)
 
   ! CCSD(T)
   double precision :: e_t, e_t_err
diff --git a/src/gpu/NEED b/src/gpu/NEED
new file mode 100644
index 00000000..c2af78d2
--- /dev/null
+++ b/src/gpu/NEED
@@ -0,0 +1 @@
+gpu_arch
diff --git a/src/gpu/README.rst b/src/gpu/README.rst
new file mode 100644
index 00000000..17ee28a0
--- /dev/null
+++ b/src/gpu/README.rst
@@ -0,0 +1,6 @@
+===
+gpu
+===
+
+Bindings for GPU routines (architecture independent).
+Architecture-dependent files are in gpu_arch.
diff --git a/src/gpu_x86/gpu.h b/src/gpu/gpu.h
similarity index 100%
rename from src/gpu_x86/gpu.h
rename to src/gpu/gpu.h
diff --git a/src/gpu_x86/gpu_module.F90 b/src/gpu/gpu_module.F90
similarity index 74%
rename from src/gpu_x86/gpu_module.F90
rename to src/gpu/gpu_module.F90
index 86ba3926..f35ebc97 100644
--- a/src/gpu_x86/gpu_module.F90
+++ b/src/gpu/gpu_module.F90
@@ -1,5 +1,5 @@
 module gpu
-  use, intrinsic :: iso_c_binding, only : c_int32_t, c_int64_t, c_double, c_size_t, c_char
+  use, intrinsic :: iso_c_binding
   implicit none
 
   interface
@@ -17,7 +17,7 @@ module gpu
       integer(c_int64_t), value :: n
     end subroutine
 
-    subroutine gpu_free_c(ptr) bind(C, name='gpu_free')
+    subroutine gpu_deallocate_c(ptr) bind(C, name='gpu_deallocate')
       import
       type(c_ptr) :: ptr
     end subroutine
@@ -89,53 +89,54 @@ module gpu
 
   end interface
 
+  contains
+
+
+    subroutine gpu_allocate_double(ptr, s)
+      implicit none
+      double precision, pointer, intent(inout) :: ptr
+      integer, intent(in) :: s(:)
+      type(c_ptr) :: cptr
+
+      call gpu_allocate_c(cptr, product(s*1_8)*8_8)
+      call c_f_pointer(cptr, ptr, s)
+    end subroutine
+
+    subroutine gpu_deallocate_double(ptr)
+      implicit none
+      double precision, pointer, intent(inout) :: ptr
+      type(c_ptr) :: cptr
+      cptr = c_loc(ptr)
+      call gpu_deallocate_c(cptr)
+      NULLIFY(ptr)
+    end subroutine
+
 end module
 
-subroutine gpu_allocate_double(ptr, s)
-  use gpu
-  implicit none
-  double precision, pointer, intent(inout) :: ptr
-  integer*8, intent(in) :: s(*)
-  type(c_ptr) :: cptr
-
-  call gpu_allocate_c(cptr, sum(s)*8_8)
-  call c_f_pointer(cptr, ptr, s)
-end subroutine
-
-subroutine gpu_free_double(ptr)
-  use gpu
-  implicit none
-  double precision, pointer, intent(inout) :: ptr
-  type(c_ptr) :: cptr
-  cptr = cloc(ptr)
-  call gpu_free(cptr)
-  NULLIFY(ptr)
-end subroutine
-
 subroutine gpu_upload_double(cpu_ptr, gpu_ptr, n)
   use gpu
   implicit none
   double precision, intent(in) :: cpu_ptr(*)
-  double precision, intent(out) :: gpu_ptr(*)
+  double precision, intent(in) :: gpu_ptr(*)
   integer(c_int64_t), intent(in) :: n
-  call gpu_upload_c(cpu_ptr, gpu_ptr, 8_8*n)
+  call gpu_upload_c(c_loc(cpu_ptr), c_loc(gpu_ptr), 8_8*n)
 end subroutine
 
 subroutine gpu_download_double(gpu_ptr, cpu_ptr, n)
   use gpu
   implicit none
   double precision, intent(in) :: gpu_ptr(*)
-  double precision, intent(out) :: cpu_ptr(*)
+  double precision, intent(in) :: cpu_ptr(*)
   integer(c_int64_t), intent(in) :: n
-  call gpu_download_c(gpu_ptr, cpu_ptr, 8_8*n)
+  call gpu_download_c(c_loc(gpu_ptr), c_loc(cpu_ptr), 8_8*n)
 end subroutine
 
 subroutine gpu_copy_double(gpu_ptr_src, gpu_ptr_dest, n)
   use gpu
   implicit none
   double precision, intent(in) :: gpu_ptr_src(*)
-  double precision, intent(out) :: gpu_ptr_dest(*)
+  double precision, intent(in) :: gpu_ptr_dest(*)
   integer(c_int64_t), intent(in) :: n
-  call gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, 8_8*n)
+  call gpu_copy_c(c_loc(gpu_ptr_src), c_loc(gpu_ptr_dest), 8_8*n)
 end subroutine
 
diff --git a/src/gpu_x86/gpu.c b/src/gpu_x86/gpu.c
index 71505dbe..41ede396 100644
--- a/src/gpu_x86/gpu.c
+++ b/src/gpu_x86/gpu.c
@@ -25,7 +25,7 @@ void gpu_allocate(void** ptr, const int64_t n) {
   }
 }
 
-void gpu_free(void** ptr) {
+void gpu_deallocate(void** ptr) {
   free(*ptr);
   *ptr = NULL;
 }