Separated gpu and gpu_arch

2024-12-22 12:23:43 +01:00 · 2024-06-27 12:06:06 +02:00 · 2024-06-27 12:06:06 +02:00 · 5d80cb7b2d
commit 5d80cb7b2d
parent a9d2f0e188
8 changed files with 54 additions and 37 deletions
--- a/8
+++ b/8
@ -115,19 +115,19 @@ while getopts "d:c:i:g:h" c ; do
 done

 # Handle GPU acceleration
-rm -f ${QP_ROOT}/src/gpu
+rm -f ${QP_ROOT}/src/gpu_arch
 case "$GPU" in
  amd) # Nvidia
     echo "Activating AMD GPU acceleration"
-     ln -s ${QP_ROOT}/src/gpu_amd ${QP_ROOT}/src/gpu
+     ln -s ${QP_ROOT}/src/gpu_amd ${QP_ROOT}/src/gpu_arch
     ;;
  nvidia) # Nvidia
     echo "Activating Nvidia GPU acceleration"
-     ln -s ${QP_ROOT}/src/gpu_nvidia ${QP_ROOT}/src/gpu
+     ln -s ${QP_ROOT}/src/gpu_nvidia ${QP_ROOT}/src/gpu_arch
     ;;
  *) # No Acceleration
     echo "Disabling GPU acceleration"
-     ln -s ${QP_ROOT}/src/gpu_x86 ${QP_ROOT}/src/gpu
+     ln -s ${QP_ROOT}/src/gpu_x86 ${QP_ROOT}/src/gpu_arch
     ;;
 esac

--- a/src/ccsd/NEED
+++ b/src/ccsd/NEED
@ -1,2 +1,3 @@
+gpu
 hartree_fock
 utils_cc
--- a/src/ccsd/ccsd_space_orb_sub.irp.f
+++ b/src/ccsd/ccsd_space_orb_sub.irp.f
@ -1,4 +1,5 @@
 subroutine run_ccsd_space_orb
+  use gpu

  implicit none

@ -11,7 +12,7 @@ subroutine run_ccsd_space_orb

  double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:)
  double precision, allocatable :: t1(:,:), r1(:,:)
-  double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:)
+  double precision, pointer     :: H_oo, H_vv, H_vo

  double precision, allocatable :: all_err(:,:), all_t(:,:)
  integer, allocatable          :: list_occ(:), list_vir(:)
@ -55,7 +56,10 @@ subroutine run_ccsd_space_orb
  allocate(tau(nO,nO,nV,nV))
  allocate(tau_x(nO,nO,nV,nV))
  allocate(t1(nO,nV), r1(nO,nV))
-  allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO))
+
+  call gpu_allocate_double(H_oo, (/ nO, nO /) )
+  call gpu_allocate_double(H_vv, (/ nV, nV /) )
+  call gpu_allocate_double(H_vo, (/ nV, nO /) )

  if (cc_update_method == 'diis') then
    double precision :: rss, diis_mem, extra_mem
@ -191,7 +195,11 @@ subroutine run_ccsd_space_orb
    deallocate(all_err,all_t)
  endif

-  deallocate(H_vv,H_oo,H_vo,r1,r2,tau)
+  call gpu_deallocate_double(H_oo)
+  call gpu_deallocate_double(H_vv)
+  call gpu_deallocate_double(H_vo)
+
+  deallocate(r1,r2,tau)

  ! CCSD(T)
  double precision :: e_t, e_t_err
--- a/src/gpu/NEED
+++ b/src/gpu/NEED
@ -0,0 +1 @@
+gpu_arch
--- a/src/gpu/README.rst
+++ b/src/gpu/README.rst
@ -0,0 +1,6 @@
+===
+gpu
+===
+
+Bindings for GPU routines (architecture independent).
+Architecture-dependent files are in gpu_arch.
--- a/src/gpu_x86/gpu.h
+++ b/src/gpu_x86/gpu.h
--- a/src/gpu_x86/gpu_module.F90
+++ b/src/gpu_x86/gpu_module.F90
@ -1,5 +1,5 @@
 module gpu
-  use, intrinsic :: iso_c_binding, only : c_int32_t, c_int64_t, c_double, c_size_t, c_char
+  use, intrinsic :: iso_c_binding
  implicit none

  interface
@ -17,7 +17,7 @@ module gpu
      integer(c_int64_t), value :: n
    end subroutine

-    subroutine gpu_free_c(ptr) bind(C, name='gpu_free')
+    subroutine gpu_deallocate_c(ptr) bind(C, name='gpu_deallocate')
      import
      type(c_ptr) :: ptr
    end subroutine
@ -89,53 +89,54 @@ module gpu

  end interface

+  contains
+
+
+    subroutine gpu_allocate_double(ptr, s)
+      implicit none
+      double precision, pointer, intent(inout) :: ptr
+      integer, intent(in) :: s(:)
+      type(c_ptr) :: cptr
+
+      call gpu_allocate_c(cptr, sum(s*1_8)*8_8)
+      call c_f_pointer(cptr, ptr, s)
+    end subroutine
+
+    subroutine gpu_deallocate_double(ptr)
+      implicit none
+      double precision, pointer, intent(inout) :: ptr
+      type(c_ptr) :: cptr
+      cptr = c_loc(ptr)
+      call gpu_deallocate(cptr)
+      NULLIFY(ptr)
+    end subroutine
+
 end module

-subroutine gpu_allocate_double(ptr, s)
-  use gpu
-  implicit none
-  double precision, pointer, intent(inout) :: ptr
-  integer*8, intent(in) :: s(*)
-  type(c_ptr) :: cptr
-
-  call gpu_allocate_c(cptr, sum(s)*8_8)
-  call c_f_pointer(cptr, ptr, s)
-end subroutine
-
-subroutine gpu_free_double(ptr)
-  use gpu
-  implicit none
-  double precision, pointer, intent(inout) :: ptr
-  type(c_ptr) :: cptr
-  cptr = cloc(ptr)
-  call gpu_free(cptr)
-  NULLIFY(ptr)
-end subroutine
-
 subroutine gpu_upload_double(cpu_ptr, gpu_ptr, n)
  use gpu
  implicit none
  double precision, intent(in)   :: cpu_ptr(*)
-  double precision, intent(out)  :: gpu_ptr(*)
+  double precision, intent(in)   :: gpu_ptr(*)
  integer(c_int64_t), intent(in) :: n
-  call gpu_upload_c(cpu_ptr, gpu_ptr, 8_8*n)
+  call gpu_upload_c(c_loc(cpu_ptr), c_loc(gpu_ptr), 8_8*n)
 end subroutine

 subroutine gpu_download_double(gpu_ptr, cpu_ptr, n)
  use gpu
  implicit none
  double precision, intent(in)   :: gpu_ptr(*)
-  double precision, intent(out)  :: cpu_ptr(*)
+  double precision, intent(in)   :: cpu_ptr(*)
  integer(c_int64_t), intent(in) :: n
-  call gpu_download_c(gpu_ptr, cpu_ptr, 8_8*n)
+  call gpu_download_c(c_loc(gpu_ptr), c_loc(cpu_ptr), 8_8*n)
 end subroutine

 subroutine gpu_copy_double(gpu_ptr_src, gpu_ptr_dest, n)
  use gpu
  implicit none
  double precision, intent(in)   :: gpu_ptr_src(*)
-  double precision, intent(out)  :: gpu_ptr_dest(*)
+  double precision, intent(in)   :: gpu_ptr_dest(*)
  integer(c_int64_t), intent(in) :: n
-  call gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, 8_8*n)
+  call gpu_copy_c(c_loc(gpu_ptr_src), c_loc(gpu_ptr_dest), 8_8*n)
 end subroutine

--- a/src/gpu_x86/gpu.c
+++ b/src/gpu_x86/gpu.c
@ -25,7 +25,7 @@ void gpu_allocate(void** ptr, const int64_t n) {
  }
 }

-void gpu_free(void** ptr) {
+void gpu_deallocate(void** ptr) {
  free(*ptr);
  *ptr = NULL;
 }