9
1
mirror of https://github.com/QuantumPackage/qp2.git synced 2025-04-28 11:14:43 +02:00
qp2/src/gpu/gpu_module.F90
Anthony Scemama caec518384
Some checks failed
continuous-integration/drone/push Build is failing
Added Nvidia module
2024-06-28 14:53:48 +02:00

483 lines
14 KiB
Fortran

module gpu
use, intrinsic :: iso_c_binding
implicit none
! Data types
! ----------
type gpu_double1
type(c_ptr) :: c
double precision, pointer :: f(:)
end type
type gpu_double2
type(c_ptr) :: c
double precision, pointer :: f(:,:)
end type
type gpu_double3
type(c_ptr) :: c
double precision, pointer :: f(:,:,:)
end type
type gpu_double4
type(c_ptr) :: c
double precision, pointer :: f(:,:,:,:)
end type
type gpu_double5
type(c_ptr) :: c
double precision, pointer :: f(:,:,:,:,:)
end type
type gpu_double6
type(c_ptr) :: c
double precision, pointer :: f(:,:,:,:,:,:)
end type
type gpu_blas
type(c_ptr) :: c
end type
type gpu_stream
type(c_ptr) :: c
end type
! C interfaces
! ------------
interface
integer function gpu_ndevices() bind(C)
end function
subroutine gpu_set_device(id) bind(C)
import
integer(c_int32_t), value :: id
end subroutine
subroutine gpu_allocate_c(ptr, n) bind(C, name='gpu_allocate')
import
type(c_ptr) :: ptr
integer(c_int64_t), value :: n
end subroutine
subroutine gpu_deallocate_c(ptr) bind(C, name='gpu_deallocate')
import
type(c_ptr) :: ptr
end subroutine
subroutine gpu_upload_c(cpu_ptr, gpu_ptr, n) bind(C, name='gpu_upload')
import
type(c_ptr), value :: cpu_ptr
type(c_ptr), value :: gpu_ptr
integer(c_int64_t), value :: n
end subroutine
subroutine gpu_download_c(gpu_ptr, cpu_ptr, n) bind(C, name='gpu_download')
import
type(c_ptr), value :: gpu_ptr
type(c_ptr), value :: cpu_ptr
integer(c_int64_t), value :: n
end subroutine
subroutine gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, n) bind(C, name='gpu_copy')
import
type(c_ptr), value :: gpu_ptr_src
type(c_ptr), value :: gpu_ptr_dest
integer(c_int64_t), value :: n
end subroutine
subroutine gpu_stream_create_c(stream) bind(C, name='gpu_stream_create')
import
type(c_ptr) :: stream
end subroutine
subroutine gpu_stream_destroy_c(stream) bind(C, name='gpu_stream_destroy')
import
type(c_ptr) :: stream
end subroutine
subroutine gpu_set_stream_c(handle, stream) bind(C, name='gpu_set_stream')
import
type(c_ptr) :: handle, stream
end subroutine
subroutine gpu_synchronize() bind(C)
import
end subroutine
subroutine gpu_blas_create_c(handle) bind(C, name='gpu_blas_create')
import
type(c_ptr) :: handle
end subroutine
subroutine gpu_blas_destroy_c(handle) bind(C, name='gpu_blas_destroy')
import
type(c_ptr) :: handle
end subroutine
subroutine gpu_ddot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_ddot')
import
type(c_ptr), intent(in) :: handle
integer(c_int64_t), value :: n, incx, incy
type(c_ptr), intent(in), value :: dx, dy
real(c_double), intent(out) :: res
end subroutine
subroutine gpu_sdot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_sdot')
import
type(c_ptr), intent(in) :: handle
integer(c_int64_t), value :: n, incx, incy
type(c_ptr), intent(in), value :: dx, dy
real(c_float), intent(out) :: res
end subroutine
subroutine gpu_dgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, &
b, ldb, c, ldc) bind(C, name='gpu_dgeam')
import
type(c_ptr), intent(in) :: handle
character(c_char), intent(in), value :: transa, transb
integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc
real(c_double), intent(in), value :: alpha, beta
type(c_ptr), value :: a, b, c
end subroutine
end interface
! Polymorphic interfaces
! ----------------------
interface gpu_allocate
procedure gpu_allocate_double1 &
,gpu_allocate_double2 &
,gpu_allocate_double3 &
,gpu_allocate_double4 &
,gpu_allocate_double5 &
,gpu_allocate_double6
end interface gpu_allocate
interface gpu_deallocate
procedure gpu_deallocate_double1 &
,gpu_deallocate_double2 &
,gpu_deallocate_double3 &
,gpu_deallocate_double4 &
,gpu_deallocate_double5 &
,gpu_deallocate_double6
end interface gpu_deallocate
interface gpu_upload
procedure gpu_upload_double1 &
,gpu_upload_double2 &
,gpu_upload_double3 &
,gpu_upload_double4 &
,gpu_upload_double5 &
,gpu_upload_double6
end interface gpu_upload
interface gpu_download
procedure gpu_download_double1 &
,gpu_download_double2 &
,gpu_download_double3 &
,gpu_download_double4 &
,gpu_download_double5 &
,gpu_download_double6
end interface gpu_download
interface gpu_copy
procedure gpu_copy_double1 &
,gpu_copy_double2 &
,gpu_copy_double3 &
,gpu_copy_double4 &
,gpu_copy_double5 &
,gpu_copy_double6
end interface gpu_copy
contains
! gpu_allocate
! ------------
subroutine gpu_allocate_double1(ptr, s)
implicit none
type(gpu_double1), intent(inout) :: ptr
integer, intent(in) :: s
call gpu_allocate_c(ptr%c, s*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s /))
end subroutine
subroutine gpu_allocate_double2(ptr, s1, s2)
implicit none
type(gpu_double2), intent(inout) :: ptr
integer, intent(in) :: s1, s2
call gpu_allocate_c(ptr%c, s1*s2*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2 /))
end subroutine
subroutine gpu_allocate_double3(ptr, s1, s2, s3)
implicit none
type(gpu_double3), intent(inout) :: ptr
integer, intent(in) :: s1, s2, s3
call gpu_allocate_c(ptr%c, s1*s2*s3*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3 /))
end subroutine
subroutine gpu_allocate_double4(ptr, s1, s2, s3, s4)
implicit none
type(gpu_double4), intent(inout) :: ptr
integer, intent(in) :: s1, s2, s3, s4
call gpu_allocate_c(ptr%c, s1*s2*s3*s4*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4 /))
end subroutine
subroutine gpu_allocate_double5(ptr, s1, s2, s3, s4, s5)
implicit none
type(gpu_double5), intent(inout) :: ptr
integer, intent(in) :: s1, s2, s3, s4, s5
call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5 /))
end subroutine
subroutine gpu_allocate_double6(ptr, s1, s2, s3, s4, s5, s6)
implicit none
type(gpu_double6), intent(inout) :: ptr
integer, intent(in) :: s1, s2, s3, s4, s5, s6
call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*s6*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5, s6 /))
end subroutine
! gpu_deallocate
! --------------
subroutine gpu_deallocate_double1(ptr)
implicit none
type(gpu_double1), intent(inout) :: ptr
call gpu_deallocate_c(ptr%c)
NULLIFY(ptr%f)
end subroutine
subroutine gpu_deallocate_double2(ptr)
implicit none
type(gpu_double2), intent(inout) :: ptr
call gpu_deallocate_c(ptr%c)
NULLIFY(ptr%f)
end subroutine
subroutine gpu_deallocate_double3(ptr)
implicit none
type(gpu_double3), intent(inout) :: ptr
call gpu_deallocate_c(ptr%c)
NULLIFY(ptr%f)
end subroutine
subroutine gpu_deallocate_double4(ptr)
implicit none
type(gpu_double4), intent(inout) :: ptr
call gpu_deallocate_c(ptr%c)
NULLIFY(ptr%f)
end subroutine
subroutine gpu_deallocate_double5(ptr)
implicit none
type(gpu_double5), intent(inout) :: ptr
call gpu_deallocate_c(ptr%c)
NULLIFY(ptr%f)
end subroutine
subroutine gpu_deallocate_double6(ptr)
implicit none
type(gpu_double6), intent(inout) :: ptr
call gpu_deallocate_c(ptr%c)
NULLIFY(ptr%f)
end subroutine
! gpu_upload
! ----------
subroutine gpu_upload_double1(cpu_ptr, gpu_ptr)
implicit none
double precision, intent(in) :: cpu_ptr(:)
type(gpu_double1), intent(in) :: gpu_ptr
call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, 8_8*size(gpu_ptr%f))
end subroutine
subroutine gpu_upload_double2(cpu_ptr, gpu_ptr)
implicit none
double precision, intent(in) :: cpu_ptr(:,:)
type(gpu_double2), intent(in) :: gpu_ptr
call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
end subroutine
subroutine gpu_upload_double3(cpu_ptr, gpu_ptr)
implicit none
double precision, intent(in) :: cpu_ptr(:,:,:)
type(gpu_double3), intent(in) :: gpu_ptr
call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
end subroutine
subroutine gpu_upload_double4(cpu_ptr, gpu_ptr)
implicit none
double precision, intent(in) :: cpu_ptr(:,:,:,:)
type(gpu_double4), intent(in) :: gpu_ptr
call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
end subroutine
subroutine gpu_upload_double5(cpu_ptr, gpu_ptr)
implicit none
double precision, intent(in) :: cpu_ptr(:,:,:,:,:)
type(gpu_double5), intent(in) :: gpu_ptr
call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
end subroutine
subroutine gpu_upload_double6(cpu_ptr, gpu_ptr)
implicit none
double precision, intent(in) :: cpu_ptr(:,:,:,:,:,:)
type(gpu_double6), intent(in) :: gpu_ptr
call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
end subroutine
! gpu_download
! ------------
subroutine gpu_download_double1(gpu_ptr, cpu_ptr)
implicit none
type(gpu_double1), intent(in) :: gpu_ptr
double precision, intent(in) :: cpu_ptr(:)
call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*size(gpu_ptr%f))
end subroutine
subroutine gpu_download_double2(gpu_ptr, cpu_ptr)
implicit none
type(gpu_double2), intent(in) :: gpu_ptr
double precision, intent(in) :: cpu_ptr(:,:)
call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine
subroutine gpu_download_double3(gpu_ptr, cpu_ptr)
implicit none
type(gpu_double3), intent(in) :: gpu_ptr
double precision, intent(in) :: cpu_ptr(:,:,:)
call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine
subroutine gpu_download_double4(gpu_ptr, cpu_ptr)
implicit none
type(gpu_double4), intent(in) :: gpu_ptr
double precision, intent(in) :: cpu_ptr(:,:,:,:)
call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine
subroutine gpu_download_double5(gpu_ptr, cpu_ptr)
implicit none
type(gpu_double5), intent(in) :: gpu_ptr
double precision, intent(in) :: cpu_ptr(:,:,:,:,:)
call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine
subroutine gpu_download_double6(gpu_ptr, cpu_ptr)
implicit none
type(gpu_double6), intent(in) :: gpu_ptr
double precision, intent(in) :: cpu_ptr(:,:,:,:,:,:)
call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine
! gpu_copy
! --------
subroutine gpu_copy_double1(gpu_ptr_src, gpu_ptr_dest)
implicit none
type(gpu_double1), intent(in) :: gpu_ptr_src
type(gpu_double1), intent(in) :: gpu_ptr_dest
call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*size(gpu_ptr_dest%f))
end subroutine
subroutine gpu_copy_double2(gpu_ptr_src, gpu_ptr_dest)
implicit none
type(gpu_double2), intent(in) :: gpu_ptr_src
type(gpu_double2), intent(in) :: gpu_ptr_dest
call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
end subroutine
subroutine gpu_copy_double3(gpu_ptr_src, gpu_ptr_dest)
implicit none
type(gpu_double3), intent(in) :: gpu_ptr_src
type(gpu_double3), intent(in) :: gpu_ptr_dest
call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
end subroutine
subroutine gpu_copy_double4(gpu_ptr_src, gpu_ptr_dest)
implicit none
type(gpu_double4), intent(in) :: gpu_ptr_src
type(gpu_double4), intent(in) :: gpu_ptr_dest
call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
end subroutine
subroutine gpu_copy_double5(gpu_ptr_src, gpu_ptr_dest)
implicit none
type(gpu_double5), intent(in) :: gpu_ptr_src
type(gpu_double5), intent(in) :: gpu_ptr_dest
call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
end subroutine
subroutine gpu_copy_double6(gpu_ptr_src, gpu_ptr_dest)
implicit none
type(gpu_double6), intent(in) :: gpu_ptr_src
type(gpu_double6), intent(in) :: gpu_ptr_dest
call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
end subroutine
! gpu_stream
! ----------
subroutine gpu_stream_create(stream)
import
type(gpu_stream) :: stream
call gpu_stream_create_c(stream%c)
end subroutine
subroutine gpu_stream_destroy(stream)
import
type(gpu_stream) :: stream
call gpu_stream_destroy_c(stream%c)
end subroutine
subroutine gpu_set_stream(handle, stream)
import
type(gpu_blas) :: handle
type(gpu_stream) :: stream
call gpu_set_stream_c(handle%c, stream%c)
end subroutine
! gpu_blas
! --------
subroutine gpu_blas_create(handle)
import
type(gpu_blas) :: handle
call gpu_blas_create_c(handle%c)
end subroutine
subroutine gpu_blas_destroy(handle)
import
type(gpu_blas) :: handle
call gpu_blas_destroy_c(handle%c)
end subroutine
end module