10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-07-29 08:34:04 +02:00

Fxied GPU interface for gfortran

This commit is contained in:
Anthony Scemama 2024-07-02 13:58:19 +02:00
parent 44a7729f65
commit 2bead959d0
4 changed files with 94 additions and 161 deletions

View File

@ -49,10 +49,11 @@ void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) {
/* Streams */ /* Streams */
void gpu_stream_create(void** ptr) { void gpu_stream_create(void** ptr) {
*ptr = (void*) 2; *ptr = (void*) malloc(sizeof(char));
} }
void gpu_stream_destroy(void** ptr) { void gpu_stream_destroy(void** ptr) {
free(*ptr);
*ptr = NULL; *ptr = NULL;
} }
@ -68,11 +69,12 @@ void gpu_synchronize() {
/* BLAS functions */ /* BLAS functions */
void gpu_blas_create(void** handle) { void gpu_blas_create(void** handle) {
*handle = (void*) 1; *handle = (void*) malloc(sizeof(char));
} }
void gpu_blas_destroy(void** handle) { void gpu_blas_destroy(void** handle) {
free(*handle);
*handle = NULL; *handle = NULL;
} }
@ -122,7 +124,7 @@ void gpu_sdot(void* handle, const int64_t n, const float* x, const int64_t incx,
void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha, void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha,
const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy); const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy);
void gpu_dgemv(void* handle, const char transa, const int64_t m, const int64_t n, const double alpha, void gpu_dgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const double alpha,
const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) { const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) {
assert (handle != NULL); assert (handle != NULL);
@ -143,14 +145,14 @@ void gpu_dgemv(void* handle, const char transa, const int64_t m, const int64_t n
assert ( (int64_t) incx_ == incx); assert ( (int64_t) incx_ == incx);
assert ( (int64_t) incy_ == incy); assert ( (int64_t) incy_ == incy);
dgemv_(&transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_); dgemv_(transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_);
} }
void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha, void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha,
const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy); const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy);
void gpu_sgemv(void* handle, const char transa, const int64_t m, const int64_t n, const float alpha, void gpu_sgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const float alpha,
const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) { const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) {
assert (handle != NULL); assert (handle != NULL);
@ -171,14 +173,14 @@ void gpu_sgemv(void* handle, const char transa, const int64_t m, const int64_t n
assert ( (int64_t) incx_ == incx); assert ( (int64_t) incx_ == incx);
assert ( (int64_t) incy_ == incy); assert ( (int64_t) incy_ == incy);
sgemv_(&transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_); sgemv_(transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_);
} }
void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha, void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha,
const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc); const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc);
void gpu_dgemm(void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, void gpu_dgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double alpha,
const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) { const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) {
assert (handle != NULL); assert (handle != NULL);
@ -201,7 +203,7 @@ void gpu_dgemm(void* handle, const char transa, const char transb, const int64_t
assert ( (int64_t) ldb_ == ldb); assert ( (int64_t) ldb_ == ldb);
assert ( (int64_t) ldc_ == ldc); assert ( (int64_t) ldc_ == ldc);
dgemm_(&transa, &transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_); dgemm_(transa, transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_);
} }
@ -209,7 +211,7 @@ void gpu_dgemm(void* handle, const char transa, const char transb, const int64_t
void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha, void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha,
const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc); const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc);
void gpu_sgemm(void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, void gpu_sgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float alpha,
const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) { const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) {
assert (handle != NULL); assert (handle != NULL);
@ -232,18 +234,18 @@ void gpu_sgemm(void* handle, const char transa, const char transb, const int64_t
assert ( (int64_t) ldb_ == ldb); assert ( (int64_t) ldb_ == ldb);
assert ( (int64_t) ldc_ == ldc); assert ( (int64_t) ldc_ == ldc);
sgemm_(&transa, &transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_); sgemm_(transa, transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_);
} }
void gpu_dgeam(void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, void gpu_dgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double alpha,
const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
assert (handle != NULL); assert (handle != NULL);
if ( (transa == 'N' && transb == 'N') || if ( (*transa == 'N' && *transb == 'N') ||
(transa == 'n' && transb == 'N') || (*transa == 'n' && *transb == 'N') ||
(transa == 'N' && transb == 'n') || (*transa == 'N' && *transb == 'n') ||
(transa == 'n' && transb == 'n') ) { (*transa == 'n' && *transb == 'n') ) {
if (alpha == 0.) { if (alpha == 0.) {
@ -271,10 +273,10 @@ void gpu_dgeam(void* handle, const char transa, const char transb, const int64_t
} }
} else if ( (transa == 'N' && transb == 'T') || } else if ( (*transa == 'N' && *transb == 'T') ||
(transa == 'n' && transb == 'T') || (*transa == 'n' && *transb == 'T') ||
(transa == 'N' && transb == 't') || (*transa == 'N' && *transb == 't') ||
(transa == 'n' && transb == 't') ) { (*transa == 'n' && *transb == 't') ) {
if (alpha == 0.) { if (alpha == 0.) {
@ -302,10 +304,10 @@ void gpu_dgeam(void* handle, const char transa, const char transb, const int64_t
} }
} else if ( (transa == 'T' && transb == 'N') || } else if ( (*transa == 'T' && *transb == 'N') ||
(transa == 't' && transb == 'N') || (*transa == 't' && *transb == 'N') ||
(transa == 'T' && transb == 'n') || (*transa == 'T' && *transb == 'n') ||
(transa == 't' && transb == 'n') ) { (*transa == 't' && *transb == 'n') ) {
if (alpha == 0.) { if (alpha == 0.) {
@ -333,10 +335,10 @@ void gpu_dgeam(void* handle, const char transa, const char transb, const int64_t
} }
} else if ( (transa == 'T' && transb == 'T') || } else if ( (*transa == 'T' && *transb == 'T') ||
(transa == 't' && transb == 'T') || (*transa == 't' && *transb == 'T') ||
(transa == 'T' && transb == 't') || (*transa == 'T' && *transb == 't') ||
(transa == 't' && transb == 't') ) { (*transa == 't' && *transb == 't') ) {
if (alpha == 0.) { if (alpha == 0.) {
@ -368,14 +370,14 @@ void gpu_dgeam(void* handle, const char transa, const char transb, const int64_t
} }
void gpu_sgeam(void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha, void gpu_sgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float alpha,
const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) { const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) {
assert (handle != NULL); assert (handle != NULL);
if ( (transa == 'N' && transb == 'N') || if ( (*transa == 'N' && *transb == 'N') ||
(transa == 'n' && transb == 'N') || (*transa == 'n' && *transb == 'N') ||
(transa == 'N' && transb == 'n') || (*transa == 'N' && *transb == 'n') ||
(transa == 'n' && transb == 'n') ) { (*transa == 'n' && *transb == 'n') ) {
if (alpha == 0.) { if (alpha == 0.) {
@ -403,10 +405,10 @@ void gpu_sgeam(void* handle, const char transa, const char transb, const int64_t
} }
} else if ( (transa == 'N' && transb == 'T') || } else if ( (*transa == 'N' && *transb == 'T') ||
(transa == 'n' && transb == 'T') || (*transa == 'n' && *transb == 'T') ||
(transa == 'N' && transb == 't') || (*transa == 'N' && *transb == 't') ||
(transa == 'n' && transb == 't') ) { (*transa == 'n' && *transb == 't') ) {
if (alpha == 0.) { if (alpha == 0.) {
@ -434,10 +436,10 @@ void gpu_sgeam(void* handle, const char transa, const char transb, const int64_t
} }
} else if ( (transa == 'T' && transb == 'N') || } else if ( (*transa == 'T' && *transb == 'N') ||
(transa == 't' && transb == 'N') || (*transa == 't' && *transb == 'N') ||
(transa == 'T' && transb == 'n') || (*transa == 'T' && *transb == 'n') ||
(transa == 't' && transb == 'n') ) { (*transa == 't' && *transb == 'n') ) {
if (alpha == 0.) { if (alpha == 0.) {
@ -465,10 +467,10 @@ void gpu_sgeam(void* handle, const char transa, const char transb, const int64_t
} }
} else if ( (transa == 'T' && transb == 'T') || } else if ( (*transa == 'T' && *transb == 'T') ||
(transa == 't' && transb == 'T') || (*transa == 't' && *transb == 'T') ||
(transa == 'T' && transb == 't') || (*transa == 'T' && *transb == 't') ||
(transa == 't' && transb == 't') ) { (*transa == 't' && *transb == 't') ) {
if (alpha == 0.) { if (alpha == 0.) {

View File

@ -319,10 +319,10 @@ subroutine ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,
call gpu_stream_create(s2) call gpu_stream_create(s2)
call gpu_set_stream(blas_handle,s1) call gpu_set_stream(blas_handle,s1)
call gpu_ddot(blas_handle, nO*nV, d_cc_space_f_vo, 1, t1, 1, e) call gpu_ddot(blas_handle, nO*nV, d_cc_space_f_vo%f(1,1), 1, t1%f(1,1), 1, e)
call gpu_set_stream(blas_handle,s2) call gpu_set_stream(blas_handle,s2)
call gpu_ddot_64(blas_handle, nO*nO*nV*nV*1_8, tau_x, 1_8, d_cc_space_v_oovv, 1_8, energy) call gpu_ddot_64(blas_handle, nO*nO*nV*nV*1_8, tau_x%f(1,1,1,1), 1_8, d_cc_space_v_oovv%f(1,1,1,1), 1_8, energy)
call gpu_set_stream(blas_handle,gpu_default_stream) call gpu_set_stream(blas_handle,gpu_default_stream)
call gpu_synchronize() call gpu_synchronize()
@ -362,9 +362,9 @@ subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau)
call gpu_stream_create(stream(b)) call gpu_stream_create(stream(b))
call gpu_set_stream(blas_handle,stream(b)) call gpu_set_stream(blas_handle,stream(b))
do j=1,nO do j=1,nO
call gpu_dgeam_f(blas_handle, 'N', 'N', nO, nV, & call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, &
1.d0, t2%f(1,j,1,b), nO*nO, & 1.d0, t2%f(1,j,1,b), nO*nO, &
h_t1(j,b), t1%f, nO, & h_t1(j,b), t1%f(1,1), nO, &
tau%f(1,j,1,b), nO*nO) tau%f(1,j,1,b), nO*nO)
enddo enddo
enddo enddo
@ -409,7 +409,7 @@ subroutine update_tau_x_space(nO,nV,tau,tau_x)
do b=1,nV do b=1,nV
do a=1,nV do a=1,nV
call gpu_set_stream(blas_handle,stream(a)) call gpu_set_stream(blas_handle,stream(a))
call gpu_dgeam_f(blas_handle, 'N', 'N', nO, nO, & call gpu_dgeam(blas_handle, 'N', 'N', nO, nO, &
2.d0, tau%f(1,1,a,b), nO, & 2.d0, tau%f(1,1,a,b), nO, &
-1.d0, tau%f(1,1,b,a), nO, & -1.d0, tau%f(1,1,b,a), nO, &
tau_x%f(1,1,a,b), nO) tau_x%f(1,1,a,b), nO)

View File

@ -37,15 +37,15 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, &
!$OMP DO !$OMP DO
do u=1,nO do u=1,nO
call gpu_dgeam_f(blas, 'N', 'N', 1, nO*nV*nV, 1.d0, & call gpu_dgeam(blas, 'N', 'N', 1, nO*nV*nV, 1.d0, &
tau_x%f(u,1,1,1), nO, 0.d0, tau_x%f, nO, tmp_ovv%f, 1) tau_x%f(u,1,1,1), nO, 0.d0, tau_x%f(1,1,1,1), nO, tmp_ovv%f(1,1,1), 1)
do b=1,nV do b=1,nV
call gpu_dgeam_f(blas, 'T', 'T', nV, nO, 1.d0, & call gpu_dgeam(blas, 'T', 'T', nV, nO, 1.d0, &
tmp_ovv%f(1,1,b), nO, 0.d0, & tmp_ovv%f(1,1,b), nO, 0.d0, &
tmp_ovv%f(1,1,b), nO, tmp_vov%f(1,1,b), nV) tmp_ovv%f(1,1,b), nO, tmp_vov%f(1,1,b), nV)
enddo enddo
call gpu_dgemm_f(blas, 'N','T',cholesky_mo_num,nV,nO*nV,1.d0, & call gpu_dgemm(blas, 'N','T',cholesky_mo_num,nV,nO*nV,1.d0, &
d_cc_space_v_ov_chol%f, cholesky_mo_num, tmp_vov%f, nV, & d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, tmp_vov%f(1,1,1), nV, &
0.d0, tau_kau%f(1,1,u), cholesky_mo_num) 0.d0, tau_kau%f(1,1,u), cholesky_mo_num)
enddo enddo
!$OMP END DO !$OMP END DO
@ -59,8 +59,8 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, &
!$OMP END PARALLEL !$OMP END PARALLEL
call gpu_dgemm(blas_handle, 'T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & call gpu_dgemm(blas_handle, 'T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, &
tau_kau, cholesky_mo_num*nV, d_cc_space_v_vo_chol, cholesky_mo_num*nV, & tau_kau%f(1,1,1), cholesky_mo_num*nV, d_cc_space_v_vo_chol%f(1,1,1), cholesky_mo_num*nV, &
1.d0, H_oo, nO) 1.d0, H_oo%f(1,1), nO)
call gpu_synchronize() call gpu_synchronize()
call gpu_deallocate(tau_kau) call gpu_deallocate(tau_kau)
@ -103,12 +103,12 @@ subroutine compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, &
!$OMP DO !$OMP DO
do a = 1, nV do a = 1, nV
do b=1,nV do b=1,nV
call gpu_dgeam_f(blas, 'N', 'N', nO, nO, 1.d0, & call gpu_dgeam(blas, 'N', 'N', nO, nO, 1.d0, &
tau_x%f(1,1,a,b), nO, 0.d0, & tau_x%f(1,1,a,b), nO, 0.d0, &
tau_x%f(1,1,a,b), nO, tmp_oov%f(1,1,b), nO) tau_x%f(1,1,a,b), nO, tmp_oov%f(1,1,b), nO)
enddo enddo
call gpu_dgemm_f(blas, 'N','T',cholesky_mo_num,nO,nO*nV,1.d0, & call gpu_dgemm(blas, 'N', 'T', cholesky_mo_num, nO, nO*nV, 1.d0, &
d_cc_space_v_ov_chol%f, cholesky_mo_num, tmp_oov%f, nO, & d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, tmp_oov%f(1,1,1), nO, &
0.d0, tau_kia%f(1,1,a), cholesky_mo_num) 0.d0, tau_kia%f(1,1,a), cholesky_mo_num)
enddo enddo
!$OMP END DO !$OMP END DO
@ -120,8 +120,8 @@ subroutine compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, &
!$OMP END PARALLEL !$OMP END PARALLEL
call gpu_dgemm(blas_handle, 'T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & call gpu_dgemm(blas_handle, 'T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, &
tau_kia, cholesky_mo_num*nO, d_cc_space_v_ov_chol, cholesky_mo_num*nO, & tau_kia%f(1,1,1), cholesky_mo_num*nO, d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num*nO, &
1.d0, H_vv, nV) 1.d0, H_vv%f(1,1), nV)
call gpu_synchronize() call gpu_synchronize()
call gpu_deallocate(tau_kia) call gpu_deallocate(tau_kia)
@ -148,12 +148,12 @@ subroutine compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, &
call gpu_allocate(tmp_k, cholesky_mo_num) call gpu_allocate(tmp_k, cholesky_mo_num)
call gpu_dgemm(blas_handle, 'N', 'N', cholesky_mo_num, 1, nO*nV, 2.d0, & call gpu_dgemm(blas_handle, 'N', 'N', cholesky_mo_num, 1, nO*nV, 2.d0, &
d_cc_space_v_ov_chol, cholesky_mo_num, & d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, &
t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) t1%f(1,1), nO*nV, 0.d0, tmp_k%f(1), cholesky_mo_num)
call gpu_dgemm(blas_handle, 'T', 'N', nV*nO, 1, cholesky_mo_num, 1.d0, & call gpu_dgemm(blas_handle, 'T', 'N', nV*nO, 1, cholesky_mo_num, 1.d0, &
d_cc_space_v_vo_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & d_cc_space_v_vo_chol%f(1,1,1), cholesky_mo_num, tmp_k%f(1), cholesky_mo_num, 1.d0, &
H_vo, nV*nO) H_vo%f(1,1), nV*nO)
call gpu_deallocate(tmp_k) call gpu_deallocate(tmp_k)
@ -161,7 +161,7 @@ subroutine compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, &
call gpu_allocate(tmp, cholesky_mo_num, nO, nO) call gpu_allocate(tmp, cholesky_mo_num, nO, nO)
call gpu_dgemm(blas_handle, 'N', 'T', cholesky_mo_num*nO, nO, nV, 1.d0, & call gpu_dgemm(blas_handle, 'N', 'T', cholesky_mo_num*nO, nO, nV, 1.d0, &
d_cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, tmp, cholesky_mo_num*nO) d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num*nO, t1%f(1,1), nO, 0.d0, tmp%f(1,1,1), cholesky_mo_num*nO)
call gpu_allocate(tmp2, cholesky_mo_num, nO, nO) call gpu_allocate(tmp2, cholesky_mo_num, nO, nO)
@ -174,7 +174,7 @@ subroutine compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, &
do i=1,nO do i=1,nO
do j=1,nO do j=1,nO
call gpu_set_stream(blas_handle,stream(j)) call gpu_set_stream(blas_handle,stream(j))
call gpu_dgeam_f(blas_handle, 'N', 'N', cholesky_mo_num, 1, 1.d0, & call gpu_dgeam(blas_handle, 'N', 'N', cholesky_mo_num, 1, 1.d0, &
tmp%f(1,i,j), cholesky_mo_num, 0.d0, & tmp%f(1,i,j), cholesky_mo_num, 0.d0, &
tmp%f(1,i,j), cholesky_mo_num, tmp2%f(1,j,i), cholesky_mo_num) tmp%f(1,i,j), cholesky_mo_num, tmp2%f(1,j,i), cholesky_mo_num)
enddo enddo
@ -190,8 +190,8 @@ subroutine compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, &
call gpu_deallocate(tmp) call gpu_deallocate(tmp)
call gpu_dgemm(blas_handle, 'T','N', nV, nO, cholesky_mo_num*nO, -1.d0, & call gpu_dgemm(blas_handle, 'T','N', nV, nO, cholesky_mo_num*nO, -1.d0, &
d_cc_space_v_ov_chol, cholesky_mo_num*nO, tmp2, cholesky_mo_num*nO, & d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num*nO, tmp2%f(1,1,1), cholesky_mo_num*nO, &
1.d0, H_vo, nV) 1.d0, H_vo%f(1,1), nV)
call gpu_synchronize() call gpu_synchronize()
call gpu_deallocate(tmp2) call gpu_deallocate(tmp2)

View File

@ -156,17 +156,17 @@ module gpu
character(c_char), intent(in), value :: transa, transb character(c_char), intent(in), value :: transa, transb
integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc
real(c_float), intent(in), value :: alpha, beta real(c_float), intent(in), value :: alpha, beta
type(c_ptr), value :: a, b, c real(c_float) :: a, b, c
end subroutine end subroutine
subroutine gpu_dgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, & subroutine gpu_dgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, &
b, ldb, beta, c, ldc) bind(C, name='gpu_dgemm') b, ldb, beta, c, ldc) bind(C, name='gpu_dgemm')
import import
type(c_ptr), value, intent(in) :: handle type(c_ptr), value, intent(in) :: handle
character(c_char), intent(in), value :: transa, transb character(c_char), intent(in) :: transa, transb
integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc
real(c_double), intent(in), value :: alpha, beta real(c_double), intent(in), value :: alpha, beta
type(c_ptr), value :: a, b, c real(c_double) :: a, b, c
end subroutine end subroutine
subroutine gpu_sgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, & subroutine gpu_sgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, &
@ -176,7 +176,7 @@ module gpu
character(c_char), intent(in), value :: transa, transb character(c_char), intent(in), value :: transa, transb
integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc
real(c_float), intent(in), value :: alpha, beta real(c_float), intent(in), value :: alpha, beta
type(c_ptr), value :: a, b, c real(c_float) :: a, b, c
end subroutine end subroutine
end interface end interface
@ -570,7 +570,6 @@ module gpu
end subroutine end subroutine
end module
@ -578,38 +577,20 @@ end module
! --- ! ---
subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res) subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res)
use gpu ! use gpu
type(gpu_blas), intent(in) :: handle type(gpu_blas), intent(in) :: handle
integer*4 :: n, incx, incy integer*4 :: n, incx, incy
type(gpu_double1), intent(in) :: dx, dy double precision, target :: dx, dy
double precision, intent(out) :: res
call gpu_ddot_c(handle%c, int(n,c_int64_t), dx%c, int(incx,c_int64_t), dy%c, int(incy,c_int64_t), res)
end subroutine
subroutine gpu_ddot_f(handle, n, dx, incx, dy, incy, res)
use gpu
type(gpu_blas), intent(in) :: handle
integer*4 :: n, incx, incy
double precision, target :: dx(*), dy(*)
double precision, intent(out) :: res double precision, intent(out) :: res
call gpu_ddot_c(handle%c, int(n,c_int64_t), c_loc(dx), int(incx,c_int64_t), c_loc(dy), int(incy,c_int64_t), res) call gpu_ddot_c(handle%c, int(n,c_int64_t), c_loc(dx), int(incx,c_int64_t), c_loc(dy), int(incy,c_int64_t), res)
end subroutine end subroutine
subroutine gpu_ddot_64(handle, n, dx, incx, dy, incy, res) subroutine gpu_ddot_64(handle, n, dx, incx, dy, incy, res)
use gpu ! use gpu
type(gpu_blas), intent(in) :: handle type(gpu_blas), intent(in) :: handle
integer*8 :: n, incx, incy integer*8 :: n, incx, incy
type(gpu_double1), intent(in) :: dx, dy double precision, target :: dx, dy
double precision, intent(out) :: res
call gpu_ddot_c(handle%c, n, dx%c, incx, dy%c, incy, res)
end subroutine
subroutine gpu_ddot_f_64(handle, n, dx, incx, dy, incy, res)
use gpu
type(gpu_blas), intent(in) :: handle
integer*8 :: n, incx, incy
double precision, target :: dx(*), dy(*)
double precision, intent(out) :: res double precision, intent(out) :: res
call gpu_ddot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res) call gpu_ddot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res)
end subroutine end subroutine
@ -620,25 +601,12 @@ end subroutine
subroutine gpu_dgeam(handle, transa, transb, m, n, alpha, a, lda, beta, & subroutine gpu_dgeam(handle, transa, transb, m, n, alpha, a, lda, beta, &
b, ldb, c, ldc) b, ldb, c, ldc)
use gpu ! use gpu
type(gpu_blas), intent(in) :: handle type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa, transb character, intent(in) :: transa, transb
integer*4, intent(in) :: m, n, lda, ldb, ldc integer*4, intent(in) :: m, n, lda, ldb, ldc
double precision, intent(in) :: alpha, beta double precision, intent(in) :: alpha, beta
type(gpu_double2) :: a, b, c double precision, target :: a, b, c
call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, a%c, int(lda,c_int64_t), beta, &
b%c, int(ldb,c_int64_t), c%c, int(ldc,c_int64_t))
end subroutine
subroutine gpu_dgeam_f(handle, transa, transb, m, n, alpha, a, lda, beta, &
b, ldb, c, ldc)
use gpu
type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa, transb
integer*4, intent(in) :: m, n, lda, ldb, ldc
double precision, intent(in) :: alpha, beta
double precision, target :: a(*), b(*), c(*)
call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, c_loc(a), int(lda,c_int64_t), beta, & call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, c_loc(a), int(lda,c_int64_t), beta, &
c_loc(b), int(ldb,c_int64_t), c_loc(c), int(ldc,c_int64_t)) c_loc(b), int(ldb,c_int64_t), c_loc(c), int(ldc,c_int64_t))
end subroutine end subroutine
@ -646,25 +614,12 @@ end subroutine
subroutine gpu_dgeam_64(handle, transa, transb, m, n, alpha, a, lda, beta, & subroutine gpu_dgeam_64(handle, transa, transb, m, n, alpha, a, lda, beta, &
b, ldb, c, ldc) b, ldb, c, ldc)
use gpu ! use gpu
type(gpu_blas), intent(in) :: handle type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa, transb character, intent(in) :: transa, transb
integer*8, intent(in) :: m, n, lda, ldb, ldc integer*8, intent(in) :: m, n, lda, ldb, ldc
double precision, intent(in) :: alpha, beta double precision, intent(in) :: alpha, beta
type(gpu_double2) :: a, b, c double precision, target :: a, b, c
call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, a%c, int(lda,c_int64_t), beta, &
b%c, int(ldb,c_int64_t), c%c, int(ldc,c_int64_t))
end subroutine
subroutine gpu_dgeam_f_64(handle, transa, transb, m, n, alpha, a, lda, beta, &
b, ldb, c, ldc)
use gpu
type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa, transb
integer*8, intent(in) :: m, n, lda, ldb, ldc
double precision, intent(in) :: alpha, beta
double precision, target :: a(*), b(*), c(*)
call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, c_loc(a), int(lda,c_int64_t), beta, & call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, c_loc(a), int(lda,c_int64_t), beta, &
c_loc(b), int(ldb,c_int64_t), c_loc(c), int(ldc,c_int64_t)) c_loc(b), int(ldb,c_int64_t), c_loc(c), int(ldc,c_int64_t))
end subroutine end subroutine
@ -675,51 +630,27 @@ end subroutine
subroutine gpu_dgemm(handle, transa, transb, m, n, k, alpha, a, lda, & subroutine gpu_dgemm(handle, transa, transb, m, n, k, alpha, a, lda, &
b, ldb, beta, c, ldc) b, ldb, beta, c, ldc)
use gpu ! use gpu
type(gpu_blas), intent(in) :: handle type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa, transb character, intent(in) :: transa, transb
integer*4, intent(in) :: m, n, k, lda, ldb, ldc integer*4, intent(in) :: m, n, k, lda, ldb, ldc
double precision, intent(in) :: alpha, beta double precision, intent(in) :: alpha, beta
type(gpu_double2) :: a, b, c double precision :: a, b, c
call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), & call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), &
alpha, a%c, int(lda,c_int64_t), & alpha, a, int(lda,c_int64_t), &
b%c, int(ldb,c_int64_t), beta, c%c, int(ldc,c_int64_t)) b, int(ldb,c_int64_t), beta, c, int(ldc,c_int64_t))
end subroutine end subroutine
subroutine gpu_dgemm_64(handle, transa, transb, m, n, k, alpha, a, lda, & subroutine gpu_dgemm_64(handle, transa, transb, m, n, k, alpha, a, lda, &
b, ldb, beta, c, ldc) b, ldb, beta, c, ldc)
use gpu ! use gpu
type(gpu_blas), intent(in) :: handle type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa, transb character, intent(in) :: transa, transb
integer*8, intent(in) :: m, n, k, lda, ldb, ldc integer*8, intent(in) :: m, n, k, lda, ldb, ldc
double precision, intent(in) :: alpha, beta double precision, intent(in) :: alpha, beta
type(gpu_double2) :: a, b, c double precision :: a, b, c
call gpu_dgemm_c(handle%c, transa, transb, m, n, k, &
alpha, a%c, lda, b%c, ldb, beta, c%c, ldc)
end subroutine
subroutine gpu_dgemm_f(handle, transa, transb, m, n, k, alpha, a, lda, &
b, ldb, beta, c, ldc)
use gpu
type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa, transb
integer*4, intent(in) :: m, n, k, lda, ldb, ldc
double precision, intent(in) :: alpha, beta
double precision, target :: a(*), b(*), c(*)
call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), & call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), &
alpha, c_loc(a), int(lda,c_int64_t), & alpha, a, int(lda,c_int64_t), b, int(ldb,c_int64_t), beta, c, int(ldc,c_int64_t))
c_loc(b), int(ldb,c_int64_t), beta, c_loc(c), int(ldc,c_int64_t))
end subroutine
subroutine gpu_dgemm_f_64(handle, transa, transb, m, n, k, alpha, a, lda, &
b, ldb, beta, c, ldc)
use gpu
type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa, transb
integer*8, intent(in) :: m, n, k, lda, ldb, ldc
double precision, intent(in) :: alpha, beta
double precision, target :: a(*), b(*), c(*)
call gpu_dgemm_c(handle%c, transa, transb, m, n, k, &
alpha, c_loc(a), lda, c_loc(b), ldb, beta, c_loc(c), ldc)
end subroutine end subroutine
end module