mirror of
https://gitlab.com/scemama/qp_plugins_scemama.git
synced 2024-11-07 06:33:40 +01:00
More in C
This commit is contained in:
parent
c45db49df5
commit
0ff20e5992
@ -461,52 +461,6 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
block_size = 16
|
||||
call set_multiple_levels_omp(.False.)
|
||||
|
||||
double precision, allocatable :: A1(:,:,:,:)
|
||||
allocate(A1(nO,nO,nO,nO))
|
||||
|
||||
double precision, allocatable :: Y_oooo(:,:,:,:)
|
||||
allocate(Y_oooo(nO,nO,nO,nO))
|
||||
|
||||
! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
|
||||
! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) &
|
||||
|
||||
call dgemm('N','N', nO, nO*nO*nO, nV, &
|
||||
1d0, t1 , size(t1,1), &
|
||||
cc_space_v_vooo, size(cc_space_v_vooo,1), &
|
||||
0d0, Y_oooo, size(Y_oooo,1))
|
||||
|
||||
!$omp parallel &
|
||||
!$omp private(u,v,i,j) &
|
||||
!$omp default(shared)
|
||||
!$omp do collapse(2)
|
||||
do j = 1, nO
|
||||
do i = 1, nO
|
||||
do v = 1, nO
|
||||
do u = 1, nO
|
||||
A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$omp end do
|
||||
!$omp end parallel
|
||||
|
||||
deallocate(Y_oooo)
|
||||
|
||||
! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
|
||||
call dgemm('N','N', nO*nO, nO*nO, nV*nV, &
|
||||
1d0, tau , size(tau,1) * size(tau,2), &
|
||||
cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), &
|
||||
1d0, A1 , size(A1,1) * size(A1,2))
|
||||
|
||||
call dgemm('N','N',nO*nO,nV*nV,nO*nO, &
|
||||
1d0, A1, size(A1,1) * size(A1,2), &
|
||||
tau, size(tau,1) * size(tau,2), &
|
||||
0d0, r2, size(r2,1) * size(r2,2))
|
||||
|
||||
deallocate(A1)
|
||||
|
||||
|
||||
call compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num,t1,tau, &
|
||||
cc_space_v_vo_chol, cc_space_v_vv_chol, &
|
||||
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, cc_space_v_vvoo, &
|
||||
|
@ -109,6 +109,8 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
|
||||
lda = nO * nO;
|
||||
cudaMalloc((void **)&d_r2, lda * nV * nV * sizeof(double));
|
||||
memset(r2, 0, nO*nO*nV*nV*sizeof(double));
|
||||
cublasSetMatrix(nO*nO, nV*nV, sizeof(double), r2, lda, d_r2, lda);
|
||||
|
||||
lda = cholesky_mo_num * nV;
|
||||
cudaMalloc((void **)&d_cc_space_v_vv_chol, lda * nV * sizeof(double));
|
||||
@ -124,13 +126,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
|
||||
#pragma omp sections
|
||||
{
|
||||
#pragma omp section
|
||||
for (size_t i=0 ; i<nO*nO*nV*nV ; ++i)
|
||||
{
|
||||
r2[i] += cc_space_v_oovv[i];
|
||||
}
|
||||
|
||||
/*
|
||||
#pragma omp section
|
||||
{
|
||||
double* d_cc_space_v_vooo;
|
||||
@ -198,7 +194,6 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
cudaFree(d_A1);
|
||||
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
@ -265,7 +260,6 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
|
||||
alpha=1.0; beta=1.0;
|
||||
m=nO*nO; n=mbs; k=nV*nV;
|
||||
|
||||
A=d_tau; lda=nO*nO;
|
||||
B=d_B1 ; ldb=nV*nV;
|
||||
C=&(d_r2[nO*nO*(iblock + nV*gam)]); ldc=nO*nO;
|
||||
@ -296,6 +290,11 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
cudaFree(d_r2);
|
||||
cublasDestroy(handle);
|
||||
}
|
||||
|
||||
for (size_t i=0 ; i<nO*nO*nV*nV ; ++i)
|
||||
{
|
||||
r2[i] += cc_space_v_oovv[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user