More in C

This commit is contained in:
Anthony Scemama 2023-07-17 00:29:39 +02:00
parent c45db49df5
commit 0ff20e5992
2 changed files with 7 additions and 54 deletions

View File

@@ -461,52 +461,6 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
block_size = 16
call set_multiple_levels_omp(.False.)
double precision, allocatable :: A1(:,:,:,:)
allocate(A1(nO,nO,nO,nO))
double precision, allocatable :: Y_oooo(:,:,:,:)
allocate(Y_oooo(nO,nO,nO,nO))
! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) &
call dgemm('N','N', nO, nO*nO*nO, nV, &
1d0, t1 , size(t1,1), &
cc_space_v_vooo, size(cc_space_v_vooo,1), &
0d0, Y_oooo, size(Y_oooo,1))
!$omp parallel &
!$omp private(u,v,i,j) &
!$omp default(shared)
!$omp do collapse(2)
do j = 1, nO
do i = 1, nO
do v = 1, nO
do u = 1, nO
A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j)
enddo
enddo
enddo
enddo
!$omp end do
!$omp end parallel
deallocate(Y_oooo)
! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
call dgemm('N','N', nO*nO, nO*nO, nV*nV, &
1d0, tau , size(tau,1) * size(tau,2), &
cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), &
1d0, A1 , size(A1,1) * size(A1,2))
call dgemm('N','N',nO*nO,nV*nV,nO*nO, &
1d0, A1, size(A1,1) * size(A1,2), &
tau, size(tau,1) * size(tau,2), &
0d0, r2, size(r2,1) * size(r2,2))
deallocate(A1)
call compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num,t1,tau, &
cc_space_v_vo_chol, cc_space_v_vv_chol, &
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, cc_space_v_vvoo, &

View File

@@ -109,6 +109,8 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
lda = nO * nO;
cudaMalloc((void **)&d_r2, lda * nV * nV * sizeof(double));
memset(r2, 0, nO*nO*nV*nV*sizeof(double));
cublasSetMatrix(nO*nO, nV*nV, sizeof(double), r2, lda, d_r2, lda);
lda = cholesky_mo_num * nV;
cudaMalloc((void **)&d_cc_space_v_vv_chol, lda * nV * sizeof(double));
@@ -124,13 +126,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
#pragma omp sections
{
#pragma omp section
for (size_t i=0 ; i<nO*nO*nV*nV ; ++i)
{
r2[i] += cc_space_v_oovv[i];
}
/*
#pragma omp section
{
double* d_cc_space_v_vooo;
@@ -198,7 +194,6 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
cudaFree(d_A1);
}
*/
}
@@ -265,7 +260,6 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
alpha=1.0; beta=1.0;
m=nO*nO; n=mbs; k=nV*nV;
A=d_tau; lda=nO*nO;
B=d_B1 ; ldb=nV*nV;
C=&(d_r2[nO*nO*(iblock + nV*gam)]); ldc=nO*nO;
@@ -296,6 +290,11 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
cudaFree(d_r2);
cublasDestroy(handle);
}
for (size_t i=0 ; i<nO*nO*nV*nV ; ++i)
{
r2[i] += cc_space_v_oovv[i];
}
}