diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index d23073b8..3c9a2cfc 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -1549,19 +1549,12 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) ! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,1, cc_list_vir,cc_list_vir,cc_list_vir,(/ gam /), B1) + !$omp parallel & !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) & !$omp private(a,b,beta) & !$omp default(none) - !$omp do - do beta = 1, nV - do b = 1, nV - do a = 1, nV - B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) - enddo - enddo - enddo - !$omp end do nowait do i = 1, nO !$omp do do b = 1, nV @@ -1569,7 +1562,7 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam) enddo enddo - !$omp end do nowait + !$omp end do enddo !$omp end parallel diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index dafcf7af..2e7ecdd4 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -48,32 +48,86 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k if (do_ao_cholesky) then - double precision, allocatable :: buffer(:,:,:) - !$OMP PARALLEL & - !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) & - !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)& - !$OMP DEFAULT(NONE) - allocate(buffer(mo_num,mo_num,mo_num)) + double precision, allocatable :: buffer(:,:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) + allocate(v1(cholesky_ao_num,n1,n3), v2(cholesky_ao_num,n2,n4)) + allocate(buffer(n1,n3,n2,n4)) + + !$OMP PARALLEL PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k) !$OMP DO - do i4 = 1, n4 + do i3=1,n3 + idx3 = list3(i3) + do i1=1,n1 + idx1 = list1(i1) + do k=1,cholesky_ao_num + v1(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO + do i4=1,n4 idx4 = list4(i4) - call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) - do i2 = 1, n2 + do i2=1,n2 idx2 = list2(i2) - do i3 = 1, n3 - idx3 = list3(i3) + do k=1,cholesky_ao_num + v2(k,i2,i4) = cholesky_mo_transp(k,idx2,idx4) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP BARRIER + !$OMP END PARALLEL + + call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & + v1, cholesky_ao_num, & + v2, cholesky_ao_num, 0.d0, buffer, n1*n3) + + deallocate(v1,v2) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 do i1 = 1, n1 - idx1 = list1(i1) - v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2) + v(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) enddo enddo enddo enddo - !$OMP END DO - deallocate(buffer) - !$OMP END PARALLEL + !$OMP END PARALLEL DO + +! !$OMP PARALLEL & +! !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,cholesky_mo_transp,cholesky_ao_num,v1) & +! !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer,v2)& +! !$OMP DEFAULT(NONE) +! allocate(buffer(n1,n3,n2), v2(cholesky_ao_num,n2)) +! !$OMP DO +! do i4 = 1, n4 +! idx4 = list4(i4) +! do i2=1,n2 +! idx2 = list2(i2) +! do k=1,cholesky_ao_num +! v2(k,i2) = cholesky_mo_transp(k,idx2,idx4) +! enddo +! enddo +! call dgemm('T','N', n1*n3, n2, cholesky_ao_num, 1.d0, & +! v1, cholesky_ao_num, & +! v2, cholesky_ao_num, 0.d0, buffer, n1*n3) +! do i3 = 1, n3 +! do i2 = 1, n2 +! do i1 = 1, n1 +! v(i1,i2,i3,i4) = buffer(i1,i3,i2) +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! deallocate(buffer, v2) +! !$OMP END PARALLEL +! deallocate(v1) else double precision :: get_two_e_integral @@ -112,6 +166,7 @@ BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] if (do_ao_cholesky) then integer :: i1,i2,i3,i4 double precision, allocatable :: buffer(:,:,:) + call set_multiple_levels_omp(.False.) !$OMP PARALLEL & !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)&