10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-12-30 16:15:48 +01:00
QuantumPackage/plugins/local/tc_int/no_1e.irp.f
2024-08-10 18:13:23 +02:00

1180 lines
50 KiB
Fortran

! ---
subroutine provide_no_1e(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r, int2_grad1_u12, noL_1e)
implicit none
integer, intent(in) :: n_grid, n_mo
integer, intent(in) :: ne_a, ne_b
double precision, intent(in) :: wr1(n_grid)
double precision, intent(in) :: mos_l_in_r(n_grid,n_mo)
double precision, intent(in) :: mos_r_in_r(n_grid,n_mo)
double precision, intent(in) :: int2_grad1_u12(n_grid,3,n_mo,n_mo)
double precision, intent(out) :: noL_1e(n_mo,n_mo)
integer :: p, s, i, j, ipoint
double precision :: t0, t1
double precision, allocatable :: tmpC(:,:,:,:), tmpD(:,:), tmpE(:,:,:), tmpF(:,:,:)
double precision, allocatable :: tmpL(:,:,:), tmpR(:,:,:), tmpM(:,:), tmpS(:), tmpO(:), tmpJ(:,:)
double precision, allocatable :: tmpL0(:,:,:), tmpR0(:,:,:)
double precision, allocatable :: tmpM_priv(:,:), tmpS_priv(:), tmpO_priv(:), tmpJ_priv(:,:)
call wall_time(t0)
if(ne_a .eq. ne_b) then
allocate(tmpO(n_grid), tmpJ(n_grid,3))
tmpO = 0.d0
tmpJ = 0.d0
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(i, ipoint, tmpO_priv, tmpJ_priv) &
!$OMP SHARED(ne_b, n_grid, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, tmpO, tmpJ)
allocate(tmpO_priv(n_grid), tmpJ_priv(n_grid,3))
tmpO_priv = 0.d0
tmpJ_priv = 0.d0
!$OMP DO
do i = 1, ne_b
do ipoint = 1, n_grid
tmpO_priv(ipoint) = tmpO_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i)
tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i)
tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i)
tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i)
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
tmpO = tmpO + tmpO_priv
tmpJ = tmpJ + tmpJ_priv
!$OMP END CRITICAL
deallocate(tmpO_priv, tmpJ_priv)
!$OMP END PARALLEL
! ---
allocate(tmpM(n_grid,3), tmpS(n_grid))
tmpM = 0.d0
tmpS = 0.d0
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(i, j, ipoint, tmpM_priv, tmpS_priv) &
!$OMP SHARED(ne_b, n_grid, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, tmpM, tmpS)
allocate(tmpM_priv(n_grid,3), tmpS_priv(n_grid))
tmpM_priv = 0.d0
tmpS_priv = 0.d0
!$OMP DO COLLAPSE(2)
do i = 1, ne_b
do j = 1, ne_b
do ipoint = 1, n_grid
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
tmpM = tmpM + tmpM_priv
tmpS = tmpS + tmpS_priv
!$OMP END CRITICAL
deallocate(tmpM_priv, tmpS_priv)
!$OMP END PARALLEL
! ---
allocate(tmpC(n_grid,4,n_mo,n_mo))
allocate(tmpD(n_grid,4))
do ipoint = 1, n_grid
tmpD(ipoint,1) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,1) - tmpM(ipoint,1))
tmpD(ipoint,2) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,2) - tmpM(ipoint,2))
tmpD(ipoint,3) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,3) - tmpM(ipoint,3))
tmpD(ipoint,4) = -wr1(ipoint) * tmpO(ipoint)
tmpS(ipoint) = 2.d0 * (tmpJ(ipoint,1) * tmpJ(ipoint,1) + tmpJ(ipoint,2) * tmpJ(ipoint,2) + tmpJ(ipoint,3) * tmpJ(ipoint,3)) - tmpS(ipoint)
enddo
deallocate(tmpO, tmpM)
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(p, s, i, ipoint) &
!$OMP SHARED(n_mo, ne_b, n_grid, &
!$OMP int2_grad1_u12, tmpC)
!$OMP DO COLLAPSE(2)
do s = 1, n_mo
do p = 1, n_mo
do ipoint = 1, n_grid
tmpC(ipoint,1,p,s) = int2_grad1_u12(ipoint,1,p,s)
tmpC(ipoint,2,p,s) = int2_grad1_u12(ipoint,2,p,s)
tmpC(ipoint,3,p,s) = int2_grad1_u12(ipoint,3,p,s)
enddo
tmpC(:,4,p,s) = 0.d0
do i = 1, ne_b
do ipoint = 1, n_grid
tmpC(ipoint,4,p,s) = tmpC(ipoint,4,p,s) + int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) &
+ int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) &
+ int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s)
enddo
enddo
enddo ! p
enddo ! s
!$OMP END DO
!$OMP END PARALLEL
call dgemv( 'T', 4*n_grid, n_mo*n_mo, 2.d0 &
, tmpC(1,1,1,1), size(tmpC, 1) * size(tmpC, 2) &
, tmpD(1,1), 1 &
, 0.d0, noL_1e(1,1), 1)
deallocate(tmpC, tmpD)
! ---
allocate(tmpL(n_grid,3,n_mo))
allocate(tmpR(n_grid,3,n_mo))
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(p, i, ipoint) &
!$OMP SHARED(ne_b, n_grid, n_mo, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, tmpL, tmpR)
!$OMP DO
do p = 1, n_mo
tmpL(:,1:3,p) = 0.d0
tmpR(:,1:3,p) = 0.d0
do i = 1, ne_b
do ipoint = 1, n_grid
tmpL(ipoint,1,p) = tmpL(ipoint,1,p) + int2_grad1_u12(ipoint,1,p,i) * mos_l_in_r(ipoint,i)
tmpL(ipoint,2,p) = tmpL(ipoint,2,p) + int2_grad1_u12(ipoint,2,p,i) * mos_l_in_r(ipoint,i)
tmpL(ipoint,3,p) = tmpL(ipoint,3,p) + int2_grad1_u12(ipoint,3,p,i) * mos_l_in_r(ipoint,i)
tmpR(ipoint,1,p) = tmpR(ipoint,1,p) + int2_grad1_u12(ipoint,1,i,p) * mos_r_in_r(ipoint,i)
tmpR(ipoint,2,p) = tmpR(ipoint,2,p) + int2_grad1_u12(ipoint,2,i,p) * mos_r_in_r(ipoint,i)
tmpR(ipoint,3,p) = tmpR(ipoint,3,p) + int2_grad1_u12(ipoint,3,i,p) * mos_r_in_r(ipoint,i)
enddo
enddo
enddo ! p
!$OMP END DO
!$OMP END PARALLEL
! ---
allocate(tmpE(n_grid,5,n_mo))
allocate(tmpF(n_grid,5,n_mo))
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(p, i, j, ipoint) &
!$OMP SHARED(ne_b, n_grid, n_mo, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, wr1, &
!$OMP tmpL, tmpR, tmpJ, tmpS, tmpE, tmpF)
!$OMP DO
do p = 1, n_mo
do ipoint = 1, n_grid
tmpE(ipoint,1,p) = wr1(ipoint) * mos_l_in_r(ipoint,p)
tmpE(ipoint,2,p) = -2.d0 * (tmpL(ipoint,1,p) * tmpJ(ipoint,1) + tmpL(ipoint,2,p) * tmpJ(ipoint,2) + tmpL(ipoint,3,p) * tmpJ(ipoint,3))
tmpE(ipoint,3,p) = wr1(ipoint) * tmpL(ipoint,1,p)
tmpE(ipoint,4,p) = wr1(ipoint) * tmpL(ipoint,2,p)
tmpE(ipoint,5,p) = wr1(ipoint) * tmpL(ipoint,3,p)
tmpF(ipoint,1,p) = -2.d0 * (tmpR(ipoint,1,p) * tmpJ(ipoint,1) + tmpR(ipoint,2,p) * tmpJ(ipoint,2) + tmpR(ipoint,3,p) * tmpJ(ipoint,3)) &
+ mos_r_in_r(ipoint,p) * tmpS(ipoint)
tmpF(ipoint,2,p) = wr1(ipoint) * mos_r_in_r(ipoint,p)
tmpF(ipoint,3,p) = tmpR(ipoint,1,p)
tmpF(ipoint,4,p) = tmpR(ipoint,2,p)
tmpF(ipoint,5,p) = tmpR(ipoint,3,p)
enddo
do i = 1, ne_b
do j = 1, ne_b
do ipoint = 1, n_grid
tmpE(ipoint,2,p) = tmpE(ipoint,2,p) + mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) &
+ int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) &
+ int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) )
tmpF(ipoint,1,p) = tmpF(ipoint,1,p) + mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) )
enddo ! ipoint
enddo ! j
enddo ! i
enddo ! p
!$OMP END DO
!$OMP END PARALLEL
deallocate(tmpL, tmpR, tmpJ, tmpS)
call dgemm( 'T', 'N', n_mo, n_mo, 5*n_grid, 1.d0 &
, tmpE(1,1,1), 5*n_grid, tmpF(1,1,1), 5*n_grid &
, 1.d0, noL_1e(1,1), n_mo)
deallocate(tmpE, tmpF)
! ---
else
allocate(tmpO(n_grid), tmpJ(n_grid,3))
tmpO = 0.d0
tmpJ = 0.d0
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(i, ipoint, tmpO_priv, tmpJ_priv) &
!$OMP SHARED(ne_b, ne_a, n_grid, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, tmpO, tmpJ)
allocate(tmpO_priv(n_grid), tmpJ_priv(n_grid,3))
tmpO_priv = 0.d0
tmpJ_priv = 0.d0
!$OMP DO
do i = 1, ne_b
do ipoint = 1, n_grid
tmpO_priv(ipoint) = tmpO_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i)
tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i)
tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i)
tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i)
enddo
enddo
!$OMP END DO NOWAIT
!$OMP DO
do i = ne_b+1, ne_a
do ipoint = 1, n_grid
tmpO_priv(ipoint) = tmpO_priv(ipoint) + 0.5d0 * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i)
tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,i)
tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,i)
tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,i)
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
tmpO = tmpO + tmpO_priv
tmpJ = tmpJ + tmpJ_priv
!$OMP END CRITICAL
deallocate(tmpO_priv, tmpJ_priv)
!$OMP END PARALLEL
! ---
allocate(tmpM(n_grid,3), tmpS(n_grid))
tmpM = 0.d0
tmpS = 0.d0
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(i, j, ipoint, tmpM_priv, tmpS_priv) &
!$OMP SHARED(ne_b, ne_a, n_grid, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, tmpM, tmpS)
allocate(tmpM_priv(n_grid,3), tmpS_priv(n_grid))
tmpM_priv = 0.d0
tmpS_priv = 0.d0
!$OMP DO COLLAPSE(2)
do i = 1, ne_b
do j = 1, ne_b
do ipoint = 1, n_grid
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP DO COLLAPSE(2)
do i = ne_b+1, ne_a
do j = 1, ne_b
do ipoint = 1, n_grid
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i)
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i)
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i)
tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP DO COLLAPSE(2)
do i = ne_b+1, ne_a
do j = ne_b+1, ne_a
do ipoint = 1, n_grid
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpS_priv(ipoint) = tmpS_priv(ipoint) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
+ 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
+ 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
tmpM = tmpM + tmpM_priv
tmpS = tmpS + tmpS_priv
!$OMP END CRITICAL
deallocate(tmpM_priv, tmpS_priv)
!$OMP END PARALLEL
! ---
allocate(tmpC(n_grid,4,n_mo,n_mo))
allocate(tmpD(n_grid,4))
do ipoint = 1, n_grid
tmpD(ipoint,1) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,1) - tmpM(ipoint,1))
tmpD(ipoint,2) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,2) - tmpM(ipoint,2))
tmpD(ipoint,3) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,3) - tmpM(ipoint,3))
tmpD(ipoint,4) = -wr1(ipoint) * tmpO(ipoint)
tmpS(ipoint) = 2.d0 * (tmpJ(ipoint,1) * tmpJ(ipoint,1) + tmpJ(ipoint,2) * tmpJ(ipoint,2) + tmpJ(ipoint,3) * tmpJ(ipoint,3)) - tmpS(ipoint)
enddo
deallocate(tmpO, tmpM)
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(p, s, i, ipoint) &
!$OMP SHARED(n_mo, ne_b, n_grid, &
!$OMP ne_a, int2_grad1_u12, tmpC)
!$OMP DO COLLAPSE(2)
do s = 1, n_mo
do p = 1, n_mo
do ipoint = 1, n_grid
tmpC(ipoint,1,p,s) = int2_grad1_u12(ipoint,1,p,s)
tmpC(ipoint,2,p,s) = int2_grad1_u12(ipoint,2,p,s)
tmpC(ipoint,3,p,s) = int2_grad1_u12(ipoint,3,p,s)
enddo
tmpC(:,4,p,s) = 0.d0
do i = 1, ne_b
do ipoint = 1, n_grid
tmpC(ipoint,4,p,s) = tmpC(ipoint,4,p,s) + int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) &
+ int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) &
+ int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s)
enddo
enddo
do i = ne_b+1, ne_a
do ipoint = 1, n_grid
tmpC(ipoint,4,p,s) = tmpC(ipoint,4,p,s) + 0.5d0 * int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) &
+ 0.5d0 * int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) &
+ 0.5d0 * int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s)
enddo
enddo
enddo ! p
enddo ! s
!$OMP END DO
!$OMP END PARALLEL
call dgemv( 'T', 4*n_grid, n_mo*n_mo, 2.d0 &
, tmpC(1,1,1,1), size(tmpC, 1) * size(tmpC, 2) &
, tmpD(1,1), 1 &
, 0.d0, noL_1e(1,1), 1)
deallocate(tmpC, tmpD)
! ---
allocate(tmpL(n_grid,3,n_mo), tmpL0(n_grid,3,n_mo))
allocate(tmpR(n_grid,3,n_mo), tmpR0(n_grid,3,n_mo))
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(p, i, ipoint) &
!$OMP SHARED(ne_b, ne_a, n_grid, n_mo, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, tmpL0, tmpR0, tmpL, tmpR)
!$OMP DO
do p = 1, n_mo
tmpL0(:,1:3,p) = 0.d0
tmpR0(:,1:3,p) = 0.d0
do i = ne_b+1, ne_a
do ipoint = 1, n_grid
tmpL0(ipoint,1,p) = tmpL0(ipoint,1,p) + 0.5d0 * int2_grad1_u12(ipoint,1,p,i) * mos_l_in_r(ipoint,i)
tmpL0(ipoint,2,p) = tmpL0(ipoint,2,p) + 0.5d0 * int2_grad1_u12(ipoint,2,p,i) * mos_l_in_r(ipoint,i)
tmpL0(ipoint,3,p) = tmpL0(ipoint,3,p) + 0.5d0 * int2_grad1_u12(ipoint,3,p,i) * mos_l_in_r(ipoint,i)
tmpR0(ipoint,1,p) = tmpR0(ipoint,1,p) + 0.5d0 * int2_grad1_u12(ipoint,1,i,p) * mos_r_in_r(ipoint,i)
tmpR0(ipoint,2,p) = tmpR0(ipoint,2,p) + 0.5d0 * int2_grad1_u12(ipoint,2,i,p) * mos_r_in_r(ipoint,i)
tmpR0(ipoint,3,p) = tmpR0(ipoint,3,p) + 0.5d0 * int2_grad1_u12(ipoint,3,i,p) * mos_r_in_r(ipoint,i)
enddo
enddo
tmpL(:,1:3,p) = tmpL0(:,1:3,p)
tmpR(:,1:3,p) = tmpR0(:,1:3,p)
do i = 1, ne_b
do ipoint = 1, n_grid
tmpL(ipoint,1,p) = tmpL(ipoint,1,p) + int2_grad1_u12(ipoint,1,p,i) * mos_l_in_r(ipoint,i)
tmpL(ipoint,2,p) = tmpL(ipoint,2,p) + int2_grad1_u12(ipoint,2,p,i) * mos_l_in_r(ipoint,i)
tmpL(ipoint,3,p) = tmpL(ipoint,3,p) + int2_grad1_u12(ipoint,3,p,i) * mos_l_in_r(ipoint,i)
tmpR(ipoint,1,p) = tmpR(ipoint,1,p) + int2_grad1_u12(ipoint,1,i,p) * mos_r_in_r(ipoint,i)
tmpR(ipoint,2,p) = tmpR(ipoint,2,p) + int2_grad1_u12(ipoint,2,i,p) * mos_r_in_r(ipoint,i)
tmpR(ipoint,3,p) = tmpR(ipoint,3,p) + int2_grad1_u12(ipoint,3,i,p) * mos_r_in_r(ipoint,i)
enddo
enddo
enddo ! p
!$OMP END DO
!$OMP END PARALLEL
! ---
allocate(tmpE(n_grid,8,n_mo))
allocate(tmpF(n_grid,8,n_mo))
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(p, i, j, ipoint) &
!$OMP SHARED(ne_b, ne_a, n_grid, n_mo, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, wr1, &
!$OMP tmpL, tmpL0, tmpR, tmpR0, tmpJ, tmpS, tmpE, tmpF)
!$OMP DO
do p = 1, n_mo
do ipoint = 1, n_grid
tmpE(ipoint,1,p) = wr1(ipoint) * mos_l_in_r(ipoint,p)
tmpE(ipoint,2,p) = -2.d0 * (tmpL(ipoint,1,p) * tmpJ(ipoint,1) + tmpL(ipoint,2,p) * tmpJ(ipoint,2) + tmpL(ipoint,3,p) * tmpJ(ipoint,3))
tmpE(ipoint,3,p) = wr1(ipoint) * tmpL(ipoint,1,p)
tmpE(ipoint,4,p) = wr1(ipoint) * tmpL(ipoint,2,p)
tmpE(ipoint,5,p) = wr1(ipoint) * tmpL(ipoint,3,p)
tmpE(ipoint,6,p) = wr1(ipoint) * tmpL0(ipoint,1,p)
tmpE(ipoint,7,p) = wr1(ipoint) * tmpL0(ipoint,2,p)
tmpE(ipoint,8,p) = wr1(ipoint) * tmpL0(ipoint,3,p)
tmpF(ipoint,1,p) = -2.d0 * (tmpR(ipoint,1,p) * tmpJ(ipoint,1) + tmpR(ipoint,2,p) * tmpJ(ipoint,2) + tmpR(ipoint,3,p) * tmpJ(ipoint,3)) &
+ mos_r_in_r(ipoint,p) * tmpS(ipoint)
tmpF(ipoint,2,p) = wr1(ipoint) * mos_r_in_r(ipoint,p)
tmpF(ipoint,3,p) = tmpR(ipoint,1,p)
tmpF(ipoint,4,p) = tmpR(ipoint,2,p)
tmpF(ipoint,5,p) = tmpR(ipoint,3,p)
tmpF(ipoint,6,p) = tmpR0(ipoint,1,p)
tmpF(ipoint,7,p) = tmpR0(ipoint,2,p)
tmpF(ipoint,8,p) = tmpR0(ipoint,3,p)
enddo
do i = 1, ne_b
do j = 1, ne_b
do ipoint = 1, n_grid
tmpE(ipoint,2,p) = tmpE(ipoint,2,p) + mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) &
+ int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) &
+ int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) )
tmpF(ipoint,1,p) = tmpF(ipoint,1,p) + mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) )
enddo ! ipoint
enddo ! j
enddo ! i
do i = ne_b+1, ne_a
do j = 1, ne_b
do ipoint = 1, n_grid
tmpE(ipoint,2,p) = tmpE(ipoint,2,p) + 0.5d0 * mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) &
+ int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) &
+ int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) )
tmpE(ipoint,2,p) = tmpE(ipoint,2,p) + 0.5d0 * mos_l_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,p,j) * int2_grad1_u12(ipoint,1,j,i) &
+ int2_grad1_u12(ipoint,2,p,j) * int2_grad1_u12(ipoint,2,j,i) &
+ int2_grad1_u12(ipoint,3,p,j) * int2_grad1_u12(ipoint,3,j,i) )
tmpF(ipoint,1,p) = tmpF(ipoint,1,p) + 0.5d0 * mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) )
tmpF(ipoint,1,p) = tmpF(ipoint,1,p) + 0.5d0 * mos_r_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,j,i) * int2_grad1_u12(ipoint,1,i,p) &
+ int2_grad1_u12(ipoint,2,j,i) * int2_grad1_u12(ipoint,2,i,p) &
+ int2_grad1_u12(ipoint,3,j,i) * int2_grad1_u12(ipoint,3,i,p) )
enddo ! ipoint
enddo ! j
enddo ! i
do i = ne_b+1, ne_a
do j = ne_b+1, ne_a
do ipoint = 1, n_grid
tmpE(ipoint,2,p) = tmpE(ipoint,2,p) + 0.5d0 * mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) &
+ int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) &
+ int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) )
tmpF(ipoint,1,p) = tmpF(ipoint,1,p) + 0.5d0 * mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) )
enddo ! ipoint
enddo ! j
enddo ! i
enddo ! p
!$OMP END DO
!$OMP END PARALLEL
deallocate(tmpL0, tmpL, tmpR0, tmpR, tmpJ, tmpS)
call dgemm( 'T', 'N', n_mo, n_mo, 8*n_grid, 1.d0 &
, tmpE(1,1,1), 8*n_grid, tmpF(1,1,1), 8*n_grid &
, 1.d0, noL_1e(1,1), n_mo)
deallocate(tmpE, tmpF)
endif
call wall_time(t1)
write(*,"(A,2X,F15.7)") ' wall time for noL_1e (sec) = ', (t1 - t0)
return
end
! ---
subroutine provide_no_1e_tmp(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r, int2_grad1_u12, &
tmpO, tmpJ, tmpM, tmpS, tmpC, tmpD, tmpL, tmpR, tmpE, tmpF, noL_1e)
implicit none
integer, intent(in) :: n_grid, n_mo
integer, intent(in) :: ne_a, ne_b
double precision, intent(in) :: wr1(n_grid)
double precision, intent(in) :: mos_l_in_r(n_grid,n_mo)
double precision, intent(in) :: mos_r_in_r(n_grid,n_mo)
double precision, intent(in) :: int2_grad1_u12(n_grid,3,n_mo,n_mo)
double precision, intent(out) :: tmpO(n_grid), tmpJ(n_grid,3)
double precision, intent(out) :: tmpM(n_grid,3), tmpS(n_grid)
double precision, intent(out) :: tmpC(n_grid,4,n_mo,n_mo), tmpD(n_grid,4)
double precision, intent(out) :: tmpL(n_grid,3,n_mo), tmpR(n_grid,3,n_mo)
double precision, intent(out) :: tmpE(n_grid,5,n_mo), tmpF(n_grid,5,n_mo)
double precision, intent(out) :: noL_1e(n_mo,n_mo)
integer :: p, s, i, j, ipoint
double precision :: t0, t1
double precision, allocatable :: tmpM_priv(:,:), tmpS_priv(:), tmpO_priv(:), tmpJ_priv(:,:)
double precision, allocatable :: tmpL0(:,:,:), tmpR0(:,:,:)
double precision, allocatable :: tmpE_os(:,:,:), tmpF_os(:,:,:)
call wall_time(t0)
if(ne_a .eq. ne_b) then
tmpO = 0.d0
tmpJ = 0.d0
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(i, ipoint, tmpO_priv, tmpJ_priv) &
!$OMP SHARED(ne_b, n_grid, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, tmpO, tmpJ)
allocate(tmpO_priv(n_grid), tmpJ_priv(n_grid,3))
tmpO_priv = 0.d0
tmpJ_priv = 0.d0
!$OMP DO
do i = 1, ne_b
do ipoint = 1, n_grid
tmpO_priv(ipoint) = tmpO_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i)
tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i)
tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i)
tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i)
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
tmpO = tmpO + tmpO_priv
tmpJ = tmpJ + tmpJ_priv
!$OMP END CRITICAL
deallocate(tmpO_priv, tmpJ_priv)
!$OMP END PARALLEL
! ---
tmpM = 0.d0
tmpS = 0.d0
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(i, j, ipoint, tmpM_priv, tmpS_priv) &
!$OMP SHARED(ne_b, n_grid, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, tmpM, tmpS)
allocate(tmpM_priv(n_grid,3), tmpS_priv(n_grid))
tmpM_priv = 0.d0
tmpS_priv = 0.d0
!$OMP DO COLLAPSE(2)
do i = 1, ne_b
do j = 1, ne_b
do ipoint = 1, n_grid
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
tmpM = tmpM + tmpM_priv
tmpS = tmpS + tmpS_priv
!$OMP END CRITICAL
deallocate(tmpM_priv, tmpS_priv)
!$OMP END PARALLEL
! ---
do ipoint = 1, n_grid
tmpD(ipoint,1) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,1) - tmpM(ipoint,1))
tmpD(ipoint,2) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,2) - tmpM(ipoint,2))
tmpD(ipoint,3) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,3) - tmpM(ipoint,3))
tmpD(ipoint,4) = -wr1(ipoint) * tmpO(ipoint)
tmpS(ipoint) = 2.d0 * (tmpJ(ipoint,1) * tmpJ(ipoint,1) + tmpJ(ipoint,2) * tmpJ(ipoint,2) + tmpJ(ipoint,3) * tmpJ(ipoint,3)) - tmpS(ipoint)
enddo
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(p, s, i, ipoint) &
!$OMP SHARED(n_mo, ne_b, n_grid, &
!$OMP int2_grad1_u12, tmpC)
!$OMP DO COLLAPSE(2)
do s = 1, n_mo
do p = 1, n_mo
do ipoint = 1, n_grid
tmpC(ipoint,1,p,s) = int2_grad1_u12(ipoint,1,p,s)
tmpC(ipoint,2,p,s) = int2_grad1_u12(ipoint,2,p,s)
tmpC(ipoint,3,p,s) = int2_grad1_u12(ipoint,3,p,s)
enddo
tmpC(:,4,p,s) = 0.d0
do i = 1, ne_b
do ipoint = 1, n_grid
tmpC(ipoint,4,p,s) = tmpC(ipoint,4,p,s) + int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) &
+ int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) &
+ int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s)
enddo
enddo
enddo ! p
enddo ! s
!$OMP END DO
!$OMP END PARALLEL
call dgemv( 'T', 4*n_grid, n_mo*n_mo, 2.d0 &
, tmpC(1,1,1,1), size(tmpC, 1) * size(tmpC, 2) &
, tmpD(1,1), 1 &
, 0.d0, noL_1e(1,1), 1)
! ---
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(p, i, ipoint) &
!$OMP SHARED(ne_b, n_grid, n_mo, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, tmpL, tmpR)
!$OMP DO
do p = 1, n_mo
tmpL(:,1:3,p) = 0.d0
tmpR(:,1:3,p) = 0.d0
do i = 1, ne_b
do ipoint = 1, n_grid
tmpL(ipoint,1,p) = tmpL(ipoint,1,p) + int2_grad1_u12(ipoint,1,p,i) * mos_l_in_r(ipoint,i)
tmpL(ipoint,2,p) = tmpL(ipoint,2,p) + int2_grad1_u12(ipoint,2,p,i) * mos_l_in_r(ipoint,i)
tmpL(ipoint,3,p) = tmpL(ipoint,3,p) + int2_grad1_u12(ipoint,3,p,i) * mos_l_in_r(ipoint,i)
tmpR(ipoint,1,p) = tmpR(ipoint,1,p) + int2_grad1_u12(ipoint,1,i,p) * mos_r_in_r(ipoint,i)
tmpR(ipoint,2,p) = tmpR(ipoint,2,p) + int2_grad1_u12(ipoint,2,i,p) * mos_r_in_r(ipoint,i)
tmpR(ipoint,3,p) = tmpR(ipoint,3,p) + int2_grad1_u12(ipoint,3,i,p) * mos_r_in_r(ipoint,i)
enddo
enddo
enddo ! p
!$OMP END DO
!$OMP END PARALLEL
! ---
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(p, i, j, ipoint) &
!$OMP SHARED(ne_b, n_grid, n_mo, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, wr1, &
!$OMP tmpL, tmpR, tmpJ, tmpS, tmpE, tmpF)
!$OMP DO
do p = 1, n_mo
do ipoint = 1, n_grid
tmpE(ipoint,1,p) = wr1(ipoint) * mos_l_in_r(ipoint,p)
tmpE(ipoint,2,p) = -2.d0 * (tmpL(ipoint,1,p) * tmpJ(ipoint,1) + tmpL(ipoint,2,p) * tmpJ(ipoint,2) + tmpL(ipoint,3,p) * tmpJ(ipoint,3))
tmpE(ipoint,3,p) = wr1(ipoint) * tmpL(ipoint,1,p)
tmpE(ipoint,4,p) = wr1(ipoint) * tmpL(ipoint,2,p)
tmpE(ipoint,5,p) = wr1(ipoint) * tmpL(ipoint,3,p)
tmpF(ipoint,1,p) = -2.d0 * (tmpR(ipoint,1,p) * tmpJ(ipoint,1) + tmpR(ipoint,2,p) * tmpJ(ipoint,2) + tmpR(ipoint,3,p) * tmpJ(ipoint,3)) &
+ mos_r_in_r(ipoint,p) * tmpS(ipoint)
tmpF(ipoint,2,p) = wr1(ipoint) * mos_r_in_r(ipoint,p)
tmpF(ipoint,3,p) = tmpR(ipoint,1,p)
tmpF(ipoint,4,p) = tmpR(ipoint,2,p)
tmpF(ipoint,5,p) = tmpR(ipoint,3,p)
enddo
do i = 1, ne_b
do j = 1, ne_b
do ipoint = 1, n_grid
tmpE(ipoint,2,p) = tmpE(ipoint,2,p) + mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) &
+ int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) &
+ int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) )
tmpF(ipoint,1,p) = tmpF(ipoint,1,p) + mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) )
enddo ! ipoint
enddo ! j
enddo ! i
enddo ! p
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', n_mo, n_mo, 5*n_grid, 1.d0 &
, tmpE(1,1,1), 5*n_grid, tmpF(1,1,1), 5*n_grid &
, 1.d0, noL_1e(1,1), n_mo)
! ---
else
tmpO = 0.d0
tmpJ = 0.d0
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(i, ipoint, tmpO_priv, tmpJ_priv) &
!$OMP SHARED(ne_b, ne_a, n_grid, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, tmpO, tmpJ)
allocate(tmpO_priv(n_grid), tmpJ_priv(n_grid,3))
tmpO_priv = 0.d0
tmpJ_priv = 0.d0
!$OMP DO
do i = 1, ne_b
do ipoint = 1, n_grid
tmpO_priv(ipoint) = tmpO_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i)
tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i)
tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i)
tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i)
enddo
enddo
!$OMP END DO NOWAIT
!$OMP DO
do i = ne_b+1, ne_a
do ipoint = 1, n_grid
tmpO_priv(ipoint) = tmpO_priv(ipoint) + 0.5d0 * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i)
tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,i)
tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,i)
tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,i)
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
tmpO = tmpO + tmpO_priv
tmpJ = tmpJ + tmpJ_priv
!$OMP END CRITICAL
deallocate(tmpO_priv, tmpJ_priv)
!$OMP END PARALLEL
! ---
tmpM = 0.d0
tmpS = 0.d0
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(i, j, ipoint, tmpM_priv, tmpS_priv) &
!$OMP SHARED(ne_b, ne_a, n_grid, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, tmpM, tmpS)
allocate(tmpM_priv(n_grid,3), tmpS_priv(n_grid))
tmpM_priv = 0.d0
tmpS_priv = 0.d0
!$OMP DO COLLAPSE(2)
do i = 1, ne_b
do j = 1, ne_b
do ipoint = 1, n_grid
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP DO COLLAPSE(2)
do i = ne_b+1, ne_a
do j = 1, ne_b
do ipoint = 1, n_grid
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i)
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i)
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i)
tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP DO COLLAPSE(2)
do i = ne_b+1, ne_a
do j = ne_b+1, ne_a
do ipoint = 1, n_grid
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
tmpS_priv(ipoint) = tmpS_priv(ipoint) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
+ 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
+ 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
tmpM = tmpM + tmpM_priv
tmpS = tmpS + tmpS_priv
!$OMP END CRITICAL
deallocate(tmpM_priv, tmpS_priv)
!$OMP END PARALLEL
! ---
do ipoint = 1, n_grid
tmpD(ipoint,1) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,1) - tmpM(ipoint,1))
tmpD(ipoint,2) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,2) - tmpM(ipoint,2))
tmpD(ipoint,3) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,3) - tmpM(ipoint,3))
tmpD(ipoint,4) = -wr1(ipoint) * tmpO(ipoint)
tmpS(ipoint) = 2.d0 * (tmpJ(ipoint,1) * tmpJ(ipoint,1) + tmpJ(ipoint,2) * tmpJ(ipoint,2) + tmpJ(ipoint,3) * tmpJ(ipoint,3)) - tmpS(ipoint)
enddo
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(p, s, i, ipoint) &
!$OMP SHARED(n_mo, ne_b, n_grid, &
!$OMP ne_a, int2_grad1_u12, tmpC)
!$OMP DO COLLAPSE(2)
do s = 1, n_mo
do p = 1, n_mo
do ipoint = 1, n_grid
tmpC(ipoint,1,p,s) = int2_grad1_u12(ipoint,1,p,s)
tmpC(ipoint,2,p,s) = int2_grad1_u12(ipoint,2,p,s)
tmpC(ipoint,3,p,s) = int2_grad1_u12(ipoint,3,p,s)
enddo
tmpC(:,4,p,s) = 0.d0
do i = 1, ne_b
do ipoint = 1, n_grid
tmpC(ipoint,4,p,s) = tmpC(ipoint,4,p,s) + int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) &
+ int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) &
+ int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s)
enddo
enddo
do i = ne_b+1, ne_a
do ipoint = 1, n_grid
tmpC(ipoint,4,p,s) = tmpC(ipoint,4,p,s) + 0.5d0 * int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) &
+ 0.5d0 * int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) &
+ 0.5d0 * int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s)
enddo
enddo
enddo ! p
enddo ! s
!$OMP END DO
!$OMP END PARALLEL
call dgemv( 'T', 4*n_grid, n_mo*n_mo, 2.d0 &
, tmpC(1,1,1,1), size(tmpC, 1) * size(tmpC, 2) &
, tmpD(1,1), 1 &
, 0.d0, noL_1e(1,1), 1)
! ---
allocate(tmpL0(n_grid,3,n_mo))
allocate(tmpR0(n_grid,3,n_mo))
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(p, i, ipoint) &
!$OMP SHARED(ne_b, ne_a, n_grid, n_mo, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, tmpL0, tmpR0, tmpL, tmpR)
!$OMP DO
do p = 1, n_mo
tmpL0(:,1:3,p) = 0.d0
tmpR0(:,1:3,p) = 0.d0
do i = ne_b+1, ne_a
do ipoint = 1, n_grid
tmpL0(ipoint,1,p) = tmpL0(ipoint,1,p) + 0.5d0 * int2_grad1_u12(ipoint,1,p,i) * mos_l_in_r(ipoint,i)
tmpL0(ipoint,2,p) = tmpL0(ipoint,2,p) + 0.5d0 * int2_grad1_u12(ipoint,2,p,i) * mos_l_in_r(ipoint,i)
tmpL0(ipoint,3,p) = tmpL0(ipoint,3,p) + 0.5d0 * int2_grad1_u12(ipoint,3,p,i) * mos_l_in_r(ipoint,i)
tmpR0(ipoint,1,p) = tmpR0(ipoint,1,p) + 0.5d0 * int2_grad1_u12(ipoint,1,i,p) * mos_r_in_r(ipoint,i)
tmpR0(ipoint,2,p) = tmpR0(ipoint,2,p) + 0.5d0 * int2_grad1_u12(ipoint,2,i,p) * mos_r_in_r(ipoint,i)
tmpR0(ipoint,3,p) = tmpR0(ipoint,3,p) + 0.5d0 * int2_grad1_u12(ipoint,3,i,p) * mos_r_in_r(ipoint,i)
enddo
enddo
tmpL(:,1:3,p) = tmpL0(:,1:3,p)
tmpR(:,1:3,p) = tmpR0(:,1:3,p)
do i = 1, ne_b
do ipoint = 1, n_grid
tmpL(ipoint,1,p) = tmpL(ipoint,1,p) + int2_grad1_u12(ipoint,1,p,i) * mos_l_in_r(ipoint,i)
tmpL(ipoint,2,p) = tmpL(ipoint,2,p) + int2_grad1_u12(ipoint,2,p,i) * mos_l_in_r(ipoint,i)
tmpL(ipoint,3,p) = tmpL(ipoint,3,p) + int2_grad1_u12(ipoint,3,p,i) * mos_l_in_r(ipoint,i)
tmpR(ipoint,1,p) = tmpR(ipoint,1,p) + int2_grad1_u12(ipoint,1,i,p) * mos_r_in_r(ipoint,i)
tmpR(ipoint,2,p) = tmpR(ipoint,2,p) + int2_grad1_u12(ipoint,2,i,p) * mos_r_in_r(ipoint,i)
tmpR(ipoint,3,p) = tmpR(ipoint,3,p) + int2_grad1_u12(ipoint,3,i,p) * mos_r_in_r(ipoint,i)
enddo
enddo
enddo ! p
!$OMP END DO
!$OMP END PARALLEL
! ---
allocate(tmpE_os(n_grid,8,n_mo))
allocate(tmpF_os(n_grid,8,n_mo))
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(p, i, j, ipoint) &
!$OMP SHARED(ne_b, ne_a, n_grid, n_mo, &
!$OMP mos_l_in_r, mos_r_in_r, &
!$OMP int2_grad1_u12, wr1, &
!$OMP tmpL, tmpL0, tmpR, tmpR0, tmpJ, tmpS, tmpE_os, tmpF_os)
!$OMP DO
do p = 1, n_mo
do ipoint = 1, n_grid
tmpE_os(ipoint,1,p) = wr1(ipoint) * mos_l_in_r(ipoint,p)
tmpE_os(ipoint,2,p) = -2.d0 * (tmpL(ipoint,1,p) * tmpJ(ipoint,1) + tmpL(ipoint,2,p) * tmpJ(ipoint,2) + tmpL(ipoint,3,p) * tmpJ(ipoint,3))
tmpE_os(ipoint,3,p) = wr1(ipoint) * tmpL(ipoint,1,p)
tmpE_os(ipoint,4,p) = wr1(ipoint) * tmpL(ipoint,2,p)
tmpE_os(ipoint,5,p) = wr1(ipoint) * tmpL(ipoint,3,p)
tmpE_os(ipoint,6,p) = wr1(ipoint) * tmpL0(ipoint,1,p)
tmpE_os(ipoint,7,p) = wr1(ipoint) * tmpL0(ipoint,2,p)
tmpE_os(ipoint,8,p) = wr1(ipoint) * tmpL0(ipoint,3,p)
tmpF_os(ipoint,1,p) = -2.d0 * (tmpR(ipoint,1,p) * tmpJ(ipoint,1) + tmpR(ipoint,2,p) * tmpJ(ipoint,2) + tmpR(ipoint,3,p) * tmpJ(ipoint,3)) &
+ mos_r_in_r(ipoint,p) * tmpS(ipoint)
tmpF_os(ipoint,2,p) = wr1(ipoint) * mos_r_in_r(ipoint,p)
tmpF_os(ipoint,3,p) = tmpR(ipoint,1,p)
tmpF_os(ipoint,4,p) = tmpR(ipoint,2,p)
tmpF_os(ipoint,5,p) = tmpR(ipoint,3,p)
tmpF_os(ipoint,6,p) = tmpR0(ipoint,1,p)
tmpF_os(ipoint,7,p) = tmpR0(ipoint,2,p)
tmpF_os(ipoint,8,p) = tmpR0(ipoint,3,p)
enddo
do i = 1, ne_b
do j = 1, ne_b
do ipoint = 1, n_grid
tmpE_os(ipoint,2,p) = tmpE_os(ipoint,2,p) + mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) &
+ int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) &
+ int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) )
tmpF_os(ipoint,1,p) = tmpF_os(ipoint,1,p) + mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) )
enddo ! ipoint
enddo ! j
enddo ! i
do i = ne_b+1, ne_a
do j = 1, ne_b
do ipoint = 1, n_grid
tmpE_os(ipoint,2,p) = tmpE_os(ipoint,2,p) + 0.5d0 * mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) &
+ int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) &
+ int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) )
tmpE_os(ipoint,2,p) = tmpE_os(ipoint,2,p) + 0.5d0 * mos_l_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,p,j) * int2_grad1_u12(ipoint,1,j,i) &
+ int2_grad1_u12(ipoint,2,p,j) * int2_grad1_u12(ipoint,2,j,i) &
+ int2_grad1_u12(ipoint,3,p,j) * int2_grad1_u12(ipoint,3,j,i) )
tmpF_os(ipoint,1,p) = tmpF_os(ipoint,1,p) + 0.5d0 * mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) )
tmpF_os(ipoint,1,p) = tmpF_os(ipoint,1,p) + 0.5d0 * mos_r_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,j,i) * int2_grad1_u12(ipoint,1,i,p) &
+ int2_grad1_u12(ipoint,2,j,i) * int2_grad1_u12(ipoint,2,i,p) &
+ int2_grad1_u12(ipoint,3,j,i) * int2_grad1_u12(ipoint,3,i,p) )
enddo ! ipoint
enddo ! j
enddo ! i
do i = ne_b+1, ne_a
do j = ne_b+1, ne_a
do ipoint = 1, n_grid
tmpE_os(ipoint,2,p) = tmpE_os(ipoint,2,p) + 0.5d0 * mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) &
+ int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) &
+ int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) )
tmpF_os(ipoint,1,p) = tmpF_os(ipoint,1,p) + 0.5d0 * mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) &
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) &
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) )
enddo ! ipoint
enddo ! j
enddo ! i
enddo ! p
!$OMP END DO
!$OMP END PARALLEL
deallocate(tmpL0, tmpR0)
call dgemm( 'T', 'N', n_mo, n_mo, 8*n_grid, 1.d0 &
, tmpE_os(1,1,1), 8*n_grid, tmpF_os(1,1,1), 8*n_grid &
, 1.d0, noL_1e(1,1), n_mo)
deallocate(tmpE_os, tmpF_os)
endif
call wall_time(t1)
write(*,"(A,2X,F15.7)") ' wall time for noL_1e (sec) = ', (t1 - t0)
return
end
! ---