diff --git a/devel/ccsd_gpu/gpu.c b/devel/ccsd_gpu/gpu.c
index 5575926..47cf01b 100644
--- a/devel/ccsd_gpu/gpu.c
+++ b/devel/ccsd_gpu/gpu.c
@@ -11,6 +11,53 @@ void dgemm_(char*, char*, int*, int*, int*, double*, double*, int*, double*, int
+
+void gpu_dgemm(char transa, char transb, int m, int n, int k, double alpha,
+               double* A, int lda, double* B, int ldb, double beta, double* C, int ldc)
+{
+  cublasHandle_t handle;
+  cublasCreate(&handle);
+
+  double * d_A;
+  double * d_B;
+  double * d_C;
+  cublasOperation_t ta, tb;
+
+  if (transa == 'N') {
+    cudaMalloc((void**)&d_A, lda*k*sizeof(double));
+    cublasSetMatrix(m, k, sizeof(double), A, lda, d_A, lda);
+    ta = CUBLAS_OP_N;
+  } else {
+    cudaMalloc((void**)&d_A, lda*m*sizeof(double));
+    cublasSetMatrix(k, m, sizeof(double), A, lda, d_A, lda);
+    ta = CUBLAS_OP_T;
+  }
+
+  if (transb == 'N') {
+    cudaMalloc((void**)&d_B, ldb*n*sizeof(double));
+    cublasSetMatrix(k, n, sizeof(double), B, ldb, d_B, ldb);
+    tb = CUBLAS_OP_N;
+  } else {
+    cudaMalloc((void**)&d_B, ldb*k*sizeof(double));
+    cublasSetMatrix(n, k, sizeof(double), B, ldb, d_B, ldb);
+    tb = CUBLAS_OP_T;
+  }
+
+  /* C is uploaded only when beta != 0; otherwise it is fully overwritten. */
+  cudaMalloc((void**)&d_C, ldc*n*sizeof(double));
+  if (beta != 0.) {
+    cublasSetMatrix(m, n, sizeof(double), C, ldc, d_C, ldc);
+  }
+
+  cublasDgemm(handle, ta, tb, m, n, k, &alpha, d_A, lda, d_B, ldb, &beta, d_C, ldc);
+
+  cublasGetMatrix(m, n, sizeof(double), d_C, ldc, C, ldc);
+
+  cudaFree(d_A);
+  cudaFree(d_B);
+  cudaFree(d_C);
+  cublasDestroy(handle);
+}
+
 void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo_num,
                                double* t1, double* tau,
@@ -35,7 +82,6 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
   double* d_r2;
   lda = nO * nO;
   cudaMalloc((void **)&d_r2, lda * nV * nV * sizeof(double));
-  cublasSetMatrix(nO*nO, nV*nV, sizeof(double), r2, lda, d_r2, lda);
 
   double* d_cc_space_v_vv_chol;
   lda = cholesky_mo_num * nV;
@@ -60,9 +106,13 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
   m=cholesky_mo_num*nV; n=nV; k=nO;
   A = d_cc_space_v_vo_chol; B = d_t1; C = d_tmp_cc;
   cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, m, B, k, &beta, C, m);
+  cublasDestroy(handle);
 
   #pragma omp parallel
   {
+    /* Each OpenMP thread uses its own cuBLAS handle. */
+    cublasHandle_t handle;
+    cublasCreate(&handle);
+
     double* d_tmp_cc2;
     cudaMalloc((void **)&d_tmp_cc2, cholesky_mo_num*nV*sizeof(double));
@@ -126,42 +176,26 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
       }
     }
+    cublasDestroy(handle);
   }
 
-  lda=nO*nO;
-  cublasGetMatrix(nO*nO, nV*nV, sizeof(double), d_r2, lda, r2, lda);
-
   cudaFree(d_cc_space_v_vo_chol);
   cudaFree(d_cc_space_v_vv_chol);
   cudaFree(d_tau);
   cudaFree(d_t1);
   cudaFree(d_tmp_cc);
-  cudaFree(d_r2);
-  cublasDestroy(handle);
+  double * r2_tmp = malloc(nO*nO*nV*nV*sizeof(double));
+  lda=nO*nO;
+  cublasGetMatrix(nO*nO, nV*nV, sizeof(double), d_r2, lda, r2_tmp, lda);
+  /* Copy the downloaded result back into the caller's r2 array. */
+  for (size_t i=0 ; i<(size_t) nO*nO*nV*nV ; ++i) {
+    r2[i] = r2_tmp[i];
+  }
+  free(r2_tmp);
+  cudaFree(d_r2);
 }
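The `gpu_dgemm` wrapper mirrors the column-major BLAS `dgemm_` convention but takes its scalar arguments by value, so a host BLAS call can be offloaded without restructuring the caller. A minimal usage sketch (the `example` driver and all sizes are illustrative, not part of the patch):

/* Hypothetical driver: C = 1.0*A*B + 0.0*C on the GPU, column-major storage. */
#include <stdlib.h>

/* Prototype of the wrapper added by the patch (defined in gpu.c above). */
void gpu_dgemm(char transa, char transb, int m, int n, int k, double alpha,
               double* A, int lda, double* B, int ldb, double beta, double* C, int ldc);

void example(void)
{
    int m = 128, n = 64, k = 256;
    double *A = malloc((size_t)m * k * sizeof(double));  /* m x k */
    double *B = malloc((size_t)k * n * sizeof(double));  /* k x n */
    double *C = malloc((size_t)m * n * sizeof(double));  /* m x n */
    for (size_t i = 0; i < (size_t)m * k; i++) A[i] = 1.0;
    for (size_t i = 0; i < (size_t)k * n; i++) B[i] = 2.0;

    /* CPU reference:
       double one = 1.0, zero = 0.0;
       dgemm_("N", "N", &m, &n, &k, &one, A, &m, B, &k, &zero, C, &m);  */
    gpu_dgemm('N', 'N', m, n, k, 1.0, A, m, B, k, 0.0, C, m);

    free(A); free(B); free(C);
}

Allocating, uploading, and freeing the device buffers on every call keeps the wrapper self-contained at the cost of PCIe traffic; inside the OpenMP region of the patch, each thread creates its own handle rather than sharing one across threads.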
+! ppqq
+
+BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <pp|qq> integrals for general MOs (except core and deleted MOs)
+  END_DOC
+
+  integer                       :: p,q
+  double precision, allocatable :: tmp_v(:,:,:,:)
+
+  allocate(tmp_v(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo))
+
+  call gen_v_space(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo, cc_list_gen,cc_list_gen,cc_list_gen,cc_list_gen, tmp_v)
+
+  do q = 1, cc_n_mo
+    do p = 1, cc_n_mo
+      cc_space_v_ppqq(p,q) = tmp_v(p,p,q,q)
+    enddo
+  enddo
+
+  deallocate(tmp_v)
+
+END_PROVIDER
+
+! aaii
+
+BEGIN_PROVIDER [double precision, cc_space_v_aaii, (cc_nVa,cc_nOa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <aa|ii> integrals
+  ! a: virtual MO
+  ! i: occupied MO
+  END_DOC
+
+  integer :: a,i
+
+  do i = 1, cc_nOa
+    do a = 1, cc_nVa
+      cc_space_v_aaii(a,i) = cc_space_v_vvoo(a,a,i,i)
+    enddo
+  enddo
+
+  FREE cc_space_v_vvoo
+
+END_PROVIDER
+
+! iiaa
+
+BEGIN_PROVIDER [double precision, cc_space_v_iiaa, (cc_nOa,cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ii|aa> integrals
+  ! a: virtual MO
+  ! i: occupied MO
+  END_DOC
+
+  integer :: a,i
+
+  do a = 1, cc_nVa
+    do i = 1, cc_nOa
+      cc_space_v_iiaa(i,a) = cc_space_v_oovv(i,i,a,a)
+    enddo
+  enddo
+
+  FREE cc_space_v_oovv
+
+END_PROVIDER
+
+! iijj
+
+BEGIN_PROVIDER [double precision, cc_space_v_iijj, (cc_nOa,cc_nOa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ii|jj> integrals
+  ! i,j: occupied MO
+  END_DOC
+
+  integer :: i,j
+
+  do j = 1, cc_nOa
+    do i = 1, cc_nOa
+      cc_space_v_iijj(i,j) = cc_space_v_oooo(i,i,j,j)
+    enddo
+  enddo
+
+  FREE cc_space_v_oooo
+
+END_PROVIDER
+
+! aabb
+
+BEGIN_PROVIDER [double precision, cc_space_v_aabb, (cc_nVa,cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <aa|bb> integrals
+  ! a,b: virtual MO
+  END_DOC
+
+  integer :: a,b
+
+  do b = 1, cc_nVa
+    do a = 1, cc_nVa
+      cc_space_v_aabb(a,b) = cc_space_v_vvvv(a,a,b,b)
+    enddo
+  enddo
+
+  FREE cc_space_v_vvvv
+
+END_PROVIDER
+
+! iaia
+
+BEGIN_PROVIDER [double precision, cc_space_v_iaia, (cc_nOa,cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ia|ia> integrals
+  ! a: virtual MO
+  ! i: occupied MO
+  END_DOC
+
+  integer :: a,i
+
+  do a = 1, cc_nVa
+    do i = 1, cc_nOa
+      cc_space_v_iaia(i,a) = cc_space_v_ovov(i,a,i,a)
+    enddo
+  enddo
+
+  FREE cc_space_v_ovov
+
+END_PROVIDER
+
+! iaai
+
+BEGIN_PROVIDER [double precision, cc_space_v_iaai, (cc_nOa,cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ia|ai> integrals
+  ! a: virtual MO
+  ! i: occupied MO
+  END_DOC
+
+  integer :: a,i
+
+  do a = 1, cc_nVa
+    do i = 1, cc_nOa
+      cc_space_v_iaai(i,a) = cc_space_v_ovvo(i,a,a,i)
+    enddo
+  enddo
+
+  FREE cc_space_v_ovvo
+
+END_PROVIDER
+
+! aiia
+
+BEGIN_PROVIDER [double precision, cc_space_v_aiia, (cc_nVa,cc_nOa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! <ai|ia> integrals
+  ! a: virtual MO
+  ! i: occupied MO
+  END_DOC
+
+  integer :: a,i
+
+  do i = 1, cc_nOa
+    do a = 1, cc_nVa
+      cc_space_v_aiia(a,i) = cc_space_v_voov(a,i,i,a)
+    enddo
+  enddo
+
+  FREE cc_space_v_voov
+
+END_PROVIDER
+
+! oovv
+
+BEGIN_PROVIDER [double precision, cc_space_w_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! cc_space_w_oovv(i,j,a,b) = 2 <ij|ab> - <ji|ab>
+  END_DOC
+
+  double precision, allocatable :: tmp_v(:,:,:,:)
+  integer                       :: i,j,a,b
+
+  allocate(tmp_v(cc_nOa,cc_nOa,cc_nVa,cc_nVa))
+
+  call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, tmp_v)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cc_nVa,cc_nOa,tmp_v,cc_space_w_oovv) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do b = 1, cc_nVa
+    do a = 1, cc_nVa
+      do j = 1, cc_nOa
+        do i = 1, cc_nOa
+          cc_space_w_oovv(i,j,a,b) = 2d0 * tmp_v(i,j,a,b) - tmp_v(j,i,a,b)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  deallocate(tmp_v)
+
+END_PROVIDER
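The `cc_space_w_oovv` provider forms the spin-adapted combination w(i,j,a,b) = 2 <ij|ab> - <ji|ab>. A minimal C sketch of the same operation on flattened column-major arrays (all names hypothetical, for illustration only):

#include <stddef.h>

/* Column-major index into an (nO,nO,nV,nV) array: first index fastest. */
static size_t idx4(int i, int j, int a, int b, int nO, int nV)
{
    return (size_t)i + (size_t)nO * ((size_t)j + (size_t)nO * ((size_t)a + (size_t)nV * (size_t)b));
}

/* w(i,j,a,b) = 2*v(i,j,a,b) - v(j,i,a,b), as in cc_space_w_oovv. */
void build_w_oovv(const double *v, double *w, int nO, int nV)
{
    for (int b = 0; b < nV; b++)
        for (int a = 0; a < nV; a++)
            for (int j = 0; j < nO; j++)
                for (int i = 0; i < nO; i++)
                    w[idx4(i,j,a,b,nO,nV)] =
                        2.0 * v[idx4(i,j,a,b,nO,nV)] - v[idx4(j,i,a,b,nO,nV)];
}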
+! vvoo
+
+BEGIN_PROVIDER [double precision, cc_space_w_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)]
+
+  implicit none
+
+  BEGIN_DOC
+  ! cc_space_w_vvoo(a,b,i,j) = 2 <ab|ij> - <ba|ij>
+  END_DOC
+
+  double precision, allocatable :: tmp_v(:,:,:,:)
+  integer                       :: i,j,a,b
+
+  allocate(tmp_v(cc_nVa,cc_nVa,cc_nOa,cc_nOa))
+
+  call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, tmp_v)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cc_nVa,cc_nOa,tmp_v,cc_space_w_vvoo) &
+  !$OMP PRIVATE(i,j,a,b) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do j = 1, cc_nOa
+    do i = 1, cc_nOa
+      do b = 1, cc_nVa
+        do a = 1, cc_nVa
+          cc_space_w_vvoo(a,b,i,j) = 2d0 * tmp_v(a,b,i,j) - tmp_v(b,a,i,j)
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  deallocate(tmp_v)
+
+END_PROVIDER
+
+! F_oo
+
+BEGIN_PROVIDER [double precision, cc_space_f_oo, (cc_nOa, cc_nOa)]
+
+  implicit none
+
+  call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nOa, cc_list_occ,cc_list_occ, cc_space_f_oo)
+
+END_PROVIDER
+
+! F_ov
+
+BEGIN_PROVIDER [double precision, cc_space_f_ov, (cc_nOa, cc_nVa)]
+
+  implicit none
+
+  call gen_f_space(psi_det(1,1,cc_ref), cc_nOa,cc_nVa, cc_list_occ,cc_list_vir, cc_space_f_ov)
+
+END_PROVIDER
+
+! F_vo
+
+BEGIN_PROVIDER [double precision, cc_space_f_vo, (cc_nVa, cc_nOa)]
+
+  implicit none
+
+  call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nOa, cc_list_vir,cc_list_occ, cc_space_f_vo)
+
+END_PROVIDER
+
+! F_vv
+
+BEGIN_PROVIDER [double precision, cc_space_f_vv, (cc_nVa, cc_nVa)]
+
+  implicit none
+
+  call gen_f_space(psi_det(1,1,cc_ref), cc_nVa,cc_nVa, cc_list_vir,cc_list_vir, cc_space_f_vv)
+
+END_PROVIDER
+
+! F_o
+
+BEGIN_PROVIDER [double precision, cc_space_f_o, (cc_nOa)]
+
+  implicit none
+
+  integer :: i
+
+  do i = 1, cc_nOa
+    cc_space_f_o(i) = cc_space_f_oo(i,i)
+  enddo
+
+END_PROVIDER
+
+! F_v
+
+BEGIN_PROVIDER [double precision, cc_space_f_v, (cc_nVa)]
+
+  implicit none
+
+  integer :: i
+
+  do i = 1, cc_nVa
+    cc_space_f_v(i) = cc_space_f_vv(i,i)
+  enddo
+
+END_PROVIDER
+
+! Shift
+
+subroutine shift_idx_spin(s,n_S,shift)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Shift for the alpha/beta partitioning of the spin orbitals
+  ! n_S(1): number of alpha spin orbitals in the corresponding list
+  ! n_S(2): number of beta spin orbitals in the corresponding list
+  END_DOC
+
+  integer, intent(in)  :: s, n_S(2)
+  integer, intent(out) :: shift
+
+  if (s == 1) then
+    shift = 0
+  else
+    shift = n_S(1)
+  endif
+
+end
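Throughout the spin routines that follow, a spin orbital is addressed by a spin s (1 = alpha, 2 = beta) plus a position in the per-spin list; `shift_idx_spin` offsets the beta orbitals past all the alpha ones. A minimal C restatement of that mapping (hypothetical helper, 1-based indices as in the Fortran):

/* Hypothetical helper: global 1-based index of the spin orbital at position
   pos (1-based) in the spin-s part of a list, where the n_S[0] alpha
   orbitals come first and the n_S[1] beta orbitals follow. */
int global_spin_index(int s, int pos, const int n_S[2])
{
    int shift = (s == 1) ? 0 : n_S[0];  /* same rule as shift_idx_spin */
    return pos + shift;
}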
+! F
+
+subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Fock matrix corresponding to two lists of spin orbitals.
+  ! Ex: occ/occ, occ/vir,...
+  END_DOC
+
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  integer, intent(in)           :: n1,n2, n1_S(2), n2_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2)
+  integer, intent(in)           :: dim1, dim2
+
+  double precision, intent(out) :: f(dim1, dim2)
+
+  double precision, allocatable :: tmp_F(:,:)
+  integer :: i,j, idx_i,idx_j,i_shift,j_shift
+  integer :: tmp_i,tmp_j
+  integer :: si,sj,s
+
+  PROVIDE big_array_exchange_integrals big_array_coulomb_integrals
+
+  allocate(tmp_F(mo_num,mo_num))
+
+  do sj = 1, 2
+    call shift_idx_spin(sj,n2_S,j_shift)
+    do si = 1, 2
+      call shift_idx_spin(si,n1_S,i_shift)
+      s = si + sj
+
+      ! Same spin: alpha/alpha (s=2) or beta/beta (s=4); mixed-spin blocks of f are zero
+      if (s == 2 .or. s == 4) then
+        call get_fock_matrix_spin(det,sj,tmp_F)
+      else
+        do j = 1, mo_num
+          do i = 1, mo_num
+            tmp_F(i,j) = 0d0
+          enddo
+        enddo
+      endif
+
+      do tmp_j = 1, n2_S(sj)
+        j = list2(tmp_j,sj)
+        idx_j = tmp_j + j_shift
+        do tmp_i = 1, n1_S(si)
+          i = list1(tmp_i,si)
+          idx_i = tmp_i + i_shift
+          f(idx_i,idx_j) = tmp_F(i,j)
+        enddo
+      enddo
+
+    enddo
+  enddo
+
+  deallocate(tmp_F)
+
+end
+
+! Get F
+
+subroutine get_fock_matrix_spin(det,s,f)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Alpha or beta Fock matrix of an arbitrary determinant
+  END_DOC
+
+  integer(bit_kind), intent(in) :: det(N_int,2)
+  integer, intent(in)           :: s
+
+  double precision, intent(out) :: f(mo_num,mo_num)
+
+  integer           :: p,q,i,s1,s2
+  integer(bit_kind) :: res(N_int,2)
+  logical           :: ok
+  double precision  :: mo_two_e_integral
+
+  if (s == 1) then
+    s1 = 1
+    s2 = 2
+  else
+    s1 = 2
+    s2 = 1
+  endif
+
+  PROVIDE big_array_coulomb_integrals big_array_exchange_integrals
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals,big_array_coulomb_integrals,big_array_exchange_integrals) &
+  !$OMP PRIVATE(p,q,ok,i,res) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO collapse(1)
+  do q = 1, mo_num
+    do p = 1, mo_num
+      f(p,q) = mo_one_e_integrals(p,q)
+      ! Same-spin occupied orbitals: Coulomb - exchange
+      do i = 1, mo_num
+        call apply_hole(det, s1, i, res, ok, N_int)
+        if (ok) then
+!         f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q)
+          f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q) - big_array_exchange_integrals(i,p,q)
+        endif
+      enddo
+      ! Opposite-spin occupied orbitals: Coulomb only
+      do i = 1, mo_num
+        call apply_hole(det, s2, i, res, ok, N_int)
+        if (ok) then
+          f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q)
+        endif
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+end
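`gen_v_spin` below fills the spin-orbital integrals by cases on the four spins: with s = si+sj+sk+sl, the all-alpha (s = 4) and all-beta (s = 8) blocks take the antisymmetrized value, the two mixed spin-conserving patterns keep only the direct or only the exchange term, and every other block vanishes. A compact C restatement of that case analysis (hypothetical helper; v_ijkl and v_jikl stand for the spatial integrals <ij|kl> and <ji|kl>):

/* Hypothetical helper mirroring the spin cases of gen_v_spin.
   Spins si..sl are 1 (alpha) or 2 (beta). */
double spin_orbital_integral(int si, int sj, int sk, int sl,
                             double v_ijkl, double v_jikl)
{
    int s = si + sj + sk + sl;
    if (s == 4 || s == 8)            /* <aa|aa> or <bb|bb> */
        return v_ijkl - v_jikl;
    else if (si == sk && sj == sl)   /* <ab|ab> or <ba|ba> */
        return v_ijkl;
    else if (si == sl && sj == sk)   /* <ab|ba> or <ba|ab> */
        return -v_jikl;
    else
        return 0.0;                  /* spin-forbidden */
}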
+! V
+
+subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3,dim4, v)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the two-electron integrals corresponding to four lists of spin orbitals.
+  ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ...
+  END_DOC
+
+  integer, intent(in)           :: n1,n2,n3,n4, n1_S(2),n2_S(2),n3_S(2),n4_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2)
+  integer, intent(in)           :: dim1, dim2, dim3, dim4
+  double precision, intent(out) :: v(dim1,dim2,dim3,dim4)
+
+  double precision :: mo_two_e_integral
+  integer :: i,j,k,l,idx_i,idx_j,idx_k,idx_l
+  integer :: i_shift,j_shift,k_shift,l_shift
+  integer :: tmp_i,tmp_j,tmp_k,tmp_l
+  integer :: si,sj,sk,sl,s
+
+  PROVIDE cc_space_v
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) &
+  !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, &
+  !$OMP         i,j,k,l,idx_i,idx_j,idx_k,idx_l, &
+  !$OMP         tmp_i,tmp_j,tmp_k,tmp_l) &
+  !$OMP DEFAULT(NONE)
+
+  do sl = 1, 2
+    call shift_idx_spin(sl,n4_S,l_shift)
+    do sk = 1, 2
+      call shift_idx_spin(sk,n3_S,k_shift)
+      do sj = 1, 2
+        call shift_idx_spin(sj,n2_S,j_shift)
+        do si = 1, 2
+          call shift_idx_spin(si,n1_S,i_shift)
+
+          s = si+sj+sk+sl
+
+          ! <aa|aa> or <bb|bb>
+          if (s == 4 .or. s == 8) then
+            !$OMP DO collapse(3)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                do tmp_j = 1, n2_S(sj)
+                  do tmp_i = 1, n1_S(si)
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    i = list1(tmp_i,si)
+                    idx_i = tmp_i + i_shift
+                    !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                    v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+                  enddo
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+
+          ! <ab|ab> or <ba|ba>
+          elseif (si == sk .and. sj == sl) then
+            !$OMP DO collapse(3)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                do tmp_j = 1, n2_S(sj)
+                  do tmp_i = 1, n1_S(si)
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    i = list1(tmp_i,si)
+                    idx_i = tmp_i + i_shift
+                    !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                    v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l)
+                  enddo
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+
+          ! <ab|ba> or <ba|ab>
+          elseif (si == sl .and. sj == sk) then
+            !$OMP DO collapse(3)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                do tmp_j = 1, n2_S(sj)
+                  do tmp_i = 1, n1_S(si)
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    i = list1(tmp_i,si)
+                    idx_i = tmp_i + i_shift
+                    !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                    v(idx_i,idx_j,idx_k,idx_l) = - cc_space_v(j,i,k,l)
+                  enddo
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+          else
+            ! Spin-forbidden block
+            !$OMP DO collapse(3)
+            do tmp_l = 1, n4_S(sl)
+              do tmp_k = 1, n3_S(sk)
+                do tmp_j = 1, n2_S(sj)
+                  do tmp_i = 1, n1_S(si)
+                    l = list4(tmp_l,sl)
+                    idx_l = tmp_l + l_shift
+                    k = list3(tmp_k,sk)
+                    idx_k = tmp_k + k_shift
+                    j = list2(tmp_j,sj)
+                    idx_j = tmp_j + j_shift
+                    i = list1(tmp_i,si)
+                    idx_i = tmp_i + i_shift
+                    v(idx_i,idx_j,idx_k,idx_l) = 0d0
+                  enddo
+                enddo
+              enddo
+            enddo
+            !$OMP END DO
+          endif
+
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL
+
+end
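Storing the full spin-orbital array v(dim1,dim2,dim3,dim4) scales as N^4 in memory, so the `gen_v_spin_3idx*` variants below fill one 3-index slice at a time. Since Fortran arrays are column-major, fixing the last index selects one contiguous block, which makes the l-sliced variant the cheapest. A minimal C illustration (hypothetical helper, 0-based indices):

#include <string.h>

/* In column-major storage, v(:,:,:,l) for fixed l is one contiguous block
   of n1*n2*n3 doubles, so slicing on the last index is a plain copy. */
void slice_last_index(const double *v, double *v_l,
                      int n1, int n2, int n3, int l)
{
    size_t block = (size_t)n1 * n2 * n3;
    memcpy(v_l, v + (size_t)l * block, block * sizeof(double));
}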
+! V_3idx
+
+subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_l)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the two-electron integrals corresponding to four lists of spin orbitals,
+  ! for a fixed value idx_l of the fourth index: v_l(i,j,k) = v(i,j,k,idx_l).
+  ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ...
+  END_DOC
+
+  integer, intent(in)           :: n1,n2,n3,n4, idx_l, n1_S(2),n2_S(2),n3_S(2),n4_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2)
+  integer, intent(in)           :: dim1, dim2, dim3
+  double precision, intent(out) :: v_l(dim1,dim2,dim3)
+
+  double precision :: mo_two_e_integral
+  integer :: i,j,k,l,idx_i,idx_j,idx_k
+  integer :: i_shift,j_shift,k_shift,l_shift
+  integer :: tmp_i,tmp_j,tmp_k,tmp_l
+  integer :: si,sj,sk,sl,s
+
+  PROVIDE cc_space_v
+
+  ! Spin and position in list4 of the fixed index idx_l
+  if (idx_l <= n4_S(1)) then
+    sl = 1
+  else
+    sl = 2
+  endif
+  call shift_idx_spin(sl,n4_S,l_shift)
+  tmp_l = idx_l - l_shift
+  l = list4(tmp_l,sl)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) &
+  !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, &
+  !$OMP         i,j,k,idx_i,idx_j,idx_k, &
+  !$OMP         tmp_i,tmp_j,tmp_k) &
+  !$OMP DEFAULT(NONE)
+
+  do sk = 1, 2
+    call shift_idx_spin(sk,n3_S,k_shift)
+    do sj = 1, 2
+      call shift_idx_spin(sj,n2_S,j_shift)
+      do si = 1, 2
+        call shift_idx_spin(si,n1_S,i_shift)
+
+        s = si+sj+sk+sl
+
+        ! <aa|aa> or <bb|bb>
+        if (s == 4 .or. s == 8) then
+          !$OMP DO collapse(2)
+          do tmp_k = 1, n3_S(sk)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+
+        ! <ab|ab> or <ba|ba>
+        elseif (si == sk .and. sj == sl) then
+          !$OMP DO collapse(2)
+          do tmp_k = 1, n3_S(sk)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+
+        ! <ab|ba> or <ba|ab>
+        elseif (si == sl .and. sj == sk) then
+          !$OMP DO collapse(2)
+          do tmp_k = 1, n3_S(sk)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                v_l(idx_i,idx_j,idx_k) = - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        else
+          ! Spin-forbidden block
+          !$OMP DO collapse(2)
+          do tmp_k = 1, n3_S(sk)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                v_l(idx_i,idx_j,idx_k) = 0d0
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        endif
+
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL
+
+end
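The next two variants fix an interior index instead: `gen_v_spin_3idx_ij_l` fixes the third index k, and `gen_v_spin_3idx_i_kl` fixes the second index j, so the corresponding slice of a column-major array is strided rather than contiguous. A minimal C sketch of the fixed-k gather (hypothetical helper, 0-based indices):

#include <stddef.h>

/* Gather v_k(i,j,l) = v(i,j,k,l) for fixed k from a column-major
   v(n1,n2,n3,n4); consecutive l-blocks are n1*n2*n3 apart in v but
   only n1*n2 apart in v_k. */
void slice_third_index(const double *v, double *v_k,
                       int n1, int n2, int n3, int n4, int k)
{
    for (int l = 0; l < n4; l++)
        for (int j = 0; j < n2; j++)
            for (int i = 0; i < n1; i++)
                v_k[(size_t)i + (size_t)n1 * ((size_t)j + (size_t)n2 * l)] =
                    v[(size_t)i + (size_t)n1 * ((size_t)j + (size_t)n2 * ((size_t)k + (size_t)n3 * l))];
}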
+! V_3idx_ij_l
+
+subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_k)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the two-electron integrals corresponding to four lists of spin orbitals,
+  ! for a fixed value idx_k of the third index: v_k(i,j,l) = v(i,j,idx_k,l).
+  ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ...
+  END_DOC
+
+  integer, intent(in)           :: n1,n2,n3,n4, idx_k, n1_S(2),n2_S(2),n3_S(2),n4_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2)
+  integer, intent(in)           :: dim1, dim2, dim3
+  double precision, intent(out) :: v_k(dim1,dim2,dim3)
+
+  double precision :: mo_two_e_integral
+  integer :: i,j,k,l,idx_i,idx_j,idx_l
+  integer :: i_shift,j_shift,k_shift,l_shift
+  integer :: tmp_i,tmp_j,tmp_k,tmp_l
+  integer :: si,sj,sk,sl,s
+
+  PROVIDE cc_space_v
+
+  ! Spin and position in list3 of the fixed index idx_k
+  if (idx_k <= n3_S(1)) then
+    sk = 1
+  else
+    sk = 2
+  endif
+  call shift_idx_spin(sk,n3_S,k_shift)
+  tmp_k = idx_k - k_shift
+  k = list3(tmp_k,sk)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) &
+  !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, &
+  !$OMP         i,j,l,idx_i,idx_j,idx_l, &
+  !$OMP         tmp_i,tmp_j,tmp_l) &
+  !$OMP DEFAULT(NONE)
+
+  do sl = 1, 2
+    call shift_idx_spin(sl,n4_S,l_shift)
+    do sj = 1, 2
+      call shift_idx_spin(sj,n2_S,j_shift)
+      do si = 1, 2
+        call shift_idx_spin(si,n1_S,i_shift)
+
+        s = si+sj+sk+sl
+
+        ! <aa|aa> or <bb|bb>
+        if (s == 4 .or. s == 8) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+
+        ! <ab|ab> or <ba|ba>
+        elseif (si == sk .and. sj == sl) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+
+        ! <ab|ba> or <ba|ab>
+        elseif (si == sl .and. sj == sk) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                v_k(idx_i,idx_j,idx_l) = - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        else
+          ! Spin-forbidden block
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_j = 1, n2_S(sj)
+              do tmp_i = 1, n1_S(si)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                j = list2(tmp_j,sj)
+                idx_j = tmp_j + j_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                v_k(idx_i,idx_j,idx_l) = 0d0
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        endif
+
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL
+
+end
+! V_3idx_i_kl
+
+subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_j)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the two-electron integrals corresponding to four lists of spin orbitals,
+  ! for a fixed value idx_j of the second index: v_j(i,k,l) = v(i,idx_j,k,l).
+  ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ...
+  END_DOC
+
+  integer, intent(in)           :: n1,n2,n3,n4, idx_j, n1_S(2),n2_S(2),n3_S(2),n4_S(2)
+  integer, intent(in)           :: list1(n1,2), list2(n2,2), list3(n3,2), list4(n4,2)
+  integer, intent(in)           :: dim1, dim2, dim3
+  double precision, intent(out) :: v_j(dim1,dim2,dim3)
+
+  double precision :: mo_two_e_integral
+  integer :: i,j,k,l,idx_i,idx_k,idx_l
+  integer :: i_shift,j_shift,k_shift,l_shift
+  integer :: tmp_i,tmp_j,tmp_k,tmp_l
+  integer :: si,sj,sk,sl,s
+
+  PROVIDE cc_space_v
+
+  ! Spin and position in list2 of the fixed index idx_j
+  if (idx_j <= n2_S(1)) then
+    sj = 1
+  else
+    sj = 2
+  endif
+  call shift_idx_spin(sj,n2_S,j_shift)
+  tmp_j = idx_j - j_shift
+  j = list2(tmp_j,sj)
+
+  !$OMP PARALLEL &
+  !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) &
+  !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, &
+  !$OMP         i,k,l,idx_i,idx_k,idx_l, &
+  !$OMP         tmp_i,tmp_k,tmp_l) &
+  !$OMP DEFAULT(NONE)
+
+  do sl = 1, 2
+    call shift_idx_spin(sl,n4_S,l_shift)
+    do sk = 1, 2
+      call shift_idx_spin(sk,n3_S,k_shift)
+      do si = 1, 2
+        call shift_idx_spin(si,n1_S,i_shift)
+
+        s = si+sj+sk+sl
+
+        ! <aa|aa> or <bb|bb>
+        if (s == 4 .or. s == 8) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_i = 1, n1_S(si)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l)
+                v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+
+        ! <ab|ab> or <ba|ba>
+        elseif (si == sk .and. sj == sl) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_i = 1, n1_S(si)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l)
+                v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+
+        ! <ab|ba> or <ba|ab>
+        elseif (si == sl .and. sj == sk) then
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_i = 1, n1_S(si)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l)
+                v_j(idx_i,idx_k,idx_l) = - cc_space_v(j,i,k,l)
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        else
+          ! Spin-forbidden block
+          !$OMP DO collapse(2)
+          do tmp_l = 1, n4_S(sl)
+            do tmp_k = 1, n3_S(sk)
+              do tmp_i = 1, n1_S(si)
+                l = list4(tmp_l,sl)
+                idx_l = tmp_l + l_shift
+                k = list3(tmp_k,sk)
+                idx_k = tmp_k + k_shift
+                i = list1(tmp_i,si)
+                idx_i = tmp_i + i_shift
+                v_j(idx_i,idx_k,idx_l) = 0d0
+              enddo
+            enddo
+          enddo
+          !$OMP END DO
+        endif
+
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL
+
+end