diff --git a/ocaml/qp_run.ml b/ocaml/qp_run.ml index b9d14efe..0cb862ae 100644 --- a/ocaml/qp_run.ml +++ b/ocaml/qp_run.ml @@ -38,7 +38,8 @@ let run slave ?prefix exe ezfio_file = | Unix.Unix_error _ -> try_new_port (port_number+100) in let result = - try_new_port 41279 + let port = 10*(Unix.getpid () mod 2823) + 32_769 in + try_new_port port in Zmq.Socket.close dummy_socket; Zmq.Context.terminate zmq_context; diff --git a/scripts/compilation/cache_compile.py b/scripts/compilation/cache_compile.py index 440f6498..473976e7 100755 --- a/scripts/compilation/cache_compile.py +++ b/scripts/compilation/cache_compile.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ Save the .o from a .f90 -and is the .o is asked a second time, retur it +and is the .o is asked a second time, return it Take in argv command like: ifort -g -openmp -I IRPF90_temp/Ezfio_files/ -c IRPF90_temp/Integrals_Monoelec/kin_ao_ints.irp.module.F90 -o IRPF90_temp/Integrals_Monoelec/kin_ao_ints.irp.module.o """ diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f index 446bf730..4f9ae76d 100644 --- a/src/ao_one_e_ints/pot_ao_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_ints.irp.f @@ -104,6 +104,9 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)] IF(do_pseudo) THEN ao_integrals_n_e += ao_pseudo_integrals ENDIF + IF(point_charges) THEN + ao_integrals_n_e += ao_integrals_pt_chrg + ENDIF endif diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index bb81b141..77eb6ddc 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -4,7 +4,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] ! Number of Cholesky vectors in AO basis END_DOC - cholesky_ao_num_guess = ao_num*ao_num / 2 + cholesky_ao_num_guess = ao_num*ao_num END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] @@ -44,19 +44,12 @@ END_PROVIDER do m=0,9 do l=1+m,ao_num,10 !$OMP DO SCHEDULE(dynamic) - do j=1,l + do j=1,ao_num do k=1,ao_num - do i=1,min(k,j) + do i=1,ao_num if (ao_two_e_integral_zero(i,j,k,l)) cycle integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map) ao_integrals(i,k,j,l) = integral - ao_integrals(k,i,j,l) = integral - ao_integrals(i,k,l,j) = integral - ao_integrals(k,i,l,j) = integral - ao_integrals(j,l,i,k) = integral - ao_integrals(j,l,k,i) = integral - ao_integrals(l,j,i,k) = integral - ao_integrals(l,j,k,i) = integral enddo enddo enddo diff --git a/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f index 527a98d5..ea9ff009 100644 --- a/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f +++ b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f @@ -29,14 +29,14 @@ double precision function ao_two_e_integral_cosgtos(i, j, k, l) complex*16 :: integral5, integral6, integral7, integral8 complex*16 :: integral_tot - double precision :: ao_two_e_integral_cosgtos_schwartz_accel + double precision :: ao_2e_cosgtos_schwartz_accel complex*16 :: ERI_cosgtos complex*16 :: general_primitive_integral_cosgtos if(ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024) then !print *, ' with shwartz acc ' - ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) + ao_two_e_integral_cosgtos = ao_2e_cosgtos_schwartz_accel(i, j, k, l) else !print *, ' without shwartz acc ' @@ -294,7 +294,7 @@ end function ao_two_e_integral_cosgtos ! --- -double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) +double precision function ao_2e_cosgtos_schwartz_accel(i, j, k, l) BEGIN_DOC ! integral of the AO basis or (ij|kl) @@ -329,7 +329,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) complex*16 :: ERI_cosgtos complex*16 :: general_primitive_integral_cosgtos - ao_two_e_integral_cosgtos_schwartz_accel = 0.d0 + ao_2e_cosgtos_schwartz_accel = 0.d0 dim1 = n_pt_max_integrals @@ -519,8 +519,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 - ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & - + coef4 * 2.d0 * real(integral_tot) + ao_2e_cosgtos_schwartz_accel = ao_2e_cosgtos_schwartz_accel + coef4 * 2.d0 * real(integral_tot) enddo ! s enddo ! r enddo ! q @@ -698,8 +697,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 - ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & - + coef4 * 2.d0 * real(integral_tot) + ao_2e_cosgtos_schwartz_accel = ao_2e_cosgtos_schwartz_accel + coef4 * 2.d0 * real(integral_tot) enddo ! s enddo ! r enddo ! q @@ -709,11 +707,11 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) deallocate(schwartz_kl) -end function ao_two_e_integral_cosgtos_schwartz_accel +end function ao_2e_cosgtos_schwartz_accel ! --- -BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,ao_num) ] +BEGIN_PROVIDER [ double precision, ao_2e_cosgtos_schwartz, (ao_num,ao_num)] BEGIN_DOC ! Needed to compute Schwartz inequalities @@ -723,16 +721,16 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,a integer :: i, k double precision :: ao_two_e_integral_cosgtos - ao_two_e_integral_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) + ao_2e_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) - !$OMP PARALLEL DO PRIVATE(i,k) & - !$OMP DEFAULT(NONE) & - !$OMP SHARED(ao_num, ao_two_e_integral_cosgtos_schwartz) & + !$OMP PARALLEL DO PRIVATE(i,k) & + !$OMP DEFAULT(NONE) & + !$OMP SHARED(ao_num, ao_2e_cosgtos_schwartz) & !$OMP SCHEDULE(dynamic) do i = 1, ao_num do k = 1, i - ao_two_e_integral_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) - ao_two_e_integral_cosgtos_schwartz(k,i) = ao_two_e_integral_cosgtos_schwartz(i,k) + ao_2e_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) + ao_2e_cosgtos_schwartz(k,i) = ao_2e_cosgtos_schwartz(i,k) enddo enddo !$OMP END PARALLEL DO diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index fd185641..0386f3c6 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -1,10 +1,13 @@ BEGIN_PROVIDER [integer, n_points_final_grid] - implicit none + BEGIN_DOC ! Number of points which are non zero END_DOC - integer :: i,j,k,l + + implicit none + integer :: i, j, k, l + n_points_final_grid = 0 do j = 1, nucl_num do i = 1, n_points_radial_grid -1 @@ -16,9 +19,11 @@ BEGIN_PROVIDER [integer, n_points_final_grid] enddo enddo enddo - print*,'n_points_final_grid = ',n_points_final_grid - print*,'n max point = ',n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) + + print*,' n_points_final_grid = ', n_points_final_grid + print*,' n max point = ', n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) call ezfio_set_becke_numerical_grid_n_points_final_grid(n_points_final_grid) + END_PROVIDER ! --- @@ -41,6 +46,10 @@ END_PROVIDER implicit none integer :: i, j, k, l, i_count double precision :: r(3) + double precision :: wall0, wall1 + + call wall_time(wall0) + print *, ' Providing final_grid_points ...' i_count = 0 do j = 1, nucl_num @@ -62,20 +71,34 @@ END_PROVIDER enddo enddo + FREE grid_points_per_atom + FREE final_weight_at_r + + call wall_time(wall1) + print *, ' wall time for final_grid_points,', wall1 - wall0 + call print_memory_usage() + END_PROVIDER ! --- BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)] - implicit none + BEGIN_DOC -! Transposed final_grid_points + ! Transposed final_grid_points END_DOC + implicit none integer :: i,j - do j=1,3 - do i=1,n_points_final_grid + + do j = 1, 3 + do i = 1, n_points_final_grid final_grid_points_transp(i,j) = final_grid_points(j,i) enddo enddo + END_PROVIDER + +! --- + + diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index 42bbe315..9ca624b9 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -1,21 +1,38 @@ +! --- + program bi_ort_ints - implicit none + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC + + implicit none + my_grid_becke = .True. - my_n_pt_r_grid = 10 - my_n_pt_a_grid = 14 - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + !my_n_pt_r_grid = 10 + !my_n_pt_a_grid = 14 + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + ! call test_3e +! call test_5idx +! call test_5idx2 + !call test_4idx + call test_4idx2() + call test_5idx2 call test_5idx -! call test_5idx2 end subroutine test_5idx2 PROVIDE three_e_5_idx_cycle_2_bi_ort end +subroutine test_4idx2() + !PROVIDE three_e_4_idx_direct_bi_ort + PROVIDE three_e_4_idx_exch23_bi_ort +end + subroutine test_3e implicit none integer :: i,k,j,l,m,n,ipoint @@ -60,6 +77,8 @@ subroutine test_5idx k = 1 n = 0 accu = 0.d0 + PROVIDE three_e_5_idx_direct_bi_ort_old + do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -68,29 +87,73 @@ subroutine test_5idx ! if (dabs(three_e_5_idx_direct_bi_ort(m,l,j,k,i) - three_e_5_idx_exch12_bi_ort(m,l,i,k,j)) > 1.d-10) then ! stop ! endif - -! new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) -! ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) + new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'direct' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif +! +! new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) ! contrib = dabs(new - ref) ! accu += contrib ! if(contrib .gt. 1.d-10)then -! print*,'direct' +! print*,'exch12' ! print*,i,k,j,l,m ! print*,ref,new,contrib ! stop ! endif ! - new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'exch12' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif +! +! new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle1' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle2' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch23' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_exch13_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch13' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif ! ! new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) ! ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) @@ -145,3 +208,184 @@ subroutine test_5idx end + +! --- + +subroutine test_4idx() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + thr = 1d-5 + + PROVIDE three_e_4_idx_direct_bi_ort_old + PROVIDE three_e_4_idx_direct_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_direct_bi_ort (l,k,j,i) + ref = three_e_4_idx_direct_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_direct_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_direct_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch13_bi_ort_old + PROVIDE three_e_4_idx_exch13_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch13_bi_ort (l,k,j,i) + ref = three_e_4_idx_exch13_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch13_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch13_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + +! PROVIDE three_e_4_idx_exch12_bi_ort_old +! PROVIDE three_e_4_idx_exch12_bi_ort +! +! accu = 0.d0 +! do i = 1, mo_num +! do j = 1, mo_num +! do k = 1, mo_num +! do l = 1, mo_num +! +! new = three_e_4_idx_exch12_bi_ort (l,k,j,i) +! ref = three_e_4_idx_exch12_bi_ort_old(l,k,j,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. thr) then +! print*, ' problem in three_e_4_idx_exch12_bi_ort' +! print*, l, k, j, i +! print*, ref, new, contrib +! stop +! endif +! +! enddo +! enddo +! enddo +! enddo +! print*, ' accu on three_e_4_idx_exch12_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_cycle_1_bi_ort_old + PROVIDE three_e_4_idx_cycle_1_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_cycle_1_bi_ort (l,k,j,i) + ref = three_e_4_idx_cycle_1_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_cycle_1_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_cycle_1_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + +! PROVIDE three_e_4_idx_cycle_2_bi_ort_old +! PROVIDE three_e_4_idx_cycle_2_bi_ort +! +! accu = 0.d0 +! do i = 1, mo_num +! do j = 1, mo_num +! do k = 1, mo_num +! do l = 1, mo_num +! +! new = three_e_4_idx_cycle_2_bi_ort (l,k,j,i) +! ref = three_e_4_idx_cycle_2_bi_ort_old(l,k,j,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. thr) then +! print*, ' problem in three_e_4_idx_cycle_2_bi_ort' +! print*, l, k, j, i +! print*, ref, new, contrib +! stop +! endif +! +! enddo +! enddo +! enddo +! enddo +! print*, ' accu on three_e_4_idx_cycle_2_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch23_bi_ort_old + PROVIDE three_e_4_idx_exch23_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch23_bi_ort (l,k,j,i) + ref = three_e_4_idx_exch23_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch23_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch23_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + return +end diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 771d3274..355fa38f 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -54,7 +54,7 @@ BEGIN_PROVIDER [ double precision, mo_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_poi enddo enddo -! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu + !FREE mo_v_ki_bi_ortho_erf_rk_cst_mu END_PROVIDER @@ -124,6 +124,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, enddo enddo + FREE int2_grad1_u12_ao_test + else PROVIDE int2_grad1_u12_ao @@ -153,7 +155,7 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)] implicit none - integer :: ipoint + integer :: ipoint double precision :: wall0, wall1 PROVIDE mo_l_coef mo_r_coef @@ -180,6 +182,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, !call wall_time(wall1) !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + !call print_memory_usage() END_PROVIDER @@ -188,7 +191,11 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] implicit none - integer :: i, j, ipoint + integer :: i, j, ipoint + double precision :: wall0, wall1 + + !call wall_time(wall0) + !print *, ' Providing int2_grad1_u12_bimo_t ...' PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_transp @@ -205,6 +212,10 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, FREE int2_grad1_u12_bimo_transp + !call wall_time(wall1) + !print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 + !call print_memory_usage() + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/three_body_ijm.irp.f b/src/bi_ort_ints/three_body_ijm.irp.f index b34638b8..ae100fb5 100644 --- a/src/bi_ort_ints/three_body_ijm.irp.f +++ b/src/bi_ort_ints/three_body_ijm.irp.f @@ -23,11 +23,11 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, provide mos_r_in_r_array_transp mos_l_in_r_array_transp - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,m,integral) & - !$OMP SHARED (mo_num,three_e_3_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,m,integral) & + !$OMP SHARED (mo_num,three_e_3_idx_direct_bi_ort) + !$OMP DO SCHEDULE (dynamic) do i = 1, mo_num do j = 1, mo_num do m = j, mo_num @@ -36,8 +36,8 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do i = 1, mo_num do j = 1, mo_num diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 95b57e37..ee7e88ef 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -1,287 +1,481 @@ ! --- -BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs ! - ! three_e_4_idx_direct_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_direct_bi_ort (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch13_bi_ort (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch12_bi_ort (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_exch13_bi_ort (j,m,k,i) + ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_cycle_1_bi_ort(j,m,k,i) ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_direct_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_direct_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, m, j, i, integral) - three_e_4_idx_direct_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_direct_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! three_e_4_idx_direct_bi_ort (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki + ! three_e_4_idx_exch13_bi_ort (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm + ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm ! END_DOC implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 + integer :: ipoint, i, j, k, l, m + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:) + double precision, allocatable :: tmp_4d(:,:,:,:) + double precision, allocatable :: tmp4(:,:,:) + double precision, allocatable :: tmp5(:,:) + double precision, allocatable :: tmp_3d(:,:,:) - three_e_4_idx_cycle_1_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_cycle_1_bi_ort ...' + print *, ' Providing the three_e_4_idx_bi_ort ...' call wall_time(wall0) provide mos_r_in_r_array_transp mos_l_in_r_array_transp - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp2(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1, tmp2, tmp3) + !$OMP DO COLLAPSE(2) do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, j, i, m, integral) - three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + do l = 1, mo_num + do ipoint = 1, n_points_final_grid - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_cycle_1_bi_ort', wall1 - wall0 - call print_memory_usage() + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) -END_PROVIDER + tmp2(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i) -! -- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_cycle_2_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_cycle_2_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, i, m, j, integral) - three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_cycle_2_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch23_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch23_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch23_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, j, m, i, integral) - three_e_4_idx_exch23_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch13_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch13_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch13_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, i, j, m, integral) - three_e_4_idx_exch13_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch13_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch12_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch12_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch12_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, m, i, j, integral) - three_e_4_idx_exch12_bi_ort(m,j,k,i) = -1.d0 * integral - enddo + tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) enddo enddo enddo !$OMP END DO !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_4d(m,i,j,k) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp2) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, tmp3, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp3) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort(m,j,k,i) = three_e_4_idx_direct_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp5(n_points_final_grid,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, & + !$OMP tmp5) + !$OMP DO + do i = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + allocate(tmp4(n_points_final_grid,mo_num,mo_num)) + + do m = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp4) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & + + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & + , tmp5, n_points_final_grid, tmp4, n_points_final_grid & + , 0.d0, tmp_3d, mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP mos_l_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp4) + !$OMP DO COLLAPSE(2) + do k = 1, mo_num + do j = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & + , 0.d0, tmp_3d, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + enddo + + deallocate(tmp5) + deallocate(tmp_3d) + + + + do i = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (m, j, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp4) + !$OMP DO COLLAPSE(2) + do j = 1, mo_num + do m = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & + , tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & + , 1.d0, three_e_4_idx_cycle_1_bi_ort(1,1,1,i), mo_num*mo_num) + + enddo + + deallocate(tmp4) + + +! !$OMP PARALLEL DO PRIVATE(i,j,k,m) +! do i = 1, mo_num +! do k = 1, mo_num +! do j = 1, mo_num +! do m = 1, mo_num +! three_e_4_idx_exch12_bi_ort (m,j,k,i) = three_e_4_idx_exch13_bi_ort (j,m,k,i) +! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(j,m,k,i) +! enddo +! enddo +! enddo +! enddo +! !$OMP END PARALLEL DO + + call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch12_bi_ort', wall1 - wall0 + print *, ' wall time for three_e_4_idx_bi_ort', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki + ! + END_DOC + + implicit none + integer :: i, j, k, l, m, ipoint + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp_4d(:,:,:,:) + double precision, allocatable :: tmp5(:,:,:), tmp6(:,:,:) + + print *, ' Providing the three_e_4_idx_exch23_bi_ort ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + + allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + allocate(tmp6(n_points_final_grid,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp5, tmp6) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * int2_grad1_u12_bimo_t(ipoint,1,i,l) & + + int2_grad1_u12_bimo_t(ipoint,2,l,i) * int2_grad1_u12_bimo_t(ipoint,2,i,l) & + + int2_grad1_u12_bimo_t(ipoint,3,l,i) * int2_grad1_u12_bimo_t(ipoint,3,i,l) + + tmp6(ipoint,l,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, -1.d0 & + , tmp5, n_points_final_grid, tmp6, n_points_final_grid & + , 0.d0, three_e_4_idx_exch23_bi_ort, mo_num*mo_num) + + deallocate(tmp5) + deallocate(tmp6) + + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch23_bi_ort(m,j,k,i) = three_e_4_idx_exch23_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmk_old.irp.f b/src/bi_ort_ints/three_body_ijmk_old.irp.f new file mode 100644 index 00000000..1a67f35b --- /dev/null +++ b/src/bi_ort_ints/three_body_ijmk_old.irp.f @@ -0,0 +1,290 @@ + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_direct_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_direct_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_direct_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, m, j, i, integral) + three_e_4_idx_direct_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_direct_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_cycle_1_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_cycle_1_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_cycle_1_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, j, i, m, integral) + three_e_4_idx_cycle_1_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_cycle_1_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! -- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_cycle_2_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_cycle_2_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_cycle_2_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, i, m, j, integral) + three_e_4_idx_cycle_2_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_cycle_2_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch23_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch23_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch23_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, j, m, i, integral) + three_e_4_idx_exch23_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch23_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch13_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch13_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch13_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, i, j, m, integral) + three_e_4_idx_exch13_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch13_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch12_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch12_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch12_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, m, i, j, integral) + three_e_4_idx_exch12_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch12_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index d67e1434..c2583c30 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -15,224 +15,227 @@ end ! ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs ! - ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = :: : notice that i is the RIGHT MO and k is the LEFT MO ! ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! - END_DOC implicit none - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint - double precision, allocatable :: grad_mli(:,:,:), orb_mat(:,:,:) - double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) - double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:) - allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + + integer :: i, j, k, m, l + double precision :: wall1, wall0 + integer :: ipoint + double precision, allocatable :: grad_mli(:,:), orb_mat(:,:,:) + double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:) + double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:) + double precision, allocatable :: tmp_mat(:,:,:) provide mos_r_in_r_array_transp mos_l_in_r_array_transp PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + call print_memory_usage print *, ' Providing the three_e_5_idx_bi_ort ...' call wall_time(wall0) - do m = 1, mo_num + three_e_5_idx_direct_bi_ort (:,:,:,:,:) = 0.d0 + three_e_5_idx_cycle_1_bi_ort(:,:,:,:,:) = 0.d0 + three_e_5_idx_cycle_2_bi_ort(:,:,:,:,:) = 0.d0 + three_e_5_idx_exch23_bi_ort (:,:,:,:,:) = 0.d0 + three_e_5_idx_exch13_bi_ort (:,:,:,:,:) = 0.d0 - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + call print_memory_usage + + allocate(tmp_mat(mo_num,mo_num,mo_num)) allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP grad_mli, orb_mat) - !$OMP DO COLLAPSE(2) + + !$OMP PARALLEL DO PRIVATE (i,l,ipoint) do i=1,mo_num do l=1,mo_num - do ipoint=1, n_points_final_grid + do ipoint=1, n_points_final_grid - grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & - int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & - int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & - int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) + orb_mat(ipoint,l,i) = final_weight_at_r_vector(ipoint) & + * mos_l_in_r_array_transp(ipoint,l) & + * mos_r_in_r_array_transp(ipoint,i) - orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) - - enddo - enddo - enddo - - !$OMP END DO - !$OMP END PARALLEL - - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, & - orb_mat, n_points_final_grid, & - grad_mli, n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_direct_bi_ort(m,l,j,k,i) = - tmp_mat(l,j,k,i) - tmp_mat(k,i,l,j) - enddo enddo enddo enddo !$OMP END PARALLEL DO - deallocate(orb_mat,grad_mli) + tmp_mat = 0.d0 + call print_memory_usage + do m = 1, mo_num - allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) + allocate(grad_mli(n_points_final_grid,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid + do i=1,mo_num + !$OMP PARALLEL DO PRIVATE (l,ipoint) + do l=1,mo_num + do ipoint=1, n_points_final_grid - lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + grad_mli(ipoint,l) = & + int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) +& + int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) +& + int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) - rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) - rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + enddo + enddo + !$OMP END PARALLEL DO - rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) - rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) - rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) + call dgemm('T','N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0,& + orb_mat, n_points_final_grid, & + grad_mli, n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rm_grad_ik, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num + !$OMP PARALLEL PRIVATE(j,k,l) + !$OMP DO + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP DO do j = 1, mo_num do l = 1, mo_num - three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k,i) + do k = 1, mo_num + three_e_5_idx_direct_bi_ort(m,k,i,l,j) = three_e_5_idx_direct_bi_ort(m,k,i,l,j) - tmp_mat(l,j,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + + deallocate(grad_mli) + + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL DO PRIVATE (i,l,ipoint) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) + enddo enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + allocate(rm_grad_ik(n_points_final_grid,3,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num)) - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) + do i=1,mo_num + !$OMP PARALLEL DO PRIVATE (l,ipoint) + do l=1,mo_num + do ipoint=1, n_points_final_grid + + rm_grad_ik(ipoint,1,l) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + + rk_grad_im(ipoint,1,l) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,j,k) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = - tmp_mat(k,j,i,l) - three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = - tmp_mat(k,i,j,l) - three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = - tmp_mat(l,j,i,k) enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lm_grad_ik, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - deallocate(lm_grad_ik) - - allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP lk_grad_mi) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid - - lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) - - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lk_grad_mi, 3*n_points_final_grid, & - rm_grad_ik, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l,i) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) - three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k,i) - three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(k,i,l,j) + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k) + enddo enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lm_grad_ik, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lk_grad_mi, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,j,i,k) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,i,j,l) - three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,j,i,l) - three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,i,j,k) + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) = three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) - tmp_mat(l,k,j) + three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) = three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) - tmp_mat(k,j,l) + three_e_5_idx_exch23_bi_ort (m,i,j,k,l) = three_e_5_idx_exch23_bi_ort (m,i,j,k,l) - tmp_mat(k,l,j) + three_e_5_idx_exch13_bi_ort (m,l,j,i,k) = three_e_5_idx_exch13_bi_ort (m,l,j,i,k) - tmp_mat(l,j,k) + enddo enddo enddo + !$OMP END PARALLEL DO + + + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lk_grad_mi, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l) + three_e_5_idx_cycle_2_bi_ort(m,l,i,k,j) = three_e_5_idx_cycle_2_bi_ort(m,l,i,k,j) - tmp_mat(l,j,k) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k) + three_e_5_idx_exch13_bi_ort (m,l,i,k,j) = three_e_5_idx_exch13_bi_ort (m,l,i,k,j) - tmp_mat(k,j,l) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lk_grad_mi, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) = three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) - tmp_mat(l,j,k) + three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) = three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) - tmp_mat(k,l,j) + three_e_5_idx_exch23_bi_ort (m,i,j,k,l) = three_e_5_idx_exch23_bi_ort (m,i,j,k,l) - tmp_mat(k,j,l) + three_e_5_idx_exch13_bi_ort (m,l,j,i,k) = three_e_5_idx_exch13_bi_ort (m,l,j,i,k) - tmp_mat(l,k,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + enddo - enddo - !$OMP END PARALLEL DO - deallocate(lk_grad_mi) - deallocate(rm_grad_ik) - deallocate(rk_grad_im) + deallocate(rm_grad_ik) + deallocate(rk_grad_im) + deallocate(lk_grad_mi) + deallocate(lm_grad_ik) + enddo + deallocate(tmp_mat) + + deallocate(orb_mat) call wall_time(wall1) print *, ' wall time for three_e_5_idx_bi_ort', wall1 - wall0 diff --git a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f index 42130575..25572854 100644 --- a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f +++ b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f @@ -46,7 +46,7 @@ BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid, mos_r_in_r_array_transp(i,j) = mos_r_in_r_array(j,i) enddo enddo - + END_PROVIDER ! --- @@ -116,7 +116,7 @@ end subroutine give_all_mos_l_at_r ! --- -BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo_num)] +BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp, (n_points_final_grid,mo_num)] BEGIN_DOC ! mos_l_in_r_array_transp(i,j) = value of the jth mo on the ith grid point @@ -130,7 +130,7 @@ BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo mos_l_in_r_array_transp(i,j) = mos_l_in_r_array(j,i) enddo enddo - + END_PROVIDER ! --- diff --git a/src/ccsd/EZFIO.cfg b/src/ccsd/EZFIO.cfg new file mode 100644 index 00000000..328cd981 --- /dev/null +++ b/src/ccsd/EZFIO.cfg @@ -0,0 +1,11 @@ +[energy] +type: double precision +doc: CCSD energy +interface: ezfio + +[energy_t] +type: double precision +doc: CCSD(T) energy +interface: ezfio + + diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 1467d9a4..40c57188 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -135,8 +135,11 @@ subroutine run_ccsd_space_orb write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r print*,'' - call write_t1(nO,nV,t1) - call write_t2(nO,nV,t2) + if (write_amplitudes) then + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + call ezfio_set_utils_cc_io_amplitudes('Read') + endif ! Deallocation if (cc_update_method == 'diis') then @@ -147,6 +150,7 @@ subroutine run_ccsd_space_orb ! CCSD(T) double precision :: e_t + e_t = 0.d0 if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then @@ -182,8 +186,7 @@ subroutine run_ccsd_space_orb print*,'' endif - print*,'Reference determinant:' - call print_det(det,N_int) + call save_energy(uncorr_energy + energy, e_t) deallocate(t1,t2) diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index 23e2cef1..a267cc45 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -269,8 +269,11 @@ subroutine run_ccsd_spin_orb write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r print*,'' - call write_t1(nO,nV,t1) - call write_t2(nO,nV,t2) + if (write_amplitudes) then + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + call ezfio_set_utils_cc_io_amplitudes('Read') + endif ! Deallocate if (cc_update_method == 'diis') then @@ -284,8 +287,9 @@ subroutine run_ccsd_spin_orb deallocate(v_ovoo,v_oovo) deallocate(v_ovvo,v_ovov,v_oovv) + double precision :: t_corr + t_corr = 0.d0 if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then - double precision :: t_corr print*,'CCSD(T) calculation...' call wall_time(ta) !allocate(v_vvvo(nV,nV,nV,nO)) @@ -307,8 +311,8 @@ subroutine run_ccsd_spin_orb write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + t_corr, ' Ha' print*,'' endif - print*,'Reference determinant:' - call print_det(det,N_int) + + call save_energy(uncorr_energy + energy, t_corr) deallocate(f_oo,f_ov,f_vv,f_o,f_v) deallocate(v_ooov,v_vvoo,t1,t2) diff --git a/src/ccsd/save_energy.irp.f b/src/ccsd/save_energy.irp.f new file mode 100644 index 00000000..30d93ec3 --- /dev/null +++ b/src/ccsd/save_energy.irp.f @@ -0,0 +1,13 @@ +subroutine save_energy(E,ET) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. + END_DOC + double precision, intent(in) :: E, ET + call ezfio_set_ccsd_energy(E) + if (ET /= 0.d0) then + call ezfio_set_ccsd_energy_t(E+ET) + endif +end + + diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 0705d103..b8fa2895 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -88,6 +88,10 @@ subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset) particle_mask(k,1) = iand(generators_bitmask(k,1,s_part), not(psi_det_generators(k,1,i_generator)) ) particle_mask(k,2) = iand(generators_bitmask(k,2,s_part), not(psi_det_generators(k,2,i_generator)) ) enddo + if ((subset == 1).and.(sum(hole_mask(:,2)) == 0_bit_kind)) then + ! No beta electron to excite + call select_singles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,b) + endif call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,b,subset,csubset) deallocate(fock_diag_tmp) end subroutine @@ -142,7 +146,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d use selection_types implicit none BEGIN_DOC -! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted +! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted END_DOC integer, intent(in) :: i_generator, subset, csubset @@ -237,7 +241,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d enddo ! Iterate on 0S alpha, and find betas TQ such that exc_degree <= 4 - ! Remove also contributions < 1.d-20) do j=1,N_det_alpha_unique call get_excitation_degree_spin(psi_det_alpha_unique(1,j), & psi_det_generators(1,1,i_generator), nt, N_int) @@ -480,7 +483,9 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d do s2=s1,2 sp = s1 - if(s1 /= s2) sp = 3 + if(s1 /= s2) then + sp = 3 + endif ib = 1 if(s1 == s2) ib = i1+1 @@ -528,7 +533,10 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) deallocate(banned, bannedOrb,mat) end subroutine -subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) + +BEGIN_TEMPLATE + +subroutine fill_buffer_$DOUBLE(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) use bitmasks use selection_types implicit none @@ -562,7 +570,20 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d s1 = sp s2 = sp end if - call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) + + if ($IS_DOUBLE) then + if (h2 == 0) then + print *, 'h2=0 in '//trim(irp_here) + stop + endif + call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) + else + if (h2 /= 0) then + print *, 'h2 /= in '//trim(irp_here) + stop + endif + call apply_hole(psi_det_generators(1,1,i_generator), s1, h1, mask, ok, N_int) + endif E_shift = 0.d0 if (h0_type == 'CFG') then @@ -570,12 +591,15 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d E_shift = psi_det_Hii(i_generator) - psi_configuration_Hii(j) endif - do p1=1,mo_num - if(bannedOrb(p1, s1)) cycle + $DO_p1 +! do p1=1,mo_num + + if (bannedOrb(p1, s1)) cycle ib = 1 if(sp /= 3) ib = p1+1 - do p2=ib,mo_num + $DO_p2 + ! do p2=ib,mo_num ! ----- ! /!\ Generating only single excited determinants doesn't work because a @@ -584,9 +608,10 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d ! detected as already generated when generating in the future with a ! double excitation. ! ----- - - if(bannedOrb(p2, s2)) cycle - if(banned(p1,p2)) cycle + if ($IS_DOUBLE) then + if(bannedOrb(p2, s2)) cycle + if(banned(p1,p2)) cycle + endif if(pseudo_sym)then if(dabs(mat(1, p1, p2)).lt.thresh_sym)then @@ -596,7 +621,11 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d val = maxval(abs(mat(1:N_states, p1, p2))) if( val == 0d0) cycle - call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + if ($IS_DOUBLE) then + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + else + call apply_particle(mask, s1, p1, det, ok, N_int) + endif if (do_only_cas) then integer, external :: number_of_holes, number_of_particles @@ -797,7 +826,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d case(5) ! Variance selection if (h0_type == 'CFG') then - w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) & + w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) & / c0_weight(istate) else w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) @@ -857,10 +886,19 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(w <= buf%mini) then call add_to_selection_buffer(buf, det, w) end if - end do - end do + ! enddo + $ENDDO_p1 +! enddo + $ENDDO_p2 end +SUBST [ DOUBLE , DO_p1 , ENDDO_p1 , DO_p2 , ENDDO_p2 , IS_DOUBLE ] + +double ; do p1=1,mo_num ; enddo ; do p2=ib,mo_num ; enddo ; .True. ;; +single ; do p1=1,mo_num ; enddo ; p2=1 ; ; .False. ;; + +END_TEMPLATE + subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) use bitmasks implicit none @@ -882,6 +920,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere PROVIDE psi_selectors_coef_transp psi_det_sorted mat = 0d0 + p=0 do i=1,N_int negMask(i,1) = not(mask(i,1)) @@ -1435,7 +1474,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(1,sp) p2 = p(2,sp) do puti=1, mo_num - if(bannedOrb(puti, sp)) cycle + if (bannedOrb(puti, sp)) cycle call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map) call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map) do putj=puti+1, mo_num @@ -1446,7 +1485,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) if (hij == 0.d0) cycle else - hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj)) + hij = hij_cache1(putj) - hij_cache2(putj) if (hij == 0.d0) cycle hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) end if @@ -1506,7 +1545,7 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) use bitmasks implicit none BEGIN_DOC -! Identify the determinants in det which are in the internal space. These are +! Identify the determinants in det that are in the internal space. These are ! the determinants that can be produced by creating two particles on the mask. END_DOC @@ -1534,7 +1573,7 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl end do - ! If det(i) < det(i_gen), it hs already been considered + ! If det(i) < det(i_gen), it has already been considered if(interesting(i) < i_gen) then fullMatch = .true. return @@ -1585,352 +1624,4 @@ end -! OLD unoptimized routines for debugging -! ====================================== - -subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(bit_kind), intent(in) :: phasemask(N_int,2) - logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) - integer(bit_kind) :: det(N_int, 2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_num, mo_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - - integer :: i, j, s, h1, h2, p1, p2, puti, putj - double precision :: hij, phase - double precision, external :: get_phase_bi, mo_two_e_integral - logical :: ok - - integer :: bant - bant = 1 - - - if(sp == 3) then ! AB - h1 = p(1,1) - h2 = p(1,2) - do p1=1, mo_num - if(bannedOrb(p1, 1)) cycle - do p2=1, mo_num - if(bannedOrb(p2,2)) cycle - if(banned(p1, p2, bant)) cycle ! rentable? - if(p1 == h1 .or. p2 == h2) then - call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - else - phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) - hij = mo_two_e_integral(p1, p2, h1, h2) * phase - end if - mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij - end do - end do - else ! AA BB - p1 = p(1,sp) - p2 = p(2,sp) - do puti=1, mo_num - if(bannedOrb(puti, sp)) cycle - do putj=puti+1, mo_num - if(bannedOrb(putj, sp)) cycle - if(banned(puti, putj, bant)) cycle ! rentable? - if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then - call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - else - hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) - end if - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end do - end do - end if -end - -subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer(bit_kind), intent(in) :: phasemask(N_int,2) - logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) - integer(bit_kind) :: det(N_int, 2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_num, mo_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) - double precision, external :: get_phase_bi, mo_two_e_integral - logical :: ok - - logical, allocatable :: lbanned(:,:) - integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j - integer :: hfix, pfix, h1, h2, p1, p2, ib - - integer, parameter :: turn2(2) = (/2,1/) - integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) - - integer :: bant - - - allocate (lbanned(mo_num, 2)) - lbanned = bannedOrb - - do i=1, p(0,1) - lbanned(p(i,1), 1) = .true. - end do - do i=1, p(0,2) - lbanned(p(i,2), 2) = .true. - end do - - ma = 1 - if(p(0,2) >= 2) ma = 2 - mi = turn2(ma) - - bant = 1 - - if(sp == 3) then - !move MA - if(ma == 2) bant = 2 - puti = p(1,mi) - hfix = h(1,ma) - p1 = p(1,ma) - p2 = p(2,ma) - if(.not. bannedOrb(puti, mi)) then - tmp_row = 0d0 - do putj=1, hfix-1 - if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle - hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) - end do - do putj=hfix+1, mo_num - if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle - hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) - end do - - if(ma == 1) then - mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num) - else - mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row(1:N_states,1:mo_num) - end if - end if - - !MOVE MI - pfix = p(1,mi) - tmp_row = 0d0 - tmp_row2 = 0d0 - do puti=1,mo_num - if(lbanned(puti,mi)) cycle - !p1 fixed - putj = p1 - if(.not. banned(putj,puti,bant)) then - hij = mo_two_e_integral(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) - tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) - end if - - putj = p2 - if(.not. banned(putj,puti,bant)) then - hij = mo_two_e_integral(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) - tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) - end if - end do - - if(mi == 1) then - mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:) - mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:) - else - mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:) - mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:) - end if - else - if(p(0,ma) == 3) then - do i=1,3 - hfix = h(1,ma) - puti = p(i, ma) - p1 = p(turn3(1,i), ma) - p2 = p(turn3(2,i), ma) - tmp_row = 0d0 - do putj=1,hfix-1 - if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle - hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) - end do - do putj=hfix+1,mo_num - if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle - hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) - end do - - mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1) - mat(:, puti, puti:) = mat(:, puti, puti:) + tmp_row(:,puti:) - end do - else - hfix = h(1,mi) - pfix = p(1,mi) - p1 = p(1,ma) - p2 = p(2,ma) - tmp_row = 0d0 - tmp_row2 = 0d0 - do puti=1,mo_num - if(lbanned(puti,ma)) cycle - putj = p2 - if(.not. banned(puti,putj,1)) then - hij = mo_two_e_integral(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) - tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) - end if - - putj = p1 - if(.not. banned(puti,putj,1)) then - hij = mo_two_e_integral(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) - tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) - end if - end do - mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1) - mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:) - mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1) - mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:) - end if - end if - deallocate(lbanned) - - !! MONO - if(sp == 3) then - s1 = 1 - s2 = 2 - else - s1 = sp - s2 = sp - end if - - do i1=1,p(0,s1) - ib = 1 - if(s1 == s2) ib = i1+1 - do i2=ib,p(0,s2) - p1 = p(i1,s1) - p2 = p(i2,s2) - if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle - call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij - end do - end do -end - -subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer(bit_kind), intent(in) :: phasemask(2,N_int) - logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_num, mo_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - - double precision, external :: get_phase_bi, mo_two_e_integral - - integer :: i, j, tip, ma, mi, puti, putj - integer :: h1, h2, p1, p2, i1, i2 - double precision :: hij, phase - - integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) - integer, parameter :: turn2(2) = (/2, 1/) - integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) - - integer :: bant - bant = 1 - - tip = p(0,1) * p(0,2) - - ma = sp - if(p(0,1) > p(0,2)) ma = 1 - if(p(0,1) < p(0,2)) ma = 2 - mi = mod(ma, 2) + 1 - - if(sp == 3) then - if(ma == 2) bant = 2 - - if(tip == 3) then - puti = p(1, mi) - do i = 1, 3 - putj = p(i, ma) - if(banned(putj,puti,bant)) cycle - i1 = turn3(1,i) - i2 = turn3(2,i) - p1 = p(i1, ma) - p2 = p(i2, ma) - h1 = h(1, ma) - h2 = h(2, ma) - - hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) - if(ma == 1) then - mat(:, putj, puti) = mat(:, putj, puti) + coefs(:) * hij - else - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end if - end do - else - h1 = h(1,1) - h2 = h(1,2) - do j = 1,2 - putj = p(j, 2) - p2 = p(turn2(j), 2) - do i = 1,2 - puti = p(i, 1) - - if(banned(puti,putj,bant)) cycle - p1 = p(turn2(i), 1) - - hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2,N_int) - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end do - end do - end if - - else - if(tip == 0) then - h1 = h(1, ma) - h2 = h(2, ma) - do i=1,3 - puti = p(i, ma) - do j=i+1,4 - putj = p(j, ma) - if(banned(puti,putj,1)) cycle - - i1 = turn2d(1, i, j) - i2 = turn2d(2, i, j) - p1 = p(i1, ma) - p2 = p(i2, ma) - hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2,N_int) - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end do - end do - else if(tip == 3) then - h1 = h(1, mi) - h2 = h(1, ma) - p1 = p(1, mi) - do i=1,3 - puti = p(turn3(1,i), ma) - putj = p(turn3(2,i), ma) - if(banned(puti,putj,1)) cycle - p2 = p(i, ma) - - hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2,N_int) - mat(:, min(puti, putj), max(puti, putj)) = mat(:, min(puti, putj), max(puti, putj)) + coefs(:) * hij - end do - else ! tip == 4 - puti = p(1, sp) - putj = p(2, sp) - if(.not. banned(puti,putj,1)) then - p1 = p(1, mi) - p2 = p(2, mi) - h1 = h(1, mi) - h2 = h(2, mi) - hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2,N_int) - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end if - end if - end if -end - diff --git a/src/cipsi/selection_old.irp.f b/src/cipsi/selection_old.irp.f new file mode 100644 index 00000000..8fd5bc2b --- /dev/null +++ b/src/cipsi/selection_old.irp.f @@ -0,0 +1,350 @@ + +! OLD unoptimized routines for debugging +! ====================================== + +subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + integer :: i, j, s, h1, h2, p1, p2, puti, putj + double precision :: hij, phase + double precision, external :: get_phase_bi, mo_two_e_integral + logical :: ok + + integer :: bant + bant = 1 + + + if(sp == 3) then ! AB + h1 = p(1,1) + h2 = p(1,2) + do p1=1, mo_num + if(bannedOrb(p1, 1)) cycle + do p2=1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = mo_two_e_integral(p1, p2, h1, h2) * phase + end if + mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij + end do + end do + else ! AA BB + p1 = p(1,sp) + p2 = p(2,sp) + do puti=1, mo_num +! do not cycle here? otherwise singles will be missed?? + if(bannedOrb(puti, sp)) cycle + do putj=puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + end if + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + end if +end + +subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) + double precision, external :: get_phase_bi, mo_two_e_integral + logical :: ok + + logical, allocatable :: lbanned(:,:) + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j + integer :: hfix, pfix, h1, h2, p1, p2, ib + + integer, parameter :: turn2(2) = (/2,1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + + + allocate (lbanned(mo_num, 2)) + lbanned = bannedOrb + + do i=1, p(0,1) + lbanned(p(i,1), 1) = .true. + end do + do i=1, p(0,2) + lbanned(p(i,2), 2) = .true. + end do + + ma = 1 + if(p(0,2) >= 2) ma = 2 + mi = turn2(ma) + + bant = 1 + + if(sp == 3) then + !move MA + if(ma == 2) bant = 2 + puti = p(1,mi) + hfix = h(1,ma) + p1 = p(1,ma) + p2 = p(2,ma) + if(.not. bannedOrb(puti, mi)) then + tmp_row = 0d0 + do putj=1, hfix-1 + if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle + hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) + end do + do putj=hfix+1, mo_num + if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle + hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) + end do + + if(ma == 1) then + mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num) + else + mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row(1:N_states,1:mo_num) + end if + end if + + !MOVE MI + pfix = p(1,mi) + tmp_row = 0d0 + tmp_row2 = 0d0 + do puti=1,mo_num + if(lbanned(puti,mi)) cycle + !p1 fixed + putj = p1 + if(.not. banned(putj,puti,bant)) then + hij = mo_two_e_integral(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) + tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) + end if + + putj = p2 + if(.not. banned(putj,puti,bant)) then + hij = mo_two_e_integral(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) + tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) + end if + end do + + if(mi == 1) then + mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:) + mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:) + else + mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:) + mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:) + end if + else + if(p(0,ma) == 3) then + do i=1,3 + hfix = h(1,ma) + puti = p(i, ma) + p1 = p(turn3(1,i), ma) + p2 = p(turn3(2,i), ma) + tmp_row = 0d0 + do putj=1,hfix-1 + if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle + hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + end do + do putj=hfix+1,mo_num + if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle + hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + end do + + mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1) + mat(:, puti, puti:) = mat(:, puti, puti:) + tmp_row(:,puti:) + end do + else + hfix = h(1,mi) + pfix = p(1,mi) + p1 = p(1,ma) + p2 = p(2,ma) + tmp_row = 0d0 + tmp_row2 = 0d0 + do puti=1,mo_num + if(lbanned(puti,ma)) cycle + putj = p2 + if(.not. banned(puti,putj,1)) then + hij = mo_two_e_integral(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) + tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) + end if + + putj = p1 + if(.not. banned(puti,putj,1)) then + hij = mo_two_e_integral(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) + tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) + end if + end do + mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1) + mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:) + mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1) + mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:) + end if + end if + deallocate(lbanned) + + !! MONO + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + do i1=1,p(0,s1) + ib = 1 + if(s1 == s2) ib = i1+1 + do i2=ib,p(0,s2) + p1 = p(i1,s1) + p2 = p(i2,s2) + if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij + end do + end do +end + +subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(2,N_int) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + double precision, external :: get_phase_bi, mo_two_e_integral + + integer :: i, j, tip, ma, mi, puti, putj + integer :: h1, h2, p1, p2, i1, i2 + double precision :: hij, phase + + integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) + integer, parameter :: turn2(2) = (/2, 1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + bant = 1 + + tip = p(0,1) * p(0,2) + + ma = sp + if(p(0,1) > p(0,2)) ma = 1 + if(p(0,1) < p(0,2)) ma = 2 + mi = mod(ma, 2) + 1 + + if(sp == 3) then + if(ma == 2) bant = 2 + + if(tip == 3) then + puti = p(1, mi) + do i = 1, 3 + putj = p(i, ma) + if(banned(putj,puti,bant)) cycle + i1 = turn3(1,i) + i2 = turn3(2,i) + p1 = p(i1, ma) + p2 = p(i2, ma) + h1 = h(1, ma) + h2 = h(2, ma) + + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + if(ma == 1) then + mat(:, putj, puti) = mat(:, putj, puti) + coefs(:) * hij + else + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end if + end do + else + h1 = h(1,1) + h2 = h(1,2) + do j = 1,2 + putj = p(j, 2) + p2 = p(turn2(j), 2) + do i = 1,2 + puti = p(i, 1) + + if(banned(puti,putj,bant)) cycle + p1 = p(turn2(i), 1) + + hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + end if + + else + if(tip == 0) then + h1 = h(1, ma) + h2 = h(2, ma) + do i=1,3 + puti = p(i, ma) + do j=i+1,4 + putj = p(j, ma) + if(banned(puti,putj,1)) cycle + + i1 = turn2d(1, i, j) + i2 = turn2d(2, i, j) + p1 = p(i1, ma) + p2 = p(i2, ma) + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + else if(tip == 3) then + h1 = h(1, mi) + h2 = h(1, ma) + p1 = p(1, mi) + do i=1,3 + puti = p(turn3(1,i), ma) + putj = p(turn3(2,i), ma) + if(banned(puti,putj,1)) cycle + p2 = p(i, ma) + + hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2,N_int) + mat(:, min(puti, putj), max(puti, putj)) = mat(:, min(puti, putj), max(puti, putj)) + coefs(:) * hij + end do + else ! tip == 4 + puti = p(1, sp) + putj = p(2, sp) + if(.not. banned(puti,putj,1)) then + p1 = p(1, mi) + p2 = p(2, mi) + h1 = h(1, mi) + h2 = h(2, mi) + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end if + end if + end if +end + diff --git a/src/cipsi/selection_singles.irp.f b/src/cipsi/selection_singles.irp.f new file mode 100644 index 00000000..3821576c --- /dev/null +++ b/src/cipsi/selection_singles.irp.f @@ -0,0 +1,356 @@ +use bitmasks + +subroutine select_singles(i_gen,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,buf) + use bitmasks + use selection_types + implicit none + BEGIN_DOC +! Select determinants connected to i_det by H + END_DOC + integer, intent(in) :: i_gen + integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) + double precision, intent(in) :: fock_diag_tmp(mo_num) + double precision, intent(in) :: E0(N_states) + type(pt2_type), intent(inout) :: pt2_data + type(selection_buffer), intent(inout) :: buf + + logical, allocatable :: banned(:,:), bannedOrb(:) + double precision, allocatable :: mat(:,:,:) + integer :: i, j, k + integer :: h1,h2,s1,s2,i1,i2,ib,sp + integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2) + logical :: fullMatch, ok + + + do k=1,N_int + hole (k,1) = iand(psi_det_generators(k,1,i_gen), hole_mask(k,1)) + hole (k,2) = iand(psi_det_generators(k,2,i_gen), hole_mask(k,2)) + particle(k,1) = iand(not(psi_det_generators(k,1,i_gen)), particle_mask(k,1)) + particle(k,2) = iand(not(psi_det_generators(k,2,i_gen)), particle_mask(k,2)) + enddo + + allocate(banned(mo_num,mo_num), bannedOrb(mo_num), mat(N_states, mo_num, 1)) + banned = .False. + + ! Create lists of holes and particles + ! ----------------------------------- + + integer :: N_holes(2), N_particles(2) + integer :: hole_list(N_int*bit_kind_size,2) + integer :: particle_list(N_int*bit_kind_size,2) + + call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) + call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) + + do sp=1,2 + do i=1, N_holes(sp) + h1 = hole_list(i,sp) + call apply_hole(psi_det_generators(1,1,i_gen), sp, h1, mask, ok, N_int) + bannedOrb = .true. + do j=1,N_particles(sp) + bannedOrb(particle_list(j, sp)) = .false. + end do + call spot_hasBeen(mask, sp, psi_det_sorted, i_gen, N_det, bannedOrb, fullMatch) + if(fullMatch) cycle + mat = 0d0 + call splash_p(mask, sp, psi_selectors(1,1,i_gen), psi_selectors_coef_transp(1,i_gen), N_det_selectors - i_gen + 1, bannedOrb, mat(1,1,1)) + call fill_buffer_single(i_gen, sp, h1, 0, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) + end do + enddo +end subroutine + + +subroutine spot_hasBeen(mask, sp, det, i_gen, N, banned, fullMatch) + use bitmasks + implicit none + + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) + integer, intent(in) :: i_gen, N, sp + logical, intent(inout) :: banned(mo_num) + logical, intent(out) :: fullMatch + + + integer :: i, j, na, nb, list(3), nt + integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) + + fullMatch = .false. + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + genl : do i=1, N + nt = 0 + + do j=1, N_int + myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) + myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) + nt += popcnt(myMask(j, 1)) + popcnt(myMask(j, 2)) + end do + + if(nt > 3) cycle + + if(nt <= 2 .and. i < i_gen) then + fullMatch = .true. + return + end if + + call bitstring_to_list(myMask(1,sp), list(1), na, N_int) + + if(nt == 3 .and. i < i_gen) then + do j=1,na + banned(list(j)) = .true. + end do + else if(nt == 1 .and. na == 1) then + banned(list(1)) = .true. + end if + end do genl +end subroutine + + +subroutine splash_p(mask, sp, det, coefs, N_sel, bannedOrb, vect) + use bitmasks + implicit none + + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int,2,N_sel) + double precision, intent(in) :: coefs(N_states, N_sel) + integer, intent(in) :: sp, N_sel + logical, intent(inout) :: bannedOrb(mo_num) + double precision, intent(inout) :: vect(N_states, mo_num) + + integer :: i, j, h(0:2,2), p(0:3,2), nt + integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) + integer(bit_kind) :: phasemask(N_int, 2) + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + do i=1, N_sel + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt > 3) cycle + + do j=1,N_int + perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) + perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) + end do + + call bitstring_to_list(perMask(1,1), h(1,1), h(0,1), N_int) + call bitstring_to_list(perMask(1,2), h(1,2), h(0,2), N_int) + + call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) + + call get_mask_phase(psi_det_sorted(1,1,i), phasemask, N_int) + + if(nt == 3) then + call get_m2(det(1,1,i), phasemask, bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + else if(nt == 2) then + call get_m1(det(1,1,i), phasemask, bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + else + call get_m0(det(1,1,i), phasemask, bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + end if + end do +end subroutine + + +subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int, 2) + logical, intent(in) :: bannedOrb(mo_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i, j, h1, h2, p1, p2, sfix, hfix, pfix, hmob, pmob, puti + double precision :: hij + double precision, external :: get_phase_bi, mo_two_e_integral + + integer, parameter :: turn3_2(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + integer, parameter :: turn2(2) = (/2,1/) + + if(h(0,sp) == 2) then + h1 = h(1, sp) + h2 = h(2, sp) + do i=1,3 + puti = p(i, sp) + if(bannedOrb(puti)) cycle + p1 = p(turn3_2(1,i), sp) + p2 = p(turn3_2(2,i), sp) + hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2, p1, h1, h2) + hij *= get_phase_bi(phasemask, sp, sp, h1, p1, h2, p2) + vect(:, puti) += hij * coefs + end do + else if(h(0,sp) == 1) then + sfix = turn2(sp) + hfix = h(1,sfix) + pfix = p(1,sfix) + hmob = h(1,sp) + do j=1,2 + puti = p(j, sp) + if(bannedOrb(puti)) cycle + pmob = p(turn2(j), sp) + hij = mo_two_e_integral(pfix, pmob, hfix, hmob) + hij *= get_phase_bi(phasemask, sp, sfix, hmob, pmob, hfix, pfix) + vect(:, puti) += hij * coefs + end do + else + puti = p(1,sp) + if(.not. bannedOrb(puti)) then + sfix = turn2(sp) + p1 = p(1,sfix) + p2 = p(2,sfix) + h1 = h(1,sfix) + h2 = h(2,sfix) + hij = (mo_two_e_integral(p1,p2,h1,h2) - mo_two_e_integral(p2,p1,h1,h2)) + hij *= get_phase_bi(phasemask, sfix, sfix, h1, p1, h2, p2) + vect(:, puti) += hij * coefs + end if + end if +end subroutine + +subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int, 2) + logical, intent(in) :: bannedOrb(mo_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i, hole, p1, p2, sh + logical :: ok, lbanned(mo_num) + integer(bit_kind) :: det(N_int, 2) + double precision :: hij + double precision, external :: get_phase_bi,mo_two_e_integral + + lbanned = bannedOrb + sh = 1 + if(h(0,2) == 1) sh = 2 + hole = h(1, sh) + lbanned(p(1,sp)) = .true. + if(p(0,sp) == 2) lbanned(p(2,sp)) = .true. + !print *, "SPm1", sp, sh + + p1 = p(1, sp) + + if(sp == sh) then + p2 = p(2, sp) + lbanned(p2) = .true. + + do i=1,hole-1 + if(lbanned(i)) cycle + hij = (mo_two_e_integral(p1, p2, i, hole) - mo_two_e_integral(p2, p1, i, hole)) + hij *= get_phase_bi(phasemask, sp, sp, i, p1, hole, p2) + vect(:,i) += hij * coefs + end do + do i=hole+1,mo_num + if(lbanned(i)) cycle + hij = (mo_two_e_integral(p1, p2, hole, i) - mo_two_e_integral(p2, p1, hole, i)) + hij *= get_phase_bi(phasemask, sp, sp, hole, p1, i, p2) + vect(:,i) += hij * coefs + end do + + call apply_particle(mask, sp, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, p2) += hij * coefs + else + p2 = p(1, sh) + do i=1,mo_num + if(lbanned(i)) cycle + hij = mo_two_e_integral(p1, p2, i, hole) + hij *= get_phase_bi(phasemask, sp, sh, i, p1, hole, p2) + vect(:,i) += hij * coefs + end do + end if + + call apply_particle(mask, sp, p1, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, p1) += hij * coefs +end subroutine + +subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int, 2) + logical, intent(in) :: bannedOrb(mo_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i + logical :: ok, lbanned(mo_num) + integer(bit_kind) :: det(N_int, 2) + double precision :: hij + + lbanned = bannedOrb + lbanned(p(1,sp)) = .true. + do i=1,mo_num + if(lbanned(i)) cycle + call apply_particle(mask, sp, i, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, i) += hij * coefs + end do +end subroutine + + + +! +!subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) +! use bitmasks +! use selection_types +! implicit none +! +! integer, intent(in) :: i_generator, sp, h1 +! double precision, intent(in) :: vect(N_states, mo_num) +! logical, intent(in) :: bannedOrb(mo_num) +! double precision, intent(in) :: fock_diag_tmp(mo_num) +! double precision, intent(in) :: E0(N_states) +! double precision, intent(inout) :: pt2(N_states) +! type(selection_buffer), intent(inout) :: buf +! logical :: ok +! integer :: s1, s2, p1, p2, ib, istate +! integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) +! double precision :: e_pert, delta_E, val, Hii, max_e_pert, tmp +! double precision, external :: diag_H_mat_elem_fock +! +! +! call apply_hole(psi_det_generators(1,1,i_generator), sp, h1, mask, ok, N_int) +! +! do p1=1,mo_num +! if(bannedOrb(p1)) cycle +! if(vect(1, p1) == 0d0) cycle +! call apply_particle(mask, sp, p1, det, ok, N_int) +! +! +! Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) +! max_e_pert = 0d0 +! +! do istate=1,N_states +! val = vect(istate, p1) + vect(istate, p1) +! delta_E = E0(istate) - Hii +! tmp = dsqrt(delta_E * delta_E + val * val) +! if (delta_E < 0.d0) then +! tmp = -tmp +! endif +! e_pert = 0.5d0 * ( tmp - delta_E) +! pt2(istate) += e_pert +! if(dabs(e_pert) > dabs(max_e_pert)) max_e_pert = e_pert +! end do +! +! if(dabs(max_e_pert) > buf%mini) call add_to_selection_buffer(buf, det, max_e_pert) +! end do +!end subroutine +! diff --git a/src/ezfio_files/00.create.bats b/src/ezfio_files/00.create.bats index cfa6247d..49430a0b 100644 --- a/src/ezfio_files/00.create.bats +++ b/src/ezfio_files/00.create.bats @@ -23,6 +23,34 @@ function run { qp set mo_two_e_ints io_mo_two_e_integrals "Write" } +@test "H2_1" { + run h2_1.xyz 1 0 cc-pvdz +} + +@test "H2_3" { + run h2_3.xyz 3 0 cc-pvdz +} + +@test "H3_2" { + run h3_2.xyz 2 0 cc-pvdz +} + +@test "H3_4" { + run h3_4.xyz 4 0 cc-pvdz +} + +@test "H4_1" { + run h4_1.xyz 1 0 cc-pvdz +} + +@test "H4_3" { + run h4_3.xyz 3 0 cc-pvdz +} + +@test "H4_5" { + run h4_5.xyz 5 0 cc-pvdz +} + @test "B-B" { qp set_file b2_stretched.ezfio diff --git a/src/fci/40.fci.bats b/src/fci/40.fci.bats index 4523d0e0..889bf90a 100644 --- a/src/fci/40.fci.bats +++ b/src/fci/40.fci.bats @@ -10,8 +10,8 @@ function run() { qp set perturbation do_pt2 False qp set determinants n_det_max 8000 qp set determinants n_states 1 - qp set davidson threshold_davidson 1.e-10 - qp set davidson n_states_diag 8 + qp set davidson_keywords threshold_davidson 1.e-10 + qp set davidson_keywords n_states_diag 8 qp run fci energy1="$(ezfio get fci energy | tr '[]' ' ' | cut -d ',' -f 1)" eq $energy1 $1 $thresh @@ -24,99 +24,134 @@ function run_stoch() { qp set perturbation do_pt2 True qp set determinants n_det_max $3 qp set determinants n_states 1 - qp set davidson threshold_davidson 1.e-10 - qp set davidson n_states_diag 1 + qp set davidson_keywords threshold_davidson 1.e-10 + qp set davidson_keywords n_states_diag 1 qp run fci energy1="$(ezfio get fci energy_pt2 | tr '[]' ' ' | cut -d ',' -f 1)" eq $energy1 $1 $thresh } -@test "B-B" { +@test "H2_1" { # 1s + qp set_file h2_1.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.06415255 1.e-8 10000 +} + +@test "H2_3" { # 1s + qp set_file h2_3.ezfio + qp set perturbation pt2_max 0. + run_stoch -0.96029881 1.e-8 10000 +} + +@test "H3_2" { # 3s + qp set_file h3_2.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.61003132 1.e-8 10000 +} + +@test "H3_4" { # 2s + qp set_file h3_4.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.02434843 1.e-8 10000 +} + +@test "H4_1" { # 13s + qp set_file h4_1.ezfio + qp set perturbation pt2_max 0. + run_stoch -2.01675062 1.e-8 10000 +} + +@test "H4_3" { # 10s + qp set_file h4_3.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.95927626 1.e-8 10000 +} + +@test "H4_5" { # 3s + qp set_file h4_5.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.25852765 1.e-8 10000 +} + +@test "B-B" { # 10s qp set_file b2_stretched.ezfio qp set determinants n_det_max 10000 qp set_frozen_core run_stoch -49.14103054419 3.e-4 10000 } -@test "F2" { # 4.07m - [[ -n $TRAVIS ]] && skip - qp set_file f2.ezfio - qp set_frozen_core - run_stoch -199.304922384814 3.e-3 100000 -} - -@test "NH3" { # 10.6657s +@test "NH3" { # 8s qp set_file nh3.ezfio qp set_mo_class --core="[1-4]" --act="[5-72]" run -56.244753429144986 3.e-4 100000 } -@test "DHNO" { # 11.4721s +@test "DHNO" { # 8s qp set_file dhno.ezfio qp set_mo_class --core="[1-7]" --act="[8-64]" - run -130.459020029816 3.e-4 100000 + run -130.466208113547 3.e-4 100000 } -@test "HCO" { # 12.2868s +@test "HCO" { # 32s qp set_file hco.ezfio - run -113.393356604085 1.e-3 100000 + run -113.395751656985 1.e-3 100000 } -@test "H2O2" { # 12.9214s +@test "H2O2" { # 21s qp set_file h2o2.ezfio qp set_mo_class --core="[1-2]" --act="[3-24]" --del="[25-38]" run -151.005848404095 1.e-3 100000 } -@test "HBO" { # 13.3144s +@test "HBO" { # 18s [[ -n $TRAVIS ]] && skip qp set_file hbo.ezfio - run -100.213 1.5e-3 100000 + run -100.214 1.5e-3 100000 } -@test "H2O" { # 11.3727s +@test "H2O" { # 16s [[ -n $TRAVIS ]] && skip qp set_file h2o.ezfio - run -76.2361605151999 5.e-4 100000 + run -76.238051555276 5.e-4 100000 } -@test "ClO" { # 13.3755s +@test "ClO" { # 47s [[ -n $TRAVIS ]] && skip qp set_file clo.ezfio - run -534.546453546852 1.e-3 100000 + run -534.548529710256 1.e-3 100000 } -@test "SO" { # 13.4952s +@test "SO" { # 23s [[ -n $TRAVIS ]] && skip qp set_file so.ezfio run -26.015 3.e-3 100000 } -@test "H2S" { # 13.6745s +@test "H2S" { # 37s [[ -n $TRAVIS ]] && skip qp set_file h2s.ezfio - run -398.859577605891 5.e-4 100000 + run -398.864853669111 5.e-4 100000 } -@test "OH" { # 13.865s +@test "OH" { # 12s [[ -n $TRAVIS ]] && skip qp set_file oh.ezfio - run -75.6121856748294 3.e-4 100000 + run -75.615 1.5e-3 100000 } -@test "SiH2_3B1" { # 13.938ss +@test "SiH2_3B1" { # 10s [[ -n $TRAVIS ]] && skip qp set_file sih2_3b1.ezfio - run -290.0175411299477 3.e-4 100000 + run -290.0206626734517 3.e-4 100000 } -@test "H3COH" { # 14.7299s +@test "H3COH" { # 33s [[ -n $TRAVIS ]] && skip qp set_file h3coh.ezfio - run -115.205632960026 1.e-3 100000 + run -115.206784386204 1.e-3 100000 } -@test "SiH3" { # 15.99s +@test "SiH3" { # 15s [[ -n $TRAVIS ]] && skip qp set_file sih3.ezfio run -5.572 1.e-3 100000 @@ -132,7 +167,7 @@ function run_stoch() { @test "ClF" { # 16.8864s [[ -n $TRAVIS ]] && skip qp set_file clf.ezfio - run -559.169748890031 1.5e-3 100000 + run -559.174371468224 1.5e-3 100000 } @test "SO2" { # 17.5645s @@ -170,12 +205,11 @@ function run_stoch() { run -187.970184372047 1.6e-3 100000 } - @test "[Cu(NH3)4]2+" { # 25.0417s [[ -n $TRAVIS ]] && skip qp set_file cu_nh3_4_2plus.ezfio qp set_mo_class --core="[1-24]" --act="[25-45]" --del="[46-87]" - run -1862.9869374387192 3.e-04 100000 + run -1862.98320066637 3.e-04 100000 } @test "HCN" { # 20.3273s @@ -185,3 +219,10 @@ function run_stoch() { run -93.078 2.e-3 100000 } +@test "F2" { # 4.07m + [[ -n $TRAVIS ]] && skip + qp set_file f2.ezfio + qp set_frozen_core + run_stoch -199.304922384814 3.e-3 100000 +} + diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index ed75c882..3e6f229b 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -54,14 +54,18 @@ subroutine run_cipsi_tc implicit none - if (.not.is_zmq_slave) then + if (.not. is_zmq_slave) then + PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e - if(elec_alpha_num+elec_beta_num.ge.3)then + + if(elec_alpha_num+elec_beta_num .ge. 3) then if(three_body_h_tc)then - call provide_all_three_ints_bi_ortho + call provide_all_three_ints_bi_ortho() endif endif - ! --- + + FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp + write(json_unit,json_array_open_fmt) 'fci_tc' if (do_pt2) then @@ -76,13 +80,16 @@ subroutine run_cipsi_tc call json_close else + PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks + if(elec_alpha_num+elec_beta_num.ge.3)then if(three_body_h_tc)then call provide_all_three_ints_bi_ortho endif endif - ! --- + + FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp call run_slave_cipsi diff --git a/src/hartree_fock/10.hf.bats b/src/hartree_fock/10.hf.bats index df566032..6e7d0233 100644 --- a/src/hartree_fock/10.hf.bats +++ b/src/hartree_fock/10.hf.bats @@ -43,11 +43,39 @@ python write_pt_charges.py ${EZFIO} qp set nuclei point_charges True qp run scf | tee ${EZFIO}.pt_charges.out energy="$(ezfio get hartree_fock energy)" -good=-92.76613324421798 +good=-92.79920682236470 eq $energy $good $thresh rm -rf $EZFIO } +@test "H2_1" { # 1s + run h2_1.ezfio -1.005924963288527 +} + +@test "H2_3" { # 1s + run h2_3.ezfio -0.9591011604845440 +} + +@test "H3_2" { # 1s + run h3_2.ezfio -1.558273529860488 +} + +@test "H3_4" { # 1s + run h3_4.ezfio -1.0158684760025190 +} + +@test "H4_1" { # 1s + run h4_1.ezfio -1.932022805374405 +} + +@test "H4_3" { # 1s + run h4_3.ezfio -1.8948449927787350 +} + +@test "H4_5" { # 1s + run h4_5.ezfio -1.2408338805496990 +} + @test "point charges" { run_pt_charges } @@ -56,6 +84,8 @@ rm -rf $EZFIO run hcn.ezfio -92.88717500035233 } + + @test "B-B" { # 3s run b2_stretched.ezfio -48.9950585434279 } diff --git a/src/mo_optimization/my_providers.irp.f b/src/mo_optimization/my_providers.irp.f deleted file mode 100644 index 7469ffd5..00000000 --- a/src/mo_optimization/my_providers.irp.f +++ /dev/null @@ -1,141 +0,0 @@ -! Dimensions of MOs - - -BEGIN_PROVIDER [ integer, n_mo_dim ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of MOs we can build, - ! with i>j - END_DOC - - n_mo_dim = mo_num*(mo_num-1)/2 - -END_PROVIDER - -BEGIN_PROVIDER [ integer, n_mo_dim_core ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of core MOs we can build, - ! with i>j - END_DOC - - n_mo_dim_core = dim_list_core_orb*(dim_list_core_orb-1)/2 - -END_PROVIDER - -BEGIN_PROVIDER [ integer, n_mo_dim_act ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of active MOs we can build, - ! with i>j - END_DOC - - n_mo_dim_act = dim_list_act_orb*(dim_list_act_orb-1)/2 - -END_PROVIDER - -BEGIN_PROVIDER [ integer, n_mo_dim_inact ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of inactive MOs we can build, - ! with i>j - END_DOC - - n_mo_dim_inact = dim_list_inact_orb*(dim_list_inact_orb-1)/2 - -END_PROVIDER - -BEGIN_PROVIDER [ integer, n_mo_dim_virt ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of virtual MOs we can build, - ! with i>j - END_DOC - - n_mo_dim_virt = dim_list_virt_orb*(dim_list_virt_orb-1)/2 - -END_PROVIDER - -! Energies/criterions - -BEGIN_PROVIDER [ double precision, my_st_av_energy ] - implicit none - BEGIN_DOC - ! State average CI energy - END_DOC - - !call update_st_av_ci_energy(my_st_av_energy) - call state_average_energy(my_st_av_energy) - -END_PROVIDER - -! With all the MOs - -BEGIN_PROVIDER [ double precision, my_gradient_opt, (n_mo_dim) ] -&BEGIN_PROVIDER [ double precision, my_CC1_opt ] - implicit none - BEGIN_DOC - ! - Gradient of the energy with respect to the MO rotations, for all the MOs. - ! - Maximal element of the gradient in absolute value - END_DOC - - double precision :: norm_grad - - PROVIDE mo_two_e_integrals_in_map - - call gradient_opt(n_mo_dim, my_gradient_opt, my_CC1_opt, norm_grad) - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, my_hessian_opt, (n_mo_dim, n_mo_dim) ] - implicit none - BEGIN_DOC - ! - Gradient of the energy with respect to the MO rotations, for all the MOs. - ! - Maximal element of the gradient in absolute value - END_DOC - - double precision, allocatable :: h_f(:,:,:,:) - - PROVIDE mo_two_e_integrals_in_map - - allocate(h_f(mo_num, mo_num, mo_num, mo_num)) - - call hessian_list_opt(n_mo_dim, my_hessian_opt, h_f) - -END_PROVIDER - -! With the list of active MOs -! Can be generalized to any mo_class by changing the list/dimension - -BEGIN_PROVIDER [ double precision, my_gradient_list_opt, (n_mo_dim_act) ] -&BEGIN_PROVIDER [ double precision, my_CC2_opt ] - implicit none - BEGIN_DOC - ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! - ! - Maximal element of the gradient in absolute value - END_DOC - - double precision :: norm_grad - - PROVIDE mo_two_e_integrals_in_map !one_e_dm_mo two_e_dm_mo mo_one_e_integrals - - call gradient_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_list_opt, my_CC2_opt, norm_grad) - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, my_hessian_list_opt, (n_mo_dim_act, n_mo_dim_act) ] - implicit none - BEGIN_DOC - ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! - ! - Maximal element of the gradient in absolute value - END_DOC - - double precision, allocatable :: h_f(:,:,:,:) - - PROVIDE mo_two_e_integrals_in_map - - allocate(h_f(dim_list_act_orb, dim_list_act_orb, dim_list_act_orb, dim_list_act_orb)) - - call hessian_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_list_opt, h_f) - -END_PROVIDER diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 8b1e6e1c..32c0dccd 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -27,6 +27,8 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, double precision, allocatable :: buffer(:,:) print *, 'AO->MO Transformation of Cholesky vectors .' + + call set_multiple_levels_omp(.False.) !$OMP PARALLEL PRIVATE(i,j,k,buffer) allocate(buffer(mo_num,mo_num)) !$OMP DO SCHEDULE(static) diff --git a/src/nuclei/nuclei.irp.f b/src/nuclei/nuclei.irp.f index fabdc42e..bb8cc782 100644 --- a/src/nuclei/nuclei.irp.f +++ b/src/nuclei/nuclei.irp.f @@ -206,7 +206,12 @@ BEGIN_PROVIDER [ double precision, nuclear_repulsion ] enddo nuclear_repulsion *= 0.5d0 if(point_charges)then - nuclear_repulsion += pt_chrg_nuclei_interaction + pt_chrg_interaction + print*,'bear nuclear repulsion = ',nuclear_repulsion + print*,'adding the interaction between the nuclein and the point charges' + print*,'to the usual nuclear repulsion ' + nuclear_repulsion += pt_chrg_nuclei_interaction + print*,'new nuclear repulsion = ',nuclear_repulsion + print*,'WARNING: we do not add the interaction between the point charges themselves' endif end if diff --git a/src/nuclei/point_charges.irp.f b/src/nuclei/point_charges.irp.f index b955537f..66905c8c 100644 --- a/src/nuclei/point_charges.irp.f +++ b/src/nuclei/point_charges.irp.f @@ -205,5 +205,8 @@ BEGIN_PROVIDER [ double precision, pt_chrg_nuclei_interaction] enddo print*,'Interaction between point charges and nuclei' print*,'pt_chrg_nuclei_interaction = ',pt_chrg_nuclei_interaction + if(point_charges)then + provide pt_chrg_interaction + endif END_PROVIDER diff --git a/src/scf_utils/fock_matrix.irp.f b/src/scf_utils/fock_matrix.irp.f index 61633d3b..1942e542 100644 --- a/src/scf_utils/fock_matrix.irp.f +++ b/src/scf_utils/fock_matrix.irp.f @@ -5,6 +5,90 @@ ! Fock matrix on the MO basis. ! For open shells, the ROHF Fock Matrix is :: ! + ! | Rcc | F^b | Fcv | + ! |-----------------------| + ! | F^b | Roo | F^a | + ! |-----------------------| + ! | Fcv | F^a | Rvv | + ! + ! C: Core, O: Open, V: Virtual + ! + ! Rcc = Acc Fcc^a + Bcc Fcc^b + ! Roo = Aoo Foo^a + Boo Foo^b + ! Rvv = Avv Fvv^a + Bvv Fvv^b + ! Fcv = (F^a + F^b)/2 + ! + ! F^a: Fock matrix alpha (MO), F^b: Fock matrix beta (MO) + ! A,B: Coupling parameters + ! + ! J. Chem. Phys. 133, 141102 (2010), https://doi.org/10.1063/1.3503173 + ! Coupling parameters from J. Chem. Phys. 125, 204110 (2006); https://doi.org/10.1063/1.2393223. + ! cc oo vv + ! A -0.5 0.5 1.5 + ! B 1.5 0.5 -0.5 + ! + END_DOC + integer :: i,j,n + if (elec_alpha_num == elec_beta_num) then + Fock_matrix_mo = Fock_matrix_mo_alpha + else + ! Core + do j = 1, elec_beta_num + ! Core + do i = 1, elec_beta_num + fock_matrix_mo(i,j) = - 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 1.5d0 * fock_matrix_mo_beta(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + fock_matrix_mo(i,j) = fock_matrix_mo_beta(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + fock_matrix_mo(i,j) = 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + enddo + ! Open + do j = elec_beta_num+1, elec_alpha_num + ! Core + do i = 1, elec_beta_num + fock_matrix_mo(i,j) = fock_matrix_mo_beta(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + fock_matrix_mo(i,j) = 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + fock_matrix_mo(i,j) = fock_matrix_mo_alpha(i,j) + enddo + enddo + ! Virtual + do j = elec_alpha_num+1, mo_num + ! Core + do i = 1, elec_beta_num + fock_matrix_mo(i,j) = 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + fock_matrix_mo(i,j) = fock_matrix_mo_alpha(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + fock_matrix_mo(i,j) = 1.5d0 * fock_matrix_mo_alpha(i,j) & + - 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + enddo + endif + + ! Old + ! BEGIN_DOC + ! Fock matrix on the MO basis. + ! For open shells, the ROHF Fock Matrix is :: + ! ! | F-K | F + K/2 | F | ! |---------------------------------| ! | F + K/2 | F | F - K/2 | @@ -16,64 +100,64 @@ ! ! K = Fb - Fa ! - END_DOC - integer :: i,j,n - if (elec_alpha_num == elec_beta_num) then - Fock_matrix_mo = Fock_matrix_mo_alpha - else + ! END_DOC + !integer :: i,j,n + !if (elec_alpha_num == elec_beta_num) then + ! Fock_matrix_mo = Fock_matrix_mo_alpha + !else - do j=1,elec_beta_num - ! F-K - do i=1,elec_beta_num !CC - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - - (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F+K/2 - do i=elec_beta_num+1,elec_alpha_num !CA - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F - do i=elec_alpha_num+1, mo_num !CV - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) - enddo - enddo + ! do j=1,elec_beta_num + ! ! F-K + ! do i=1,elec_beta_num !CC + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! - (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F+K/2 + ! do i=elec_beta_num+1,elec_alpha_num !CA + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F + ! do i=elec_alpha_num+1, mo_num !CV + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + ! enddo + ! enddo - do j=elec_beta_num+1,elec_alpha_num - ! F+K/2 - do i=1,elec_beta_num !AC - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F - do i=elec_beta_num+1,elec_alpha_num !AA - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) - enddo - ! F-K/2 - do i=elec_alpha_num+1, mo_num !AV - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - enddo + ! do j=elec_beta_num+1,elec_alpha_num + ! ! F+K/2 + ! do i=1,elec_beta_num !AC + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F + ! do i=elec_beta_num+1,elec_alpha_num !AA + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + ! enddo + ! ! F-K/2 + ! do i=elec_alpha_num+1, mo_num !AV + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! enddo - do j=elec_alpha_num+1, mo_num - ! F - do i=1,elec_beta_num !VC - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) - enddo - ! F-K/2 - do i=elec_beta_num+1,elec_alpha_num !VA - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F+K - do i=elec_alpha_num+1,mo_num !VV - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) & - + (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - enddo + ! do j=elec_alpha_num+1, mo_num + ! ! F + ! do i=1,elec_beta_num !VC + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + ! enddo + ! ! F-K/2 + ! do i=elec_beta_num+1,elec_alpha_num !VA + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F+K + ! do i=elec_alpha_num+1,mo_num !VV + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) & + ! + (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! enddo - endif + !endif do i = 1, mo_num Fock_matrix_diag_mo(i) = Fock_matrix_mo(i,i) @@ -115,8 +199,6 @@ END_PROVIDER - - BEGIN_PROVIDER [ double precision, Fock_matrix_mo_alpha, (mo_num,mo_num) ] implicit none BEGIN_DOC diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index cc01d144..7259c270 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -11,22 +11,16 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ implicit none - integer :: i, h1, p1, h2, p2 + integer :: i, ii, h1, p1, h2, p2, ipoint integer :: hh1, hh2, pp1, pp2 integer :: Ne(2) - double precision :: hthree_aba, hthree_aaa, hthree_aab - double precision :: wall0, wall1 + double precision :: wall0, wall1, walli, wallf integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) print*,' Providing normal_two_body_bi_orth ...' - call wall_time(wall0) + call wall_time(walli) - PROVIDE N_int - - print*,' Providing normal_two_body_bi_orth ...' - call wall_time(wall0) - if(read_tc_norm_ord) then open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") @@ -35,6 +29,11 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ else + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + double precision, allocatable :: tmp(:,:,:,:) + PROVIDE N_int allocate( occ(N_int*bit_kind_size,2) ) @@ -42,67 +41,978 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ if(core_tc_op) then do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - normal_two_body_bi_orth = 0.d0 + allocate(tmp(mo_num,mo_num,mo_num,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & - !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1, n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1, n_act_orb - p2 = list_act(pp2) - ! all contributions from the 3-e terms to the double excitations - ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant - + ! --- + ! aba contraction - ! opposite spin double excitations : s1 /= s2 - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) + print*,' Providing aba_contraction ...' + call wall_time(wall0) - ! same spin double excitations : s1 == s2 - if(h1h2 - ! same spin double excitations with same spin contributions - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - ! with opposite spin contributions - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then - ! same spin double excitations with same spin contributions - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif - endif - normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + tmp = 0.d0 + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmp_2d(mo_num,mo_num)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmp_2d) + + tmp = -0.5d0 * tmp + call sum_A_At(tmp(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for aba_contraction', wall1-wall0 + + normal_two_body_bi_orth = tmp + + ! --- + ! aab contraction + + print*,' Providing aab_contraction ...' + call wall_time(wall0) + + tmp = 0.d0 + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpvec_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) enddo enddo enddo enddo !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo !$OMP END PARALLEL - deallocate( occ ) - deallocate( key_i_core ) - endif + call wall_time(wall1) + print*,' Wall time for aab_contraction', wall1-wall0 + + normal_two_body_bi_orth += tmp + + ! --- + ! aaa contraction + + if(Ne(2) .ge. 3) then + + print*,' Providing aaa_contraction ...' + call wall_time(wall0) + + tmp = 0.d0 + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + call wall_time(wallf) + print*,' Wall time for aaa_contraction', wall1-wall0 + + normal_two_body_bi_orth += tmp + endif ! Ne(2) .ge. 3 + + deallocate(tmp) + + endif ! read_tc_norm_ord if(write_tc_norm_ord.and.mpi_master) then open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") @@ -112,285 +1022,10 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ call ezfio_set_tc_keywords_io_tc_integ('Read') endif - call wall_time(wall1) - print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 - - call wall_time(wall1) - print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 + call wall_time(wallf) + print*,' Wall time for normal_two_body_bi_orth ', wallf-walli END_PROVIDER ! --- -subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii, i - double precision :: int_direct, int_exc_12, int_exc_13, integral - - !!!! double alpha/beta - hthree = 0.d0 - - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral - - hthree += 2.d0 * int_direct - 1.d0 * (int_exc_13 + int_exc_12) - enddo - - do ii = Ne(2) + 1, Ne(1) ! purely open-shell part - i = occ(ii,1) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral - - hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) - enddo - - return -end - -! --- - -BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! Normal ordered two-body sector of the three-body terms for opposite spin double excitations - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: h1, p1, h2, p2, i - integer :: hh1, hh2, pp1, pp2 - integer :: Ne(2) - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree - - PROVIDE N_int - - allocate( key_i_core(N_int,2) ) - allocate( occ(N_int*bit_kind_size,2) ) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) - else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) - endif - - normal_two_body_bi_orth_ab = 0.d0 - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1, n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1, n_act_orb - p2 = list_act(pp2) - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) - - normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree - enddo - enddo - enddo - enddo - - deallocate( key_i_core ) - deallocate( occ ) - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)] - - BEGIN_DOC - ! Normal ordered two-body sector of the three-body terms for same spin double excitations - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i,ii,j,h1,p1,h2,p2 - integer :: hh1,hh2,pp1,pp2 - integer :: Ne(2) - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree_aab, hthree_aaa - - PROVIDE N_int - - allocate( key_i_core(N_int,2) ) - allocate( occ(N_int*bit_kind_size,2) ) - - if(core_tc_op)then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - normal_two_body_bi_orth_aa_bb = 0.d0 - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1 , n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1 , n_act_orb - p2 = list_act(pp2) - if(h1h2 - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif - endif - normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) = hthree_aab + hthree_aaa - enddo - enddo - enddo - enddo - - deallocate( key_i_core ) - deallocate( occ ) - -END_PROVIDER - -! --- - -subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - BEGIN_DOC - ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii,i - double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 - double precision :: integral,int_exc_l,int_exc_ll - - hthree = 0.d0 - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) - int_exc_l = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) - int_exc_ll= -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12= -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13= -1.d0 * integral - - call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) - int_exc_23= -1.d0 * integral - - hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) - enddo - - do ii = Ne(2)+1,Ne(1) ! purely open-shell part - i = occ(ii,1) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) - int_exc_l = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) - int_exc_ll = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) - int_exc_23 = -1.d0 * integral - - hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) - enddo - - return -end - -! --- - -subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii, i - double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 - double precision :: integral, int_exc_l, int_exc_ll - - hthree = 0.d0 - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - - call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) - int_exc_23= -1.d0 * integral - - hthree += 1.d0 * int_direct - int_exc_23 - enddo - - return -end - -! --- - diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f new file mode 100644 index 00000000..855cfd17 --- /dev/null +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -0,0 +1,1062 @@ + +! --- + +BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aba_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmp_2d(mo_num,mo_num)) + + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + no_aba_contraction = -0.5d0 * no_aba_contraction + call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for no_aba_contraction', wall1-wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aab_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpvec_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + no_aab_contraction = -0.5d0 * no_aab_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aab_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for no_aab_contraction', wall1-wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + + BEGIN_DOC + ! + ! if: + ! h1 < h2 + ! p1 > p2 + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] + ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! else: + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! + ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) + ! J(p2,h2,p1,h1) = \sum_i [ < i p2 p1 | i h2 h1 > + ! + < p2 p1 i | i h2 h1 > + ! + < p1 i p2 | i h2 h1 > ] + ! + ! + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + + print*,' Providing no_aaa_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + if(Ne(2) .lt. 3) then + + no_aaa_contraction = 0.d0 + + else + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2, tmp3) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2, tmpvec_3) + + no_aaa_contraction = -0.5d0 * no_aaa_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aaa_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + endif + + call wall_time(wall1) + print*,' Wall time for no_aaa_contraction', wall1-wall0 + +END_PROVIDER + +! --- diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f new file mode 100644 index 00000000..417580dd --- /dev/null +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -0,0 +1,392 @@ + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! Normal ordering of the three body interaction on the HF density + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + + integer :: i, h1, p1, h2, p2 + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + double precision :: hthree_aba, hthree_aaa, hthree_aab + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + + print*,' Providing normal_two_body_bi_orth_old ...' + call wall_time(wall0) + + PROVIDE N_int + + if(read_tc_norm_ord) then + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="read") + read(11) normal_two_body_bi_orth_old + close(11) + + else + + PROVIDE N_int + + allocate( occ(N_int*bit_kind_size,2) ) + allocate( key_i_core(N_int,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + else + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + endif + + normal_two_body_bi_orth_old = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & + !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth_old) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + ! all contributions from the 3-e terms to the double excitations + ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant + + + ! opposite spin double excitations : s1 /= s2 + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) + + ! same spin double excitations : s1 == s2 + if(h1h2 + ! same spin double excitations with same spin contributions + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + else + ! with opposite spin contributions + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + if(Ne(2).ge.3)then + ! same spin double excitations with same spin contributions + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif + + normal_two_body_bi_orth_old(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate( occ ) + deallocate( key_i_core ) + endif + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth_old + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif + + call wall_time(wall1) + print*,' Wall time for normal_two_body_bi_orth_old ', wall1-wall0 + +END_PROVIDER + +! --- + +subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii, i + double precision :: int_direct, int_exc_12, int_exc_13, integral + + !!!! double alpha/beta + hthree = 0.d0 + + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + hthree += 2.d0 * int_direct - 1.d0 * (int_exc_13 + int_exc_12) + enddo + + do ii = Ne(2) + 1, Ne(1) ! purely open-shell part + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) + enddo + + return +end + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! Normal ordered two-body sector of the three-body terms for opposite spin double excitations + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: h1, p1, h2, p2, i + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision :: hthree + + PROVIDE N_int + + allocate( key_i_core(N_int,2) ) + allocate( occ(N_int*bit_kind_size,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + else + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + endif + + normal_two_body_bi_orth_ab = 0.d0 + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) + + normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree + enddo + enddo + enddo + enddo + + deallocate( key_i_core ) + deallocate( occ ) + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)] + + BEGIN_DOC + ! Normal ordered two-body sector of the three-body terms for same spin double excitations + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i,ii,j,h1,p1,h2,p2 + integer :: hh1,hh2,pp1,pp2 + integer :: Ne(2) + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision :: hthree_aab, hthree_aaa + + PROVIDE N_int + + allocate( key_i_core(N_int,2) ) + allocate( occ(N_int*bit_kind_size,2) ) + + if(core_tc_op)then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + normal_two_body_bi_orth_aa_bb = 0.d0 + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1 , n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1 , n_act_orb + p2 = list_act(pp2) + if(h1h2 + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + else + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif + normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) = hthree_aab + hthree_aaa + enddo + enddo + enddo + enddo + + deallocate( key_i_core ) + deallocate( occ ) + +END_PROVIDER + +! --- + +subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + BEGIN_DOC + ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii,i + double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 + double precision :: integral,int_exc_l,int_exc_ll + + hthree = 0.d0 + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) + int_exc_l = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) + int_exc_ll= -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12= -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13= -1.d0 * integral + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) + int_exc_23= -1.d0 * integral + + hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) + enddo + + do ii = Ne(2)+1,Ne(1) ! purely open-shell part + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) + int_exc_l = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) + int_exc_ll = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) + int_exc_23 = -1.d0 * integral + + !hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) + hthree += 0.5d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) + enddo + + return +end + +! --- + +subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii, i + double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 + double precision :: integral, int_exc_l, int_exc_ll + + hthree = 0.d0 + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) + int_exc_23= -1.d0 * integral + + hthree += 1.d0 * int_direct - int_exc_23 + enddo + + return +end + +! --- + diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 882470ed..ceefbfb8 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -1,25 +1,37 @@ -subroutine provide_all_three_ints_bi_ortho - implicit none - BEGIN_DOC -! routine that provides all necessary three-electron integrals - END_DOC - if(three_body_h_tc)then - if(three_e_3_idx_term)then - PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort - PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort - endif - if(three_e_4_idx_term)then - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort - PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort - endif - if(.not.double_normal_ord.and.three_e_5_idx_term)then - PROVIDE three_e_5_idx_direct_bi_ort - elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then - PROVIDE normal_two_body_bi_orth - endif +! --- + +subroutine provide_all_three_ints_bi_ortho() + + BEGIN_DOC + ! routine that provides all necessary three-electron integrals + END_DOC + + implicit none + + if(three_body_h_tc) then + + if(three_e_3_idx_term) then + PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort + PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort + endif + + if(three_e_4_idx_term) then + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort + endif + + if(.not. double_normal_ord .and. three_e_5_idx_term) then + PROVIDE three_e_5_idx_direct_bi_ort + elseif(double_normal_ord .and. (.not. three_e_5_idx_term)) then + PROVIDE normal_two_body_bi_orth + endif + endif + + return end +! --- + subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) implicit none BEGIN_DOC diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f index 7178d6d9..9719a6e7 100644 --- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f @@ -243,7 +243,9 @@ subroutine fock_ac_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree, do j = 1, nb jj = occ(j,other_spin) direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR - exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR + ! TODO + ! use transpose + exchange_int = three_e_4_idx_exch13_bi_ort(iorb,jj,p_fock,h_fock) ! USES 4-IDX TENSOR hthree += direct_int - exchange_int enddo else !! ispin NE to ispin_fock @@ -322,7 +324,8 @@ subroutine fock_a_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,N do j = 1, nb jj = occ(j,other_spin) direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR - exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR + ! TODO use transpose + exchange_int = three_e_4_idx_exch13_bi_ort(iorb,jj,p_fock,h_fock) ! USES 4-IDX TENSOR hthree -= direct_int - exchange_int enddo else !! ispin NE to ispin_fock diff --git a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f index e725d8e5..3180d946 100644 --- a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f +++ b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f @@ -96,9 +96,11 @@ double precision function three_e_single_parrallel_spin(m,j,k,i) implicit none integer, intent(in) :: i,k,j,m three_e_single_parrallel_spin = three_e_4_idx_direct_bi_ort(m,j,k,i) ! direct - three_e_single_parrallel_spin += three_e_4_idx_cycle_1_bi_ort(m,j,k,i) + three_e_4_idx_cycle_2_bi_ort(m,j,k,i) & ! two cyclic permutations + three_e_single_parrallel_spin += three_e_4_idx_cycle_1_bi_ort(m,j,k,i) + three_e_4_idx_cycle_1_bi_ort(j,m,k,i) & ! two cyclic permutations - three_e_4_idx_exch23_bi_ort(m,j,k,i) - three_e_4_idx_exch13_bi_ort(m,j,k,i) & ! two first exchange - - three_e_4_idx_exch12_bi_ort(m,j,k,i) ! last exchange + - three_e_4_idx_exch13_bi_ort(j,m,k,i) ! last exchange + ! TODO + ! use transpose end double precision function three_e_double_parrallel_spin(m,l,j,k,i) diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index 24bb7017..9023e2f0 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -38,15 +38,16 @@ subroutine write_tc_var() implicit none integer :: i, j, k - double precision :: hmono, htwoe, hthree, htot + double precision :: hmono, htwoe, hthree, htot_1j, htot_j1 double precision :: SIGMA_TC do k = 1, n_states SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) - SIGMA_TC = SIGMA_TC + htot * htot + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) + SIGMA_TC = SIGMA_TC + htot_1j * htot_j1 enddo print *, " state : ", k diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index df86ea65..33b5c5aa 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -11,12 +11,14 @@ program tc_bi_ortho touch read_wf touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call test_h_u0 +! call test_h_u0 ! call test_slater_tc_opt ! call timing_tot ! call timing_diag ! call timing_single ! call timing_double + + call test_no() end subroutine test_h_u0 @@ -252,3 +254,47 @@ subroutine timing_double end +! --- + +subroutine test_no() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing normal_two_body_bi_orth ...' + + thr = 1d-8 + + PROVIDE normal_two_body_bi_orth_old + PROVIDE normal_two_body_bi_orth + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = normal_two_body_bi_orth (l,k,j,i) + ref = normal_two_body_bi_orth_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on normal_two_body_bi_orth' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on normal_two_body_bi_orth = ', accu / dble(mo_num)**4 + + return +end + +! --- + + diff --git a/src/tools/print_wf.irp.f b/src/tools/print_wf.irp.f index 64eb1a1f..9621ee89 100644 --- a/src/tools/print_wf.irp.f +++ b/src/tools/print_wf.irp.f @@ -47,6 +47,7 @@ subroutine routine do i = 1, min(N_det_print_wf,N_det) print*,'' print*,'i = ',i + print *,psi_det_sorted(1,1,i) call debug_det(psi_det_sorted(1,1,i),N_int) call get_excitation_degree(psi_det_sorted(1,1,i),psi_det_sorted(1,1,1),degree,N_int) print*,'degree = ',degree diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 69873bc0..76a539a6 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1831,7 +1831,7 @@ double precision, intent(in) :: tol integer, dimension(:), allocatable :: piv double precision, dimension(:), allocatable :: work -character, parameter :: uplo = "U" +character, parameter :: uplo = 'L' integer :: LDA integer :: info integer :: k, l, rank0 @@ -1848,14 +1848,14 @@ if (rank > rank0) then end if do k = 1, ndim - A(k+1:ndim, k) = 0.00D+0 + A(k,k+1:ndim) = 0.00D+0 end do ! TODO: It should be possible to use only one vector of size (1:rank) as a buffer ! to do the swapping in-place U(:,:) = 0.00D+0 do k = 1, ndim l = piv(k) - U(l, 1:rank) = A(1:rank, k) + U(l, 1:rank) = A(k,1:rank) end do end subroutine pivoted_cholesky diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index aba99c2b..a9f1a438 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -490,7 +490,7 @@ end subroutine check_sym subroutine sum_A_At(A, N) !BEGIN_DOC - ! useful for symmetrizing a tensor without a temporary tensor + ! add a tensor with its transpose without a temporary tensor !END_DOC implicit none @@ -521,3 +521,38 @@ subroutine sum_A_At(A, N) end +! --- + +subroutine sub_A_At(A, N) + + !BEGIN_DOC + ! substruct a tensor with its transpose without a temporary tensor + !END_DOC + + implicit none + integer, intent(in) :: N + double precision, intent(inout) :: A(N,N) + integer :: i, j + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j) & + !$OMP SHARED (A, N) + !$OMP DO + do j = 1, N + do i = j, N + A(i,j) -= A(j,i) + enddo + enddo + !$OMP END DO + + !$OMP DO + do j = 2, N + do i = 1, j-1 + A(i,j) = -A(j,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end diff --git a/src/utils_cc/EZFIO.cfg b/src/utils_cc/EZFIO.cfg index 71ee87e3..fb6d9034 100644 --- a/src/utils_cc/EZFIO.cfg +++ b/src/utils_cc/EZFIO.cfg @@ -46,17 +46,11 @@ doc: Guess used to initialize the T2 amplitudes. none -> 0, MP -> perturbation t interface: ezfio,ocaml,provider default: MP -[cc_write_t1] -type: logical -doc: If true, it will write on disk the T1 amplitudes at the end of the calculation. -interface: ezfio,ocaml,provider -default: False - -[cc_write_t2] -type: logical -doc: If true, it will write on disk the T2 amplitudes at the end of the calculation. -interface: ezfio,ocaml,provider -default: False +[io_amplitudes] +type: Disk_access +doc: Read/Write |CCSD| amplitudes from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None [cc_par_t] type: logical diff --git a/src/utils_cc/guess_t.irp.f b/src/utils_cc/guess_t.irp.f index 42acdf78..bb26e133 100644 --- a/src/utils_cc/guess_t.irp.f +++ b/src/utils_cc/guess_t.irp.f @@ -91,16 +91,17 @@ subroutine write_t1(nO,nV,t1) double precision, intent(in) :: t1(nO, nV) ! internal - integer :: i,a + integer :: i,a, iunit + integer, external :: getunitandopen - if (cc_write_t1) then - open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1') + if (write_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T1','w') do a = 1, nV do i = 1, nO - write(11,'(F20.12)') t1(i,a) + write(iunit,'(F20.12)') t1(i,a) enddo enddo - close(11) + close(iunit) endif end @@ -120,20 +121,21 @@ subroutine write_t2(nO,nV,t2) double precision, intent(in) :: t2(nO, nO, nV, nV) ! internal - integer :: i,j,a,b + integer :: i,j,a,b, iunit + integer, external :: getunitandopen - if (cc_write_t2) then - open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2') + if (write_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T2','w') do b = 1, nV do a = 1, nV do j = 1, nO do i = 1, nO - write(11,'(F20.12)') t2(i,j,a,b) + write(iunit,'(F20.12)') t2(i,j,a,b) enddo enddo enddo enddo - close(11) + close(iunit) endif end @@ -153,23 +155,19 @@ subroutine read_t1(nO,nV,t1) double precision, intent(out) :: t1(nO, nV) ! internal - integer :: i,a + integer :: i,a, iunit logical :: ok + integer, external :: getunitandopen - inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok) - if (.not. ok) then - print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1' - print*, 'Do a first calculation with cc_write_t1 = True' - print*, 'and cc_guess_t1 /= read before setting cc_guess_t1 = read' - call abort - endif - open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1') - do a = 1, nV - do i = 1, nO - read(11,'(F20.12)') t1(i,a) + if (read_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T1','r') + do a = 1, nV + do i = 1, nO + read(iunit,'(F20.12)') t1(i,a) + enddo enddo - enddo - close(11) + close(iunit) + endif end @@ -188,26 +186,23 @@ subroutine read_t2(nO,nV,t2) double precision, intent(out) :: t2(nO, nO, nV, nV) ! internal - integer :: i,j,a,b + integer :: i,j,a,b, iunit logical :: ok - inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok) - if (.not. ok) then - print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1' - print*, 'Do a first calculation with cc_write_t2 = True' - print*, 'and cc_guess_t2 /= read before setting cc_guess_t2 = read' - call abort - endif - open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2') - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - read(11,'(F20.12)') t2(i,j,a,b) + integer, external :: getunitandopen + + if (read_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T2','r') + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + read(iunit,'(F20.12)') t2(i,j,a,b) + enddo enddo enddo enddo - enddo - close(11) + close(iunit) + endif end diff --git a/src/utils_cc/org/phase.org b/src/utils_cc/org/phase.org index 5f67859c..2156a251 100644 --- a/src/utils_cc/org/phase.org +++ b/src/utils_cc/org/phase.org @@ -137,6 +137,7 @@ subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,N do j = 1, 2 k = 1 do i = 1, n1(j) + if (k > n_anni(j)) exit if (l1(i,j) /= list_anni(k,j)) cycle pos_anni(k,j) = i k = k + 1 @@ -147,6 +148,7 @@ subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,N do j = 1, 2 k = 1 do i = 1, n2(j) + if (k > n_crea(j)) exit if (l2(i,j) /= list_crea(k,j)) cycle pos_crea(k,j) = i k = k + 1 diff --git a/src/utils_cc/phase.irp.f b/src/utils_cc/phase.irp.f index 01b41f49..e0703fb8 100644 --- a/src/utils_cc/phase.irp.f +++ b/src/utils_cc/phase.irp.f @@ -96,6 +96,7 @@ subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,N do j = 1, 2 k = 1 do i = 1, n1(j) + if (k > n_anni(j)) exit if (l1(i,j) /= list_anni(k,j)) cycle pos_anni(k,j) = i k = k + 1 @@ -106,6 +107,7 @@ subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,N do j = 1, 2 k = 1 do i = 1, n2(j) + if (k > n_crea(j)) exit if (l2(i,j) /= list_crea(k,j)) cycle pos_crea(k,j) = i k = k + 1 diff --git a/tests/input/h2_1.xyz b/tests/input/h2_1.xyz new file mode 100644 index 00000000..8ecd7dab --- /dev/null +++ b/tests/input/h2_1.xyz @@ -0,0 +1,6 @@ +2 +H2 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 + + diff --git a/tests/input/h2_3.xyz b/tests/input/h2_3.xyz new file mode 100644 index 00000000..8ecd7dab --- /dev/null +++ b/tests/input/h2_3.xyz @@ -0,0 +1,6 @@ +2 +H2 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 + + diff --git a/tests/input/h3_2.xyz b/tests/input/h3_2.xyz new file mode 100644 index 00000000..7c251c35 --- /dev/null +++ b/tests/input/h3_2.xyz @@ -0,0 +1,7 @@ +3 +h3 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.0 0.0 + + diff --git a/tests/input/h3_4.xyz b/tests/input/h3_4.xyz new file mode 100644 index 00000000..7c251c35 --- /dev/null +++ b/tests/input/h3_4.xyz @@ -0,0 +1,7 @@ +3 +h3 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.0 0.0 + + diff --git a/tests/input/h4_1.xyz b/tests/input/h4_1.xyz new file mode 100644 index 00000000..fe163388 --- /dev/null +++ b/tests/input/h4_1.xyz @@ -0,0 +1,7 @@ +4 +h4 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.74 0.0 +H 0.0 0.0 0.0 + diff --git a/tests/input/h4_3.xyz b/tests/input/h4_3.xyz new file mode 100644 index 00000000..fe163388 --- /dev/null +++ b/tests/input/h4_3.xyz @@ -0,0 +1,7 @@ +4 +h4 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.74 0.0 +H 0.0 0.0 0.0 + diff --git a/tests/input/h4_5.xyz b/tests/input/h4_5.xyz new file mode 100644 index 00000000..fe163388 --- /dev/null +++ b/tests/input/h4_5.xyz @@ -0,0 +1,7 @@ +4 +h4 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.74 0.0 +H 0.0 0.0 0.0 +