diff --git a/bin/qp_convert_output_to_ezfio b/bin/qp_convert_output_to_ezfio index e7c44b37..e53a9392 100755 --- a/bin/qp_convert_output_to_ezfio +++ b/bin/qp_convert_output_to_ezfio @@ -162,11 +162,11 @@ def write_ezfio(res, filename): # P a r s i n g # # ~#~#~#~#~#~#~ # - prim_num_max = ezfio.get_ao_basis_ao_prim_num_max() + prim_num_max = max(ezfio.get_ao_basis_ao_prim_num()) + ezfio.set_ao_basis_ao_prim_num_max(prim_num_max) for i in range(len(res.basis)): - coefficient[ - i] += [0. for j in range(len(coefficient[i]), prim_num_max)] + coefficient[i] += [0. for j in range(len(coefficient[i]), prim_num_max)] exponent[i] += [0. for j in range(len(exponent[i]), prim_num_max)] coefficient = reduce(lambda x, y: x + y, coefficient, []) diff --git a/config/bull.cfg b/config/bull.cfg index 6a93fdca..91471473 100644 --- a/config/bull.cfg +++ b/config/bull.cfg @@ -6,7 +6,7 @@ # --align=32 : Align all provided arrays on a 32-byte boundary # [COMMON] -FC : mpiifort -shared-libgcc -shared-intel -fpic +FC : mpiifort -fpic -xCORE-AVX2 LAPACK_LIB : -mkl=parallel IRPF90 : irpf90 IRPF90_FLAGS : --ninja --align=32 -DMPI @@ -31,7 +31,7 @@ OPENMP : 1 ; Append OpenMP flags # -ftz : Flushes denormal results to zero # [OPT] -FCFLAGS : -xCORE-AVX2 -O2 -ip -ftz -g -traceback -qopt-prefetch=5 -qopt-prefetch-issue-excl-hint -unroll-aggressive +FCFLAGS : -O2 -ip -ftz -g -traceback -qopt-prefetch=5 -qopt-prefetch-issue-excl-hint -unroll-aggressive # Profiling flags ################# # diff --git a/config/ifort_2019_debug.cfg b/config/ifort_2019_debug.cfg new file mode 100644 index 00000000..cb14f467 --- /dev/null +++ b/config/ifort_2019_debug.cfg @@ -0,0 +1,66 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! +# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -msse4.2 -O2 -ip -ftz -g + + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -msse4.2 -O2 -ip -ftz + + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exaceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -msse4.2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -msse4.2 -check all -debug all -fpe-all=0 -implicitnone + + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/ocaml/qp_run.ml b/ocaml/qp_run.ml index dfbab167..b9d14efe 100644 --- a/ocaml/qp_run.ml +++ b/ocaml/qp_run.ml @@ -6,7 +6,7 @@ open Qputils *) - + let print_list () = Lazy.force Qpackage.executables |> List.iter (fun (x,_) -> Printf.printf " * %s\n" x) @@ -151,10 +151,11 @@ let run slave ?prefix exe ezfio_file = let duration = Unix.time () -. time_start |> Unix.gmtime in let open Unix in let d, h, m, s = - duration.tm_yday, duration.tm_hour, duration.tm_min, duration.tm_sec + duration.tm_yday, duration.tm_hour, duration.tm_min, duration.tm_sec in Printf.printf "Wall time: %d:%2.2d:%2.2d" (d*24+h) m s ; Printf.printf "\n\n"; + Unix.sleep 1; if (exit_code <> 0) then exit exit_code @@ -187,7 +188,7 @@ let () = end; (* Handle options *) - let slave = Command_line.get_bool "slave" + let slave = Command_line.get_bool "slave" and prefix = Command_line.get "prefix" in diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja index 7df3c62d..aad85778 100755 --- a/scripts/compilation/qp_create_ninja +++ b/scripts/compilation/qp_create_ninja @@ -126,7 +126,7 @@ def ninja_create_env_variable(pwd_config_file): try: content = "" with open(libfile,'r') as f: - content = f.read() + content = f.read().replace('\n','') str_lib += " "+content except IOError: pass diff --git a/src/ao_basis/EZFIO.cfg b/src/ao_basis/EZFIO.cfg index dd61b1be..3ac16446 100644 --- a/src/ao_basis/EZFIO.cfg +++ b/src/ao_basis/EZFIO.cfg @@ -57,13 +57,13 @@ default: false [ao_normalized] type: logical -doc: Use normalized basis functions +doc: Normalize the atomic orbitals interface: ezfio, provider -default: true +default: false [primitives_normalized] type: logical -doc: Use normalized primitive functions +doc: Normalize the primitive basis functions interface: ezfio, provider default: true diff --git a/src/ao_basis/aos.irp.f b/src/ao_basis/aos.irp.f index 3a9e9fb7..dafea9c4 100644 --- a/src/ao_basis/aos.irp.f +++ b/src/ao_basis/aos.irp.f @@ -63,15 +63,14 @@ END_PROVIDER ! Coefficients including the |AO| normalization END_DOC - do i=1,ao_num - l = ao_shell(i) - ao_coef_normalized(i,:) = shell_coef(l,:) * shell_normalization_factor(l) - end do double precision :: norm,overlap_x,overlap_y,overlap_z,C_A(3), c - integer :: l, powA(3), nz + integer :: l, powA(3) + integer, parameter :: nz=100 integer :: i,j,k - nz=100 + + ao_coef_normalized(:,:) = ao_coef(:,:) + C_A = 0.d0 do i=1,ao_num @@ -80,7 +79,7 @@ END_PROVIDER powA(2) = ao_power(i,2) powA(3) = ao_power(i,3) - ! Normalization of the primitives + ! GAMESS-type normalization of the primitives if (primitives_normalized) then do j=1,ao_prim_num(i) call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,j), & @@ -91,6 +90,7 @@ END_PROVIDER ! Normalization of the contracted basis functions if (ao_normalized) then norm = 0.d0 + l = ao_shell(i) do j=1,ao_prim_num(i) do k=1,ao_prim_num(i) call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,k),powA,powA,overlap_x,overlap_y,overlap_z,c,nz) @@ -98,6 +98,7 @@ END_PROVIDER enddo enddo ao_coef_normalization_factor(i) = 1.d0/dsqrt(norm) + ao_coef_normalized(i,:) *= ao_coef_normalization_factor(i) else ao_coef_normalization_factor(i) = 1.d0 endif diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f index 461583ca..f01ed5ba 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f @@ -1,4 +1,6 @@ +! --- + BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)] BEGIN_DOC @@ -13,27 +15,29 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n double precision :: coef, beta, B_center(3) double precision :: tmp double precision :: wall0, wall1 - + double precision :: int_gauss, dsqpi_3_2, int_j1b + double precision :: factor_ij_1s, beta_ij, center_ij_1s(3), sq_pi_3_2 double precision, allocatable :: int_fit_v(:) double precision, external :: overlap_gauss_r12_ao_with1s - double precision :: int_gauss,dsqpi_3_2,int_j1b - double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2 - sq_pi_3_2 = (dacos(-1.d0))**(3/2) + + print*, ' providing int2_grad1u2_grad2u2_j1b2_test ...' + + sq_pi_3_2 = (dacos(-1.d0))**(1.5d0) provide mu_erf final_grid_points_transp j1b_pen List_comb_thr_b3_coef call wall_time(wall0) - int2_grad1u2_grad2u2_j1b2_test(:,:,:) = 0.d0 + int2_grad1u2_grad2u2_j1b2_test(:,:,:) = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& - !$OMP coef_fit, expo_fit, int_fit_v, tmp,int_gauss,int_j1b,factor_ij_1s,beta_ij,center_ij_1s) & - !$OMP SHARED (n_points_final_grid, ao_num, final_grid_points,List_comb_thr_b3_size,& - !$OMP final_grid_points_transp, ng_fit_jast, & - !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & - !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & - !$OMP List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b,& - !$OMP ao_overlap_abs_grid,sq_pi_3_2) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit_v, tmp,int_gauss,int_j1b,factor_ij_1s,beta_ij,center_ij_1s) & + !$OMP SHARED (n_points_final_grid, ao_num, final_grid_points,List_comb_thr_b3_size, & + !$OMP final_grid_points_transp, ng_fit_jast, & + !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & + !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & + !$OMP List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b, & + !$OMP ao_overlap_abs,sq_pi_3_2) !$OMP DO SCHEDULE(dynamic) do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -41,7 +45,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n r(3) = final_grid_points(3,ipoint) do i = 1, ao_num do j = i, ao_num - if(ao_overlap_abs_grid(j,i) .lt. 1.d-12) then + if(ao_overlap_abs(j,i) .lt. 1.d-12) then cycle endif @@ -61,7 +65,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef ! if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version - if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-12)cycle + if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle ! call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & ! expo_fit, i, j, int_fit_v, n_points_final_grid) @@ -91,6 +95,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao_num, n_points_final_grid)] ! ! BEGIN_DOC @@ -109,6 +115,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao double precision, allocatable :: int_fit_v(:),big_array(:,:,:) double precision, external :: overlap_gauss_r12_ao_with1s + print*, ' providing int2_grad1u2_grad2u2_j1b2_test_v ...' + provide mu_erf final_grid_points_transp j1b_pen call wall_time(wall0) @@ -123,14 +131,14 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & !$OMP List_comb_thr_b3_cent, big_array,& - !$OMP ao_abs_comb_b3_j1b,ao_overlap_abs_grid) + !$OMP ao_abs_comb_b3_j1b,ao_overlap_abs) ! allocate(int_fit_v(n_points_final_grid)) !$OMP DO SCHEDULE(dynamic) do i = 1, ao_num do j = i, ao_num - if(ao_overlap_abs_grid(j,i) .lt. 1.d-12) then + if(ao_overlap_abs(j,i) .lt. 1.d-12) then cycle endif @@ -139,7 +147,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle +! if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -185,6 +193,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)] BEGIN_DOC @@ -202,7 +212,10 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ double precision, external :: overlap_gauss_r12_ao double precision, external :: overlap_gauss_r12_ao_with1s double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2 - sq_pi_3_2 = (dacos(-1.d0))**(3/2) + + print*, ' providing int2_u2_j1b2_test ...' + + sq_pi_3_2 = (dacos(-1.d0))**(1.5d0) provide mu_erf final_grid_points j1b_pen call wall_time(wall0) @@ -213,7 +226,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & !$OMP coef_fit, expo_fit, int_fit, tmp, int_j1b,factor_ij_1s,beta_ij,center_ij_1s) & !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b3_size, & - !$OMP final_grid_points, ng_fit_jast,ao_overlap_abs_grid, & + !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2, & !$OMP List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b) @@ -225,9 +238,6 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ do i = 1, ao_num do j = i, ao_num - if(ao_overlap_abs_grid(j,i) .lt. 1.d-12) then - cycle - endif tmp = 0.d0 @@ -236,7 +246,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle + if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -248,7 +258,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ !DIR$ FORCEINLINE call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) ! if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version - if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-12)cycle + if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle ! --- @@ -283,7 +293,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num, n_points_final_grid)] +BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n_points_final_grid, 3)] BEGIN_DOC ! @@ -298,7 +308,10 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp double precision :: tmp_x, tmp_y, tmp_z, int_j1b double precision :: wall0, wall1, sq_pi_3_2,sq_alpha - sq_pi_3_2 = dacos(-1.D0)**(3/2) + + print*, ' providing int2_u_grad1u_x_j1b2_test ...' + + sq_pi_3_2 = dacos(-1.D0)**(1.d0) provide mu_erf final_grid_points j1b_pen call wall_time(wall0) @@ -310,7 +323,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & !$OMP tmp_x, tmp_y, tmp_z,int_j1b,sq_alpha) & !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b3_size, & - !$OMP final_grid_points, ng_fit_jast, ao_overlap_abs_grid,& + !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & !$OMP List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2) @@ -323,9 +336,6 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num do i = 1, ao_num do j = i, ao_num - if(ao_overlap_abs_grid(j,i) .lt. 1.d-12) then - cycle - endif tmp_x = 0.d0 tmp_y = 0.d0 @@ -335,7 +345,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle + if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -359,7 +369,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv) ! if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version - if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-14) cycle + if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) @@ -372,9 +382,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num enddo - int2_u_grad1u_x_j1b2_test(1,j,i,ipoint) = tmp_x - int2_u_grad1u_x_j1b2_test(2,j,i,ipoint) = tmp_y - int2_u_grad1u_x_j1b2_test(3,j,i,ipoint) = tmp_z + int2_u_grad1u_x_j1b2_test(j,i,ipoint,1) = tmp_x + int2_u_grad1u_x_j1b2_test(j,i,ipoint,2) = tmp_y + int2_u_grad1u_x_j1b2_test(j,i,ipoint,3) = tmp_z enddo enddo enddo @@ -384,9 +394,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num do ipoint = 1, n_points_final_grid do i = 2, ao_num do j = 1, i-1 - int2_u_grad1u_x_j1b2_test(1,j,i,ipoint) = int2_u_grad1u_x_j1b2_test(1,i,j,ipoint) - int2_u_grad1u_x_j1b2_test(2,j,i,ipoint) = int2_u_grad1u_x_j1b2_test(2,i,j,ipoint) - int2_u_grad1u_x_j1b2_test(3,j,i,ipoint) = int2_u_grad1u_x_j1b2_test(3,i,j,ipoint) + int2_u_grad1u_x_j1b2_test(j,i,ipoint,1) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,1) + int2_u_grad1u_x_j1b2_test(j,i,ipoint,2) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,2) + int2_u_grad1u_x_j1b2_test(j,i,ipoint,3) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,3) enddo enddo enddo @@ -415,7 +425,10 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p double precision :: j12_mu_r12,int_j1b double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2 double precision :: beta_ij,center_ij_1s(3),factor_ij_1s - dsqpi_3_2 = (dacos(-1.d0))**(3/2) + + print*, ' providing int2_u_grad1u_j1b2_test ...' + + dsqpi_3_2 = (dacos(-1.d0))**(1.5d0) provide mu_erf final_grid_points j1b_pen ao_overlap_abs List_comb_thr_b3_cent call wall_time(wall0) @@ -438,7 +451,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p do ipoint = 1, n_points_final_grid do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-12)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle r(1) = final_grid_points(1,ipoint) r(2) = final_grid_points(2,ipoint) r(3) = final_grid_points(3,ipoint) @@ -449,7 +462,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle + if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -461,7 +474,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p expo_fit = expo_gauss_j_mu_1_erf(i_fit) call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) - if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle + if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.1.d-15)cycle coef_fit = coef_gauss_j_mu_1_erf(i_fit) alpha_1s = beta + expo_fit @@ -471,9 +484,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist - if(expo_coef_1s .gt. 80.d0) cycle + if(expo_coef_1s .gt. 20.d0) cycle coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) - if(dabs(coef_tmp) .lt. 1d-10) cycle + if(dabs(coef_tmp) .lt. 1d-08) cycle int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r) diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index 5cd2aac6..8196614f 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -19,9 +19,11 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n double precision, external :: overlap_gauss_r12_ao double precision, external :: overlap_gauss_r12_ao_with1s - provide mu_erf final_grid_points j1b_pen + print*, ' providing int2_grad1u2_grad2u2_j1b2 ...' call wall_time(wall0) + provide mu_erf final_grid_points j1b_pen + int2_grad1u2_grad2u2_j1b2 = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & @@ -88,7 +90,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n enddo call wall_time(wall1) - print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0 + print*, ' wall time for int2_grad1u2_grad2u2_j1b2 =', wall1 - wall0 END_PROVIDER @@ -111,9 +113,11 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final double precision, external :: overlap_gauss_r12_ao double precision, external :: overlap_gauss_r12_ao_with1s - provide mu_erf final_grid_points j1b_pen + print*, ' providing int2_u2_j1b2 ...' call wall_time(wall0) + provide mu_erf final_grid_points j1b_pen + int2_u2_j1b2 = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & @@ -186,7 +190,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_points_final_grid)] +BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (ao_num, ao_num, n_points_final_grid, 3)] BEGIN_DOC ! @@ -202,9 +206,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p double precision :: tmp_x, tmp_y, tmp_z double precision :: wall0, wall1 - provide mu_erf final_grid_points j1b_pen + print*, ' providing int2_u_grad1u_x_j1b2 ...' call wall_time(wall0) + provide mu_erf final_grid_points j1b_pen + int2_u_grad1u_x_j1b2 = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & @@ -278,9 +284,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p enddo - int2_u_grad1u_x_j1b2(1,j,i,ipoint) = tmp_x - int2_u_grad1u_x_j1b2(2,j,i,ipoint) = tmp_y - int2_u_grad1u_x_j1b2(3,j,i,ipoint) = tmp_z + int2_u_grad1u_x_j1b2(j,i,ipoint,1) = tmp_x + int2_u_grad1u_x_j1b2(j,i,ipoint,2) = tmp_y + int2_u_grad1u_x_j1b2(j,i,ipoint,3) = tmp_z enddo enddo enddo @@ -290,15 +296,15 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p do ipoint = 1, n_points_final_grid do i = 2, ao_num do j = 1, i-1 - int2_u_grad1u_x_j1b2(1,j,i,ipoint) = int2_u_grad1u_x_j1b2(1,i,j,ipoint) - int2_u_grad1u_x_j1b2(2,j,i,ipoint) = int2_u_grad1u_x_j1b2(2,i,j,ipoint) - int2_u_grad1u_x_j1b2(3,j,i,ipoint) = int2_u_grad1u_x_j1b2(3,i,j,ipoint) + int2_u_grad1u_x_j1b2(j,i,ipoint,1) = int2_u_grad1u_x_j1b2(i,j,ipoint,1) + int2_u_grad1u_x_j1b2(j,i,ipoint,2) = int2_u_grad1u_x_j1b2(i,j,ipoint,2) + int2_u_grad1u_x_j1b2(j,i,ipoint,3) = int2_u_grad1u_x_j1b2(i,j,ipoint,3) enddo enddo enddo call wall_time(wall1) - print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0 + print*, ' wall time for int2_u_grad1u_x_j1b2 = ', wall1 - wall0 END_PROVIDER @@ -320,9 +326,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points double precision :: wall0, wall1 double precision, external :: NAI_pol_mult_erf_ao_with1s - provide mu_erf final_grid_points j1b_pen + print*, ' providing int2_u_grad1u_j1b2 ...' call wall_time(wall0) + provide mu_erf final_grid_points j1b_pen + int2_u_grad1u_j1b2 = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f index 6d3931f5..21927371 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f @@ -241,7 +241,7 @@ ! !! --- ! -!BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_points_final_grid)] +!BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (ao_num, ao_num, n_points_final_grid, 3)] ! ! BEGIN_DOC ! ! @@ -308,7 +308,7 @@ ! ! ! --- ! -! int2_u_grad1u_x_j1b2(1,j,i,ipoint) += coef_fit * int_fit_v(ipoint,1) +! int2_u_grad1u_x_j1b2(j,i,ipoint,1) += coef_fit * int_fit_v(ipoint,1) ! ! if(dabs(int_fit_v(ipoint,1)) .gt. 1d-10) then ! i_mask_grid1 += 1 @@ -320,7 +320,7 @@ ! ! ! --- ! -! int2_u_grad1u_x_j1b2(2,j,i,ipoint) += coef_fit * int_fit_v(ipoint,2) +! int2_u_grad1u_x_j1b2(j,i,ipoint,2) += coef_fit * int_fit_v(ipoint,2) ! ! if(dabs(int_fit_v(ipoint,2)) .gt. 1d-10) then ! i_mask_grid2 += 1 @@ -332,7 +332,7 @@ ! ! ! --- ! -! int2_u_grad1u_x_j1b2(3,j,i,ipoint) += coef_fit * int_fit_v(ipoint,3) +! int2_u_grad1u_x_j1b2(j,i,ipoint,3) += coef_fit * int_fit_v(ipoint,3) ! ! if(dabs(int_fit_v(ipoint,3)) .gt. 1d-10) then ! i_mask_grid3 += 1 @@ -408,15 +408,15 @@ ! call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s, n_points_final_grid, 1.d+9, r_mask_grid, n_points_final_grid, int_fit_v, n_points_final_grid, i_mask_grid) ! ! do ipoint = 1, i_mask_grid1 -! int2_u_grad1u_x_j1b2(1,j,i,n_mask_grid(ipoint,1)) += coef * dexp(-expo_coef_1s * dist(ipoint,1)) * int_fit_v(ipoint,1) +! int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,1),1) += coef * dexp(-expo_coef_1s * dist(ipoint,1)) * int_fit_v(ipoint,1) ! enddo ! ! do ipoint = 1, i_mask_grid2 -! int2_u_grad1u_x_j1b2(2,j,i,n_mask_grid(ipoint,2)) += coef * dexp(-expo_coef_1s * dist(ipoint,2)) * int_fit_v(ipoint,2) +! int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,2),2) += coef * dexp(-expo_coef_1s * dist(ipoint,2)) * int_fit_v(ipoint,2) ! enddo ! ! do ipoint = 1, i_mask_grid3 -! int2_u_grad1u_x_j1b2(3,j,i,n_mask_grid(ipoint,3)) += coef * dexp(-expo_coef_1s * dist(ipoint,3)) * int_fit_v(ipoint,3) +! int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,3),3) += coef * dexp(-expo_coef_1s * dist(ipoint,3)) * int_fit_v(ipoint,3) ! enddo ! ! enddo @@ -439,15 +439,15 @@ ! do ipoint = 1, n_points_final_grid ! do i = 2, ao_num ! do j = 1, i-1 -! int2_u_grad1u_x_j1b2(1,j,i,ipoint) = int2_u_grad1u_x_j1b2(1,i,j,ipoint) -! int2_u_grad1u_x_j1b2(2,j,i,ipoint) = int2_u_grad1u_x_j1b2(2,i,j,ipoint) -! int2_u_grad1u_x_j1b2(3,j,i,ipoint) = int2_u_grad1u_x_j1b2(3,i,j,ipoint) +! int2_u_grad1u_x_j1b2(j,i,ipoint,1) = int2_u_grad1u_x_j1b2(i,j,ipoint,1) +! int2_u_grad1u_x_j1b2(j,i,ipoint,2) = int2_u_grad1u_x_j1b2(i,j,ipoint,2) +! int2_u_grad1u_x_j1b2(j,i,ipoint,3) = int2_u_grad1u_x_j1b2(i,j,ipoint,3) ! enddo ! enddo ! enddo ! ! call wall_time(wall1) -! print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0 +! print*, ' wall time for int2_u_grad1u_x_j1b2 =', wall1 - wall0 ! !END_PROVIDER ! diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f index 58a670e0..a6a55810 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f @@ -17,7 +17,10 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, double precision :: wall0, wall1 double precision, external :: NAI_pol_mult_erf_ao_with1s double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2 - dsqpi_3_2 = (dacos(-1.d0))**(3/2) + + print*, ' providing v_ij_erf_rk_cst_mu_j1b_test ...' + + dsqpi_3_2 = (dacos(-1.d0))**(1.5d0) provide mu_erf final_grid_points j1b_pen call wall_time(wall0) @@ -38,7 +41,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-12)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle tmp = 0.d0 do i_1s = 1, List_comb_thr_b2_size(j,i) @@ -46,7 +49,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle + if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -85,54 +88,28 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R| END_DOC - implicit none - integer :: i, j, ipoint - double precision :: wall0, wall1 - - call wall_time(wall0) - - do ipoint = 1, n_points_final_grid - do i = 1, ao_num - do j = 1, ao_num - x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(1,j,i,ipoint) - x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(2,j,i,ipoint) - x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(3,j,i,ipoint) - enddo - enddo - enddo - - call wall_time(wall1) - print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num, ao_num, n_points_final_grid)] - - BEGIN_DOC - ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R| - END_DOC - implicit none integer :: i, j, ipoint, i_1s double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3) double precision :: tmp_x, tmp_y, tmp_z double precision :: wall0, wall1 double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b,factor_ij_1s,beta_ij,center_ij_1s - dsqpi_3_2 = (dacos(-1.d0))**(3/2) + + print*, ' providing x_v_ij_erf_rk_cst_mu_j1b_test ...' + + dsqpi_3_2 = (dacos(-1.d0))**(1.5d0) provide expo_erfc_mu_gauss ao_prod_sigma ao_prod_center call wall_time(wall0) - x_v_ij_erf_rk_cst_mu_tmp_j1b_test = 0.d0 + x_v_ij_erf_rk_cst_mu_j1b_test = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb, & - !$OMP int_j1b, tmp_x, tmp_y, tmp_z,factor_ij_1s,beta_ij,center_ij_1s) & + !$OMP int_j1b, tmp_x, tmp_y, tmp_z,factor_ij_1s,beta_ij,center_ij_1s) & !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,& !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent, & - !$OMP x_v_ij_erf_rk_cst_mu_tmp_j1b_test, mu_erf,ao_abs_comb_b2_j1b, & + !$OMP x_v_ij_erf_rk_cst_mu_j1b_test, mu_erf,ao_abs_comb_b2_j1b, & !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma) ! !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss) !$OMP DO @@ -143,7 +120,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-12)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle tmp_x = 0.d0 tmp_y = 0.d0 @@ -153,7 +130,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle + if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -164,7 +141,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num ! call gaussian_product(expo_erfc_mu_gauss,r, & ! ao_prod_sigma(j,i),ao_prod_center(1,j,i), & ! factor_ij_1s,beta_ij,center_ij_1s) -! if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-3/2)).lt.1.d-10)cycle +! if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-1.5d0)).lt.1.d-10)cycle ! endif call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints ) call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, ints_coulomb) @@ -174,9 +151,9 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num tmp_z += coef * (ints(3) - ints_coulomb(3)) enddo - x_v_ij_erf_rk_cst_mu_tmp_j1b_test(1,j,i,ipoint) = tmp_x - x_v_ij_erf_rk_cst_mu_tmp_j1b_test(2,j,i,ipoint) = tmp_y - x_v_ij_erf_rk_cst_mu_tmp_j1b_test(3,j,i,ipoint) = tmp_z + x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = tmp_x + x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = tmp_y + x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = tmp_z enddo enddo enddo @@ -186,15 +163,15 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num do ipoint = 1, n_points_final_grid do i = 2, ao_num do j = 1, i-1 - x_v_ij_erf_rk_cst_mu_tmp_j1b_test(1,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(1,i,j,ipoint) - x_v_ij_erf_rk_cst_mu_tmp_j1b_test(2,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(2,i,j,ipoint) - x_v_ij_erf_rk_cst_mu_tmp_j1b_test(3,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(3,i,j,ipoint) + x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,1) + x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,2) + x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,3) enddo enddo enddo call wall_time(wall1) - print*, ' wall time for x_v_ij_erf_rk_cst_mu_tmp_j1b_test', wall1 - wall0 + print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0 END_PROVIDER @@ -218,7 +195,10 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po double precision, external :: overlap_gauss_r12_ao_with1s double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b - dsqpi_3_2 = (dacos(-1.d0))**(3/2) + + print*, ' providing v_ij_u_cst_mu_j1b_test ...' + + dsqpi_3_2 = (dacos(-1.d0))**(1.5d0) provide mu_erf final_grid_points j1b_pen call wall_time(wall0) @@ -244,7 +224,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-12)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle tmp = 0.d0 do i_1s = 1, List_comb_thr_b2_size(j,i) @@ -252,7 +232,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle + if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -311,7 +291,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, double precision, external :: overlap_gauss_r12_ao_with1s double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b - dsqpi_3_2 = (dacos(-1.d0))**(3/2) + dsqpi_3_2 = (dacos(-1.d0))**(1.5d0) provide mu_erf final_grid_points j1b_pen call wall_time(wall0) @@ -337,7 +317,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-12)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle tmp = 0.d0 do i_1s = 1, List_comb_thr_b2_size(j,i) @@ -345,7 +325,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle + if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f index 8fff961b..fc30cd83 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f @@ -17,9 +17,11 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po double precision :: wall0, wall1 double precision, external :: NAI_pol_mult_erf_ao_with1s - provide mu_erf final_grid_points j1b_pen + print *, ' providing v_ij_erf_rk_cst_mu_j1b ...' call wall_time(wall0) + provide mu_erf final_grid_points j1b_pen + v_ij_erf_rk_cst_mu_j1b = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & @@ -99,51 +101,23 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_ ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R| END_DOC - implicit none - integer :: i, j, ipoint - double precision :: wall0, wall1 - - call wall_time(wall0) - - do ipoint = 1, n_points_final_grid - do i = 1, ao_num - do j = 1, ao_num - x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint) - x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint) - x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint) - enddo - enddo - enddo - - call wall_time(wall1) - print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_num, n_points_final_grid)] - - BEGIN_DOC - ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R| - END_DOC - implicit none integer :: i, j, ipoint, i_1s double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3) double precision :: tmp_x, tmp_y, tmp_z double precision :: wall0, wall1 + print*, ' providing x_v_ij_erf_rk_cst_mu_j1b ...' call wall_time(wall0) - x_v_ij_erf_rk_cst_mu_tmp_j1b = 0.d0 + x_v_ij_erf_rk_cst_mu_j1b = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb, & !$OMP tmp_x, tmp_y, tmp_z) & !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points,& !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent, & - !$OMP x_v_ij_erf_rk_cst_mu_tmp_j1b, mu_erf) + !$OMP x_v_ij_erf_rk_cst_mu_j1b, mu_erf) !$OMP DO !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid @@ -195,9 +169,9 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_ ! --- - x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint) = tmp_x - x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint) = tmp_y - x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint) = tmp_z + x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = tmp_x + x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = tmp_y + x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = tmp_z enddo enddo enddo @@ -207,15 +181,15 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_ do ipoint = 1, n_points_final_grid do i = 2, ao_num do j = 1, i-1 - x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(1,i,j,ipoint) - x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(2,i,j,ipoint) - x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(3,i,j,ipoint) + x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) + x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) + x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) enddo enddo enddo call wall_time(wall1) - print*, ' wall time for x_v_ij_erf_rk_cst_mu_tmp_j1b', wall1 - wall0 + print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b =', wall1 - wall0 END_PROVIDER @@ -239,9 +213,11 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ double precision, external :: overlap_gauss_r12_ao_with1s - provide mu_erf final_grid_points j1b_pen + print*, ' providing v_ij_u_cst_mu_j1b ...' call wall_time(wall0) + provide mu_erf final_grid_points j1b_pen + v_ij_u_cst_mu_j1b = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & diff --git a/src/ao_many_one_e_ints/grad_related_ints.irp.f b/src/ao_many_one_e_ints/grad_related_ints.irp.f index 67fb0fe7..8624e7b8 100644 --- a/src/ao_many_one_e_ints/grad_related_ints.irp.f +++ b/src/ao_many_one_e_ints/grad_related_ints.irp.f @@ -17,6 +17,8 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points double precision :: NAI_pol_mult_erf_ao + print*, ' providing v_ij_erf_rk_cst_mu ...' + provide mu_erf final_grid_points call wall_time(wall0) @@ -54,7 +56,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points enddo call wall_time(wall1) - print*, ' wall time for v_ij_erf_rk_cst_mu ', wall1 - wall0 + print*, ' wall time for v_ij_erf_rk_cst_mu = ', wall1 - wall0 END_PROVIDER @@ -73,6 +75,8 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_transp, (n_points_final_gr double precision :: wall0, wall1 double precision :: NAI_pol_mult_erf_ao + print *, ' providing v_ij_erf_rk_cst_mu_transp ...' + provide mu_erf final_grid_points call wall_time(wall0) @@ -107,7 +111,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_transp, (n_points_final_gr enddo call wall_time(wall1) - print *, ' wall time for v_ij_erf_rk_cst_mu_transp ', wall1 - wall0 + print *, ' wall time for v_ij_erf_rk_cst_mu_transp = ', wall1 - wall0 END_PROVIDER @@ -124,6 +128,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp, (3, ao_num, ao_num, double precision :: r(3), ints(3), ints_coulomb(3) double precision :: wall0, wall1 + print*, ' providing x_v_ij_erf_rk_cst_mu_tmp ...' + call wall_time(wall0) !$OMP PARALLEL & @@ -162,13 +168,13 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp, (3, ao_num, ao_num, enddo call wall_time(wall1) - print*, ' wall time for x_v_ij_erf_rk_cst_mu_tmp', wall1 - wall0 + print *, ' wall time for x_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0 END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num,n_points_final_grid,3)] +BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points_final_grid, 3)] BEGIN_DOC ! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/|r - R| @@ -178,6 +184,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num,n_point integer :: i, j, ipoint double precision :: wall0, wall1 + print *, ' providing x_v_ij_erf_rk_cst_mu ...' + call wall_time(wall0) do ipoint = 1, n_points_final_grid @@ -191,7 +199,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num,n_point enddo call wall_time(wall1) - print *, ' wall time for x_v_ij_erf_rk_cst_mu', wall1 - wall0 + print *, ' wall time for x_v_ij_erf_rk_cst_mu = ', wall1 - wall0 END_PROVIDER @@ -207,6 +215,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp, (ao_num, ao_num, integer :: i, j, ipoint double precision :: wall0, wall1 + print *, ' providing x_v_ij_erf_rk_cst_mu_transp ...' + call wall_time(wall0) do ipoint = 1, n_points_final_grid @@ -220,13 +230,13 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp, (ao_num, ao_num, enddo call wall_time(wall1) - print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp', wall1 - wall0 + print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp = ', wall1 - wall0 END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_final_grid,ao_num, ao_num,3)] +BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_final_grid, ao_num, ao_num, 3)] BEGIN_DOC ! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/|r - R| @@ -236,6 +246,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_fi integer :: i, j, ipoint double precision :: wall0, wall1 + print *, ' providing x_v_ij_erf_rk_cst_mu_transp_bis ...' + call wall_time(wall0) do i = 1, ao_num @@ -249,7 +261,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_fi enddo call wall_time(wall1) - print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp_bis', wall1 - wall0 + print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp_bis = ', wall1 - wall0 END_PROVIDER @@ -268,7 +280,9 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_fin double precision :: r(3), ints(3), ints_coulomb(3) double precision :: wall0, wall1 - call wall_time(wall0) + print *, ' providing d_dx_v_ij_erf_rk_cst_mu_tmp ...' + + call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -295,7 +309,7 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_fin !$OMP END PARALLEL call wall_time(wall1) - print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu_tmp', wall1 - wall0 + print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0 END_PROVIDER @@ -315,6 +329,8 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu, (n_points_final_grid integer :: i, j, ipoint double precision :: wall0, wall1 + print *, ' providing d_dx_v_ij_erf_rk_cst_mu ...' + call wall_time(wall0) do i = 1, ao_num do j = 1, ao_num @@ -327,7 +343,7 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu, (n_points_final_grid enddo call wall_time(wall1) - print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu', wall1 - wall0 + print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu = ', wall1 - wall0 END_PROVIDER @@ -348,6 +364,8 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_f double precision :: r(3), ints(3), ints_coulomb(3) double precision :: wall0, wall1 + print *, ' providing x_d_dx_v_ij_erf_rk_cst_mu_tmp ...' + call wall_time(wall0) !$OMP PARALLEL & @@ -375,7 +393,7 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_f !$OMP END PARALLEL call wall_time(wall1) - print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu_tmp', wall1 - wall0 + print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0 END_PROVIDER @@ -395,6 +413,8 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu, (n_points_final_gr integer :: i, j, ipoint double precision :: wall0, wall1 + print *, ' providing x_d_dx_v_ij_erf_rk_cst_mu ...' + call wall_time(wall0) do i = 1, ao_num @@ -408,7 +428,7 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu, (n_points_final_gr enddo call wall_time(wall1) - print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu', wall1 - wall0 + print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu = ', wall1 - wall0 END_PROVIDER diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index c41b312d..e27bf723 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -102,6 +102,12 @@ END_PROVIDER List_all_comb_b2_coef(i) = (-1.d0)**dble(phase) * dexp(-List_all_comb_b2_coef(i)) enddo + print *, ' coeff, expo & cent of list b2' + do i = 1, List_all_comb_b2_size + print*, i, List_all_comb_b2_coef(i), List_all_comb_b2_expo(i) + print*, List_all_comb_b2_cent(1,i), List_all_comb_b2_cent(2,i), List_all_comb_b2_cent(3,i) + enddo + END_PROVIDER ! --- @@ -219,9 +225,11 @@ END_PROVIDER List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i)) enddo - print *, ' 1st coeff & expo of lists' - print*, List_all_comb_b2_coef(1), List_all_comb_b2_expo(1) - print*, List_all_comb_b3_coef(1), List_all_comb_b3_expo(1) + print *, ' coeff, expo & cent of list b3' + do i = 1, List_all_comb_b3_size + print*, i, List_all_comb_b3_coef(i), List_all_comb_b3_expo(i) + print*, List_all_comb_b3_cent(1,i), List_all_comb_b3_cent(2,i), List_all_comb_b3_cent(3,i) + enddo END_PROVIDER diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f index dc19f6c7..928053ad 100644 --- a/src/ao_one_e_ints/pot_ao_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_ints.irp.f @@ -18,6 +18,8 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)] double precision :: A_center(3),B_center(3),C_center(3) double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult + ao_integrals_n_e = 0.d0 + if (read_ao_integrals_n_e) then call ezfio_get_ao_one_e_ints_ao_integrals_n_e(ao_integrals_n_e) @@ -36,8 +38,6 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)] else - ao_integrals_n_e = 0.d0 - !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& diff --git a/src/ao_one_e_ints/pseudopot.f90 b/src/ao_one_e_ints/pseudopot.f90 index 7321dff7..e02dea3b 100644 --- a/src/ao_one_e_ints/pseudopot.f90 +++ b/src/ao_one_e_ints/pseudopot.f90 @@ -1950,26 +1950,26 @@ xq(17)=-3.34785456738322 xq(18)=-3.94476404011563 xq(19)=-4.60368244955074 xq(20)=-5.38748089001123 -wq(1)= 2.229393645534151E-013 -wq(2)= 4.399340992273176E-010 -wq(3)= 1.086069370769280E-007 -wq(4)= 7.802556478532063E-006 -wq(5)= 2.283386360163528E-004 -wq(6)= 3.243773342237853E-003 -wq(7)= 2.481052088746362E-002 +wq(1)= 2.229393645534151D-013 +wq(2)= 4.399340992273176D-010 +wq(3)= 1.086069370769280D-007 +wq(4)= 7.802556478532063D-006 +wq(5)= 2.283386360163528D-004 +wq(6)= 3.243773342237853D-003 +wq(7)= 2.481052088746362D-002 wq(8)= 0.109017206020022 wq(9)= 0.286675505362834 wq(10)= 0.462243669600610 wq(11)= 0.462243669600610 wq(12)= 0.286675505362834 wq(13)= 0.109017206020022 -wq(14)= 2.481052088746362E-002 -wq(15)= 3.243773342237853E-003 -wq(16)= 2.283386360163528E-004 -wq(17)= 7.802556478532063E-006 -wq(18)= 1.086069370769280E-007 -wq(19)= 4.399340992273176E-010 -wq(20)= 2.229393645534151E-013 +wq(14)= 2.481052088746362D-002 +wq(15)= 3.243773342237853D-003 +wq(16)= 2.283386360163528D-004 +wq(17)= 7.802556478532063D-006 +wq(18)= 1.086069370769280D-007 +wq(19)= 4.399340992273176D-010 +wq(20)= 2.229393645534151D-013 npts=20 ! call gauher(xq,wq,npts) diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f index 902d4514..4730d003 100644 --- a/src/ao_tc_eff_map/fit_j.irp.f +++ b/src/ao_tc_eff_map/fit_j.irp.f @@ -123,6 +123,36 @@ END_PROVIDER expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) enddo + elseif(ng_fit_jast .eq. 7) then + + coef_gauss_j_mu_x = (/ -0.01756495d0 , -0.01023623d0 , -0.06548959d0 , -0.03539446d0 , -0.17150646d0 , -0.15071096d0 , -0.11326834d0 /) + expo_gauss_j_mu_x = (/ 9.88572565d+02, 1.21363371d+04, 3.69794870d+01, 1.67364529d+02, 3.03962934d+00, 1.27854005d+00, 9.76383343d+00 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) + enddo + + elseif(ng_fit_jast .eq. 8) then + + coef_gauss_j_mu_x = (/ -0.11489205d0 , -0.16008968d0 , -0.12892456d0 , -0.04250838d0 , -0.0718451d0 , -0.02394051d0 , -0.00913353d0 , -0.01285182d0 /) + expo_gauss_j_mu_x = (/ 6.97632442d+00, 2.56010878d+00, 1.22760977d+00, 7.47697124d+01, 2.16104215d+01, 2.96549728d+02, 1.40773328d+04, 1.43335159d+03 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) + enddo + + !elseif(ng_fit_jast .eq. 9) then + + ! coef_gauss_j_mu_x = (/ /) + ! expo_gauss_j_mu_x = (/ /) + + ! tmp = mu_erf * mu_erf + ! do i = 1, ng_fit_jast + ! expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) + ! enddo + elseif(ng_fit_jast .eq. 20) then ASSERT(n_max_fit_slat == 20) @@ -224,6 +254,36 @@ END_PROVIDER expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) enddo + elseif(ng_fit_jast .eq. 7) then + + coef_gauss_j_mu_x_2 = (/ 0.05202849d0 , 0.01031081d0 , 0.04699157d0 , 0.01451002d0 , 0.07442576d0 , 0.02692033d0 , 0.09311842d0 /) + expo_gauss_j_mu_x_2 = (/ 3.04469415d+00, 1.40682034d+04, 7.45960945d+01, 1.43067466d+03, 2.16815661d+01, 2.95750306d+02, 7.23471236d+00 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 8) then + + coef_gauss_j_mu_x_2 = (/ 0.00942115d0 , 0.07332421d0 , 0.0508308d0 , 0.08204949d0 , 0.0404099d0 , 0.03201288d0 , 0.01911313d0 , 0.01114732d0 /) + expo_gauss_j_mu_x_2 = (/ 1.56957321d+04, 1.52867810d+01, 4.36016903d+01, 5.96818956d+00, 2.85535269d+00, 1.36064008d+02, 4.71968910d+02, 1.92022350d+03 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) + enddo + + !elseif(ng_fit_jast .eq. 9) then + + ! coef_gauss_j_mu_x_2 = (/ /) + ! expo_gauss_j_mu_x_2 = (/ /) + ! + ! tmp = mu_erf * mu_erf + ! do i = 1, ng_fit_jast + ! expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) + ! enddo + elseif(ng_fit_jast .eq. 20) then ASSERT(n_max_fit_slat == 20) @@ -328,6 +388,36 @@ END_PROVIDER expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) enddo + elseif(ng_fit_jast .eq. 7) then + + coef_gauss_j_mu_1_erf = (/ -0.11853067d0 , -0.01522824d0 , -0.07419098d0 , -0.022202d0 , -0.12242283d0 , -0.04177571d0 , -0.16983107d0 /) + expo_gauss_j_mu_1_erf = (/ 2.74057056d+00, 1.37626591d+04, 6.65578663d+01, 1.34693031d+03, 1.90547699d+01, 2.69445390d+02, 6.31845879d+00/) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) + enddo + + elseif(ng_fit_jast .eq. 8) then + + coef_gauss_j_mu_1_erf = (/ -0.12263328d0 , -0.04965255d0 , -0.15463564d0 , -0.09675781d0 , -0.0807023d0 , -0.02923298d0 , -0.01381381d0 , -0.01675923d0 /) + expo_gauss_j_mu_1_erf = (/ 1.36101994d+01, 1.24908367d+02, 5.29061388d+00, 2.60692516d+00, 3.93396935d+01, 4.43071610d+02, 1.54902240d+04, 1.85170446d+03 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) + enddo + + !elseif(ng_fit_jast .eq. 9) then + + ! coef_gauss_j_mu_1_erf = (/ /) + ! expo_gauss_j_mu_1_erf = (/ /) + + ! tmp = mu_erf * mu_erf + ! do i = 1, ng_fit_jast + ! expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) + ! enddo + elseif(ng_fit_jast .eq. 20) then ASSERT(n_max_fit_slat == 20) diff --git a/src/ao_tc_eff_map/potential.irp.f b/src/ao_tc_eff_map/potential.irp.f index 37291563..5b72b567 100644 --- a/src/ao_tc_eff_map/potential.irp.f +++ b/src/ao_tc_eff_map/potential.irp.f @@ -1,59 +1,79 @@ +! --- + BEGIN_PROVIDER [integer, n_gauss_eff_pot] - implicit none - BEGIN_DOC -! number of gaussians to represent the effective potential : -! -! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2) -! -! Here (1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021) - END_DOC - n_gauss_eff_pot = n_max_fit_slat + 1 + + BEGIN_DOC + ! number of gaussians to represent the effective potential : + ! + ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2) + ! + ! Here (1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021) + END_DOC + + implicit none + + n_gauss_eff_pot = ng_fit_jast + 1 + END_PROVIDER +! --- + BEGIN_PROVIDER [integer, n_gauss_eff_pot_deriv] - implicit none - BEGIN_DOC -! V(r12) = -(1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021) - END_DOC - n_gauss_eff_pot_deriv = n_max_fit_slat + + BEGIN_DOC + ! V(r12) = -(1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021) + END_DOC + + implicit none + n_gauss_eff_pot_deriv = ng_fit_jast + END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, expo_gauss_eff_pot, (n_gauss_eff_pot)] &BEGIN_PROVIDER [double precision, coef_gauss_eff_pot, (n_gauss_eff_pot)] - implicit none - BEGIN_DOC -! Coefficients and exponents of the Fit on Gaussians of V(X) = -(1 - erf(mu*X))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*X)^2) -! -! V(X) = \sum_{i=1,n_gauss_eff_pot} coef_gauss_eff_pot(i) * exp(-expo_gauss_eff_pot(i) * X^2) -! -! Relies on the fit proposed in Eqs A11-A20 in JCP 154, 084119 (2021) - END_DOC - include 'constants.include.F' - integer :: i - ! fit of the -0.25 * (1 - erf(mu*x))^2 with n_max_fit_slat gaussians - do i = 1, n_max_fit_slat - expo_gauss_eff_pot(i) = expo_gauss_1_erf_x_2(i) - coef_gauss_eff_pot(i) = -0.25d0 * coef_gauss_1_erf_x_2(i) ! -1/4 * (1 - erf(mu*x))^2 - enddo - ! Analytical Gaussian part of the potential: + 1/(\sqrt(pi)mu) * exp(-(mu*x)^2) - expo_gauss_eff_pot(n_max_fit_slat+1) = mu_erf * mu_erf - coef_gauss_eff_pot(n_max_fit_slat+1) = 1.d0 * mu_erf * inv_sq_pi + BEGIN_DOC + ! Coefficients and exponents of the Fit on Gaussians of V(X) = -(1 - erf(mu*X))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*X)^2) + ! + ! V(X) = \sum_{i=1,n_gauss_eff_pot} coef_gauss_eff_pot(i) * exp(-expo_gauss_eff_pot(i) * X^2) + ! + ! Relies on the fit proposed in Eqs A11-A20 in JCP 154, 084119 (2021) + END_DOC + + include 'constants.include.F' + + implicit none + integer :: i + + ! fit of the -0.25 * (1 - erf(mu*x))^2 with n_max_fit_slat gaussians + do i = 1, ng_fit_jast + expo_gauss_eff_pot(i) = expo_gauss_1_erf_x_2(i) + coef_gauss_eff_pot(i) = -0.25d0 * coef_gauss_1_erf_x_2(i) ! -1/4 * (1 - erf(mu*x))^2 + enddo + + ! Analytical Gaussian part of the potential: + 1/(\sqrt(pi)mu) * exp(-(mu*x)^2) + expo_gauss_eff_pot(ng_fit_jast+1) = mu_erf * mu_erf + coef_gauss_eff_pot(ng_fit_jast+1) = 1.d0 * mu_erf * inv_sq_pi END_PROVIDER +! --- + +double precision function eff_pot_gauss(x, mu) + + BEGIN_DOC + ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2) + END_DOC + + implicit none + double precision, intent(in) :: x, mu + + eff_pot_gauss = mu/dsqrt(dacos(-1.d0)) * dexp(-mu*mu*x*x) - 0.25d0 * (1.d0 - derf(mu*x))**2.d0 -double precision function eff_pot_gauss(x,mu) - implicit none - BEGIN_DOC - ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2) - END_DOC - double precision, intent(in) :: x,mu - eff_pot_gauss = mu/dsqrt(dacos(-1.d0)) * dexp(-mu*mu*x*x) - 0.25d0 * (1.d0 - derf(mu*x))**2.d0 end - - ! ------------------------------------------------------------------------------------------------- ! --- @@ -129,16 +149,19 @@ END_PROVIDER ! --- double precision function fit_1_erf_x(x) - implicit none - double precision, intent(in) :: x - BEGIN_DOC -! fit_1_erf_x(x) = \sum_i c_i exp (-alpha_i x^2) \approx (1 - erf(mu*x)) - END_DOC - integer :: i - fit_1_erf_x = 0.d0 - do i = 1, n_max_fit_slat - fit_1_erf_x += dexp(-expo_gauss_1_erf_x(i) *x*x) * coef_gauss_1_erf_x(i) - enddo + + BEGIN_DOC + ! fit_1_erf_x(x) = \sum_i c_i exp (-alpha_i x^2) \approx (1 - erf(mu*x)) + END_DOC + + implicit none + integer :: i + double precision, intent(in) :: x + + fit_1_erf_x = 0.d0 + do i = 1, n_max_fit_slat + fit_1_erf_x += dexp(-expo_gauss_1_erf_x(i) *x*x) * coef_gauss_1_erf_x(i) + enddo end @@ -209,6 +232,36 @@ end expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) enddo + elseif(ng_fit_jast .eq. 7) then + + coef_gauss_1_erf_x_2 = (/ 0.0213619d0 , 0.03221511d0 , 0.29966689d0 , 0.19178934d0 , 0.06154732d0 , 0.28214555d0 , 0.11125985d0 /) + expo_gauss_1_erf_x_2 = (/ 1.34727067d+04, 1.27166613d+03, 5.52584567d+00, 1.67753218d+01, 2.46145691d+02, 2.47971820d+00, 5.95141293d+01 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 8) then + + coef_gauss_1_erf_x_2 = (/ 0.28189124d0 , 0.19518669d0 , 0.12161735d0 , 0.24257438d0 , 0.07309656d0 , 0.042435d0 , 0.01926109d0 , 0.02393415d0 /) + expo_gauss_1_erf_x_2 = (/ 4.69795903d+00, 1.21379451d+01, 3.55527053d+01, 2.39227172d+00, 1.14827721d+02, 4.16320213d+02, 1.52813587d+04, 1.78516557d+03 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) + enddo + + !elseif(ng_fit_jast .eq. 9) then + + ! coef_gauss_1_erf_x_2 = (/ /) + ! expo_gauss_1_erf_x_2 = (/ /) + + ! tmp = mu_erf * mu_erf + ! do i = 1, ng_fit_jast + ! expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) + ! enddo + elseif(ng_fit_jast .eq. 20) then ASSERT(n_max_fit_slat == 20) diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index c077dea1..4694a998 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -107,14 +107,16 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, integer :: i, j, ipoint double precision :: wall0, wall1 + print *, ' providing int2_grad1_u12_ao_transp ...' call wall_time(wall0) + if(test_cycle_tc)then do ipoint = 1, n_points_final_grid do i = 1, ao_num do j = 1, ao_num - int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(1,j,i,ipoint) - int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(2,j,i,ipoint) - int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(3,j,i,ipoint) + int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,1) + int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,2) + int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,3) enddo enddo enddo @@ -122,9 +124,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, do ipoint = 1, n_points_final_grid do i = 1, ao_num do j = 1, ao_num - int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(1,j,i,ipoint) - int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(2,j,i,ipoint) - int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(3,j,i,ipoint) + int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(j,i,ipoint,1) + int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(j,i,ipoint,2) + int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(j,i,ipoint,3) enddo enddo enddo @@ -192,9 +194,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, do ipoint = 1, n_points_final_grid do i = 1, ao_num do j = 1, ao_num - int2_grad1_u12_ao_t(ipoint,1,j,i) = int2_grad1_u12_ao(1,j,i,ipoint) - int2_grad1_u12_ao_t(ipoint,2,j,i) = int2_grad1_u12_ao(2,j,i,ipoint) - int2_grad1_u12_ao_t(ipoint,3,j,i) = int2_grad1_u12_ao(3,j,i,ipoint) + int2_grad1_u12_ao_t(ipoint,1,j,i) = int2_grad1_u12_ao(j,i,ipoint,1) + int2_grad1_u12_ao_t(ipoint,2,j,i) = int2_grad1_u12_ao(j,i,ipoint,2) + int2_grad1_u12_ao_t(ipoint,3,j,i) = int2_grad1_u12_ao(j,i,ipoint,3) enddo enddo enddo @@ -203,40 +205,6 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo, (3, mo_num, mo_num, n_points_final_grid)] - - BEGIN_DOC - ! - ! int2_grad1_u12_bimo(:,k,i,ipoint) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \chi_k(r2) \phi_i(r2) - ! - END_DOC - - implicit none - integer :: ipoint - print*,'Wrong !!' - stop - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao,int2_grad1_u12_bimo) - !$OMP DO SCHEDULE (dynamic) - do ipoint = 1, n_points_final_grid - - call ao_to_mo_bi_ortho( int2_grad1_u12_ao (1,1,1,ipoint), size(int2_grad1_u12_ao , 2) & - , int2_grad1_u12_bimo(1,1,1,ipoint), size(int2_grad1_u12_bimo, 2) ) - call ao_to_mo_bi_ortho( int2_grad1_u12_ao (2,1,1,ipoint), size(int2_grad1_u12_ao , 2) & - , int2_grad1_u12_bimo(2,1,1,ipoint), size(int2_grad1_u12_bimo, 2) ) - call ao_to_mo_bi_ortho( int2_grad1_u12_ao (3,1,1,ipoint), size(int2_grad1_u12_ao , 2) & - , int2_grad1_u12_bimo(3,1,1,ipoint), size(int2_grad1_u12_bimo, 2) ) - - enddo - !$OMP END DO - !$OMP END PARALLEL - -END_PROVIDER - -! --- - BEGIN_PROVIDER [ double precision, mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_points_final_grid, 3, mo_num, mo_num)] implicit none diff --git a/src/bi_ortho_mos/bi_density.irp.f b/src/bi_ortho_mos/bi_density.irp.f index 56f44da1..2dad9485 100644 --- a/src/bi_ortho_mos/bi_density.irp.f +++ b/src/bi_ortho_mos/bi_density.irp.f @@ -2,49 +2,68 @@ ! --- BEGIN_PROVIDER [double precision, TCSCF_bi_ort_dm_ao_alpha, (ao_num, ao_num) ] + + BEGIN_DOC + ! TCSCF_bi_ort_dm_ao_alpha(i,j) = where i,j are AO basis. + ! + ! This is the equivalent of the alpha density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0> + END_DOC + implicit none - BEGIN_DOC -! TCSCF_bi_ort_dm_ao_alpha(i,j) = where i,j are AO basis. -! -! This is the equivalent of the alpha density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0> - END_DOC + + PROVIDE mo_l_coef mo_r_coef + call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0 & , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) & !, mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) & , 0.d0, TCSCF_bi_ort_dm_ao_alpha, size(TCSCF_bi_ort_dm_ao_alpha, 1) ) + END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao_beta, (ao_num, ao_num) ] + + BEGIN_DOC + ! TCSCF_bi_ort_dm_ao_beta(i,j) = where i,j are AO basis. + ! + ! This is the equivalent of the beta density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0> + END_DOC + implicit none - BEGIN_DOC -! TCSCF_bi_ort_dm_ao_beta(i,j) = where i,j are AO basis. -! -! This is the equivalent of the beta density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0> - END_DOC + + PROVIDE mo_l_coef mo_r_coef + call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0 & , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) & !, mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) & , 0.d0, TCSCF_bi_ort_dm_ao_beta, size(TCSCF_bi_ort_dm_ao_beta, 1) ) + END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao, (ao_num, ao_num) ] + + BEGIN_DOC + ! TCSCF_bi_ort_dm_ao(i,j) = where i,j are AO basis. + ! + ! This is the equivalent of the total electronic density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0> + END_DOC + implicit none - BEGIN_DOC -! TCSCF_bi_ort_dm_ao(i,j) = where i,j are AO basis. -! -! This is the equivalent of the total electronic density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0> - END_DOC - ASSERT ( size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_alpha, 1) ) - if( elec_alpha_num==elec_beta_num ) then + + PROVIDE mo_l_coef mo_r_coef + + ASSERT(size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_alpha, 1)) + + if(elec_alpha_num==elec_beta_num) then TCSCF_bi_ort_dm_ao = TCSCF_bi_ort_dm_ao_alpha + TCSCF_bi_ort_dm_ao_alpha else - ASSERT ( size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_beta, 1)) + ASSERT(size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_beta, 1)) TCSCF_bi_ort_dm_ao = TCSCF_bi_ort_dm_ao_alpha + TCSCF_bi_ort_dm_ao_beta endif + END_PROVIDER ! --- diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index 3510db37..bad6434f 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -253,9 +253,9 @@ void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOM buildTreeDriver(bftree, *NSOMO, MS, NBF); } -void ortho_qr_csf(double *overlapMatrix, int lda, double *orthoMatrix, int rows, int cols); +//void ortho_qr_csf(double *overlapMatrix, int lda, double *orthoMatrix, int rows, int cols); + -// QR to orthogonalize CSFs does not work //void gramSchmidt_qp(double *overlapMatrix, int rows, int cols, double *orthoMatrix){ // int i,j; // //for(j=0;j SOMO 2->DOMO +!!! integer :: nholes +!!! integer :: nvmos +!!! integer :: listvmos(mo_num) +!!! integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO +!!! integer*8 :: Idomo, Idomop, Idomoq +!!! integer*8 :: Isomo, Isomop, Isomoq +!!! integer*8 :: Jdomo, Jdomop, Jdomoq +!!! integer*8 :: Jsomo, Jsomop, Jsomoq +!!! integer*8 :: diffSOMO +!!! integer*8 :: diffDOMO +!!! integer*8 :: xordiffSOMODOMO +!!! integer :: ndiffSOMO +!!! integer :: ndiffDOMO +!!! integer :: nxordiffSOMODOMO +!!! integer :: ndiffAll +!!! integer :: i,ii,iii +!!! integer :: j,jj, i_s, i_d +!!! integer :: k,kk +!!! integer :: kstart +!!! integer :: kend +!!! integer :: Nsomo_I, Nsomo_J +!!! integer :: hole, n_core_orb_64 +!!! integer :: p, pp, p_s +!!! integer :: q, qq, q_s +!!! integer :: countalphas +!!! logical :: pqAlreadyGenQ +!!! logical :: pqExistsQ +!!! logical :: ppExistsQ +!!! integer*8 :: MS +!!! integer :: listall(N_int*bit_kind_size), nelall +!!! +!!! double precision :: t0, t1 +!!! call wall_time(t0) +!!! +!!! MS = elec_alpha_num-elec_beta_num +!!! +!!! allocate(tableUniqueAlphas(mo_num,mo_num)) +!!! NalphaIcfg_list = 0 +!!! +!!! do idxI = 1, N_configuration +!!! +!!! Icfg = psi_configuration(:,:,idxI) +!!! Jcfg = psi_configuration(:,:,idxI) +!!! !print *," Jcfg somo=",Jcfg(1,1), " ", Jcfg(2,1) +!!! !print *," Jcfg domo=",Jcfg(1,2), " ", Jcfg(2,2) +!!! +!!! Isomo = iand(act_bitmask(1,1),Icfg(1,1)) +!!! Idomo = iand(act_bitmask(1,1),Icfg(1,2)) +!!! +!!! ! find out all pq holes possible +!!! nholes = 0 +!!! ! holes in SOMO +!!! !do ii = 1,n_act_orb +!!! ! i = list_act(ii) +!!! ! if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then +!!! ! nholes += 1 +!!! ! listholes(nholes) = i +!!! ! holetype(nholes) = 1 +!!! ! endif +!!! !end do +!!! call bitstring_to_list(psi_configuration(1,1,idxI),listall,nelall,N_int) +!!! +!!! !print *,'list somo' +!!! do iii=1,nelall +!!! nholes += 1 +!!! listholes(nholes) = listall(iii) +!!! !print *,listall(iii) +!!! holetype(nholes) = 1 +!!! end do +!!! +!!! Nsomo_I = nelall +!!! +!!! ! holes in DOMO +!!! !do ii = 1,n_act_orb +!!! ! i = list_act(ii) +!!! ! if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then +!!! ! nholes += 1 +!!! ! listholes(nholes) = i +!!! ! holetype(nholes) = 2 +!!! ! endif +!!! !end do +!!! +!!! !do iii=1,N_int +!!! ! print *,' iii=',iii, psi_configuration(iii,2,idxI), ' idxI=',idxI +!!! !end do +!!! call bitstring_to_list(psi_configuration(1,2,idxI),listall,nelall,N_int) +!!! +!!! !print *,'list domo ncore=',n_core_orb, ' nelall=',nelall +!!! do iii=1,nelall +!!! if(listall(iii) .gt. n_core_orb)then +!!! nholes += 1 +!!! listholes(nholes) = listall(iii) +!!! !print *,listall(iii) +!!! holetype(nholes) = 2 +!!! endif +!!! end do +!!! +!!! ! find vmos +!!! listvmos = -1 +!!! vmotype = -1 +!!! nvmos = 0 +!!! !do ii = 1,n_act_orb +!!! ! i = list_act(ii) +!!! ! if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then +!!! ! if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then +!!! ! nvmos += 1 +!!! ! listvmos(nvmos) = i +!!! ! print *,'1 i=',i +!!! ! vmotype(nvmos) = 1 +!!! ! else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then +!!! ! nvmos += 1 +!!! ! listvmos(nvmos) = i +!!! ! print *,'2 i=',i +!!! ! vmotype(nvmos) = 2 +!!! ! end if +!!! ! end if +!!! !end do +!!! !print *,'-----------' +!!! +!!! ! Take into account N_int +!!! do ii = 1, n_act_orb +!!! iii = list_act(ii) +!!! i_s = (1+((iii-1)/63)) +!!! i = iii - ( i_s -1 )*63 +!!! Isomo = iand(act_bitmask(i_s,1),Icfg(i_s,1)) +!!! Idomo = iand(act_bitmask(i_s,1),Icfg(i_s,2)) +!!! +!!! if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then +!!! if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then +!!! nvmos += 1 +!!! listvmos(nvmos) = iii +!!! vmotype(nvmos) = 1 +!!! else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then +!!! nvmos += 1 +!!! listvmos(nvmos) = iii +!!! vmotype(nvmos) = 2 +!!! end if +!!! end if +!!! end do +!!! +!!! tableUniqueAlphas = .FALSE. +!!! +!!! ! Now find the allowed (p,q) excitations +!!! Isomo = iand(act_bitmask(1,1),Icfg(1,1)) +!!! Idomo = iand(act_bitmask(1,1),Icfg(1,2)) +!!! !Nsomo_I = POPCNT(Isomo) +!!! if(Nsomo_I .EQ. 0) then +!!! kstart = 1 +!!! else +!!! kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)) +!!! endif +!!! kend = idxI-1 +!!! +!!! do i = 1,nholes +!!! pp = listholes(i) +!!! p_s = (1+((pp-1)/63)) +!!! p = pp - (p_s - 1)*63 +!!! !print *,' pp=',pp, ' p_s=',p_s, ' p=',p +!!! do j = 1,nvmos +!!! qq = listvmos(j) +!!! q_s = (1+((qq-1)/63)) +!!! q = qq - (q_s - 1)*63 +!!! !print *,' qq=',qq, ' q_s=',q_s, ' q=',q +!!! Isomop = iand(act_bitmask(i_s,1),Icfg(p_s,1)) +!!! Idomop = iand(act_bitmask(i_s,1),Icfg(p_s,2)) +!!! Isomop = iand(act_bitmask(i_s,1),Icfg(q_s,1)) +!!! Idomop = iand(act_bitmask(i_s,1),Icfg(q_s,2)) +!!! if(p .EQ. q) cycle +!!! if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then +!!! ! SOMO -> VMO +!!! !print *,'SOMO -> VMO' +!!! if (p_s .eq. q_s) then +!!! Jsomop = IBCLR(Isomop,p-1) +!!! Jsomop = IBSET(Jsomop,q-1) +!!! Jsomoq = Jsomop +!!! else +!!! Jsomop = IBCLR(Isomop,p-1) +!!! Jsomoq = IBSET(Isomoq,q-1) +!!! endif +!!! +!!! ! Domo remains the same +!!! Jdomop = Idomop +!!! Jdomoq = Idomoq +!!! +!!! kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) +!!! kend = idxI-1 +!!! else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then +!!! ! SOMO -> SOMO +!!! !print *,'SOMO -> SOMO' +!!! if(p_s .eq. q_s) then +!!! Jsomop = IBCLR(Isomop,p-1) +!!! Jsomop = IBCLR(Jsomop,q-1) +!!! Jsomoq = Jsomop +!!! else +!!! Jsomop = IBCLR(Isomop,p-1) +!!! Jsomoq = IBCLR(Isomoq,q-1) +!!! endif +!!! +!!! Jdomoq = IBSET(Idomoq,q-1) +!!! +!!! ! Check for Minimal alpha electrons (MS) +!!! if(POPCNT(Jsomoq).ge.MS)then +!!! kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) +!!! kend = idxI-1 +!!! else +!!! cycle +!!! endif +!!! else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then +!!! ! DOMO -> VMO +!!! !print *,'DOMO -> VMO', Isomop, p, q, Jsomop +!!! if(p_s .eq. q_s) then +!!! Jsomop = IBSET(Isomop,p-1) +!!! Jsomop = IBSET(Jsomop,q-1) +!!! Jsomoq = Jsomop +!!! else +!!! Jsomop = IBSET(Isomop,p-1) +!!! Jsomoq = IBSET(Jsomoq,q-1) +!!! endif +!!! !print *, 'Jsomop=', Jsomop +!!! +!!! Jdomop = IBCLR(Idomop,p-1) +!!! +!!! kstart = cfg_seniority_index(Nsomo_I) +!!! kend = idxI-1 +!!! else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then +!!! ! DOMO -> SOMO +!!! !print *,'DOMO -> SOMO' +!!! if(p_s .eq. q_s) then +!!! Jsomop = IBSET(Isomop,p-1) +!!! Jsomop = IBCLR(Jsomop,q-1) +!!! Jsomoq = Jsomop +!!! +!!! Jdomop = IBCLR(Idomop,p-1) +!!! Jdomop = IBSET(Jdomop,q-1) +!!! Jdomoq = Jdomop +!!! else +!!! Jsomop = IBSET(Isomop,p-1) +!!! Jsomoq = IBCLR(Jsomoq,q-1) +!!! +!!! Jdomop = IBCLR(Idomop,p-1) +!!! Jdomoq = IBSET(Jdomoq,q-1) +!!! endif +!!! +!!! kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) +!!! kend = idxI-1 +!!! else +!!! print*,"Something went wrong in obtain_associated_alphaI" +!!! endif +!!! +!!! ! Save it to Jcfg +!!! !print *,i,j,"0| nalpha=",NalphaIcfg, " somo=",Jcfg(1,1),Jcfg(2,1) +!!! Jcfg(p_s,1) = Jsomop +!!! Jcfg(q_s,1) = Jsomoq +!!! Jcfg(p_s,2) = Jdomop +!!! Jcfg(q_s,2) = Jdomoq +!!! !print *,'p_s=',p_s,' q_s=', q_s +!!! !print *,'Jsomop=',Jsomop, ' Jsomoq=', Jsomoq, ' Jdomop=', Jdomop, ' Jdomoq=', Jdomo +!!! !print *,i,j,"1| nalpha=",NalphaIcfg, " somo=",Jcfg(1,1),Jcfg(2,1) +!!! call bitstring_to_list(Jcfg(1,1),listall,nelall,N_int) +!!! Nsomo_J = nelall +!!! +!!! ! Check for Minimal alpha electrons (MS) +!!! if(Nsomo_J.lt.MS)then +!!! cycle +!!! endif +!!! +!!! ! Again, we don't have to search from 1 +!!! ! we just use seniority to find the +!!! ! first index with NSOMO - 2 to NSOMO + 2 +!!! ! this is what is done in kstart, kend +!!! +!!! pqAlreadyGenQ = .FALSE. +!!! ! First check if it can be generated before +!!! do k = kstart, kend +!!! !diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k))) +!!! !ndiffSOMO = POPCNT(diffSOMO) +!!! !if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle +!!! !diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k))) +!!! !xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) +!!! !ndiffDOMO = POPCNT(diffDOMO) +!!! !nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) +!!! !nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO +!!! +!!! ndiffSOMO = 0 +!!! ndiffDOMO = 0 +!!! nxordiffSOMODOMO = 0 +!!! do ii = 1, N_int +!!! Jsomo = Jcfg(ii,1) +!!! Jdomo = Jcfg(ii,2) +!!! diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(ii,1),psi_configuration(ii,1,k))) +!!! ndiffSOMO += POPCNT(diffSOMO) +!!! diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(ii,2),psi_configuration(ii,2,k))) +!!! xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) +!!! ndiffDOMO += POPCNT(diffDOMO) +!!! nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO) +!!! nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO +!!! end do +!!! +!!! if((ndiffSOMO .ne. 0) .and. (ndiffSOMO .ne. 2)) cycle +!!! +!!! if((ndiffSOMO+ndiffDOMO) .EQ. 0) then +!!! pqAlreadyGenQ = .TRUE. +!!! ppExistsQ = .TRUE. +!!! EXIT +!!! endif +!!! if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then +!!! pqAlreadyGenQ = .TRUE. +!!! EXIT +!!! endif +!!! end do +!!! +!!! if(pqAlreadyGenQ) cycle +!!! +!!! pqExistsQ = .FALSE. +!!! +!!! if(.NOT. pqExistsQ) then +!!! tableUniqueAlphas(p,q) = .TRUE. +!!! endif +!!! end do +!!! end do +!!! +!!! !print *,tableUniqueAlphas(:,:) +!!! +!!! ! prune list of alphas +!!! Isomo = Icfg(1,1) +!!! Idomo = Icfg(1,2) +!!! Jsomo = Icfg(1,1) +!!! Jdomo = Icfg(1,2) +!!! NalphaIcfg = 0 +!!! do i = 1, nholes +!!! !p = listholes(i) +!!! pp = listholes(i) +!!! p_s = (1+((pp-1)/63)) +!!! p = pp - (p_s - 1)*63 +!!! do j = 1, nvmos +!!! !q = listvmos(j) +!!! qq = listvmos(j) +!!! q_s = (1+((qq-1)/63)) +!!! q = qq - (q_s - 1)*63 +!!! Isomop = iand(act_bitmask(i_s,1),Icfg(p_s,1)) +!!! Idomop = iand(act_bitmask(i_s,1),Icfg(p_s,2)) +!!! Isomoq = iand(act_bitmask(i_s,1),Icfg(q_s,1)) +!!! Idomoq = iand(act_bitmask(i_s,1),Icfg(q_s,2)) +!!! if(p .EQ. q) cycle +!!! if(tableUniqueAlphas(p,q)) then +!!! if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then +!!! ! SOMO -> VMO +!!! !Jsomo = IBCLR(Isomo,p-1) +!!! !Jsomo = IBSET(Jsomo,q-1) +!!! !Jdomo = Idomo +!!! if (p_s .eq. q_s) then +!!! Jsomop = IBCLR(Isomop,p-1) +!!! Jsomop = IBSET(Jsomop,q-1) +!!! Jsomoq = Jsomop +!!! else +!!! Jsomop = IBCLR(Isomop,p-1) +!!! Jsomoq = IBSET(Isomoq,q-1) +!!! endif +!!! +!!! ! Domo remains the same +!!! Jdomop = Idomop +!!! Jdomoq = Idomoq +!!! +!!! else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then +!!! ! SOMO -> SOMO +!!! !Jsomo = IBCLR(Isomo,p-1) +!!! !Jsomo = IBCLR(Jsomo,q-1) +!!! !Jdomo = IBSET(Idomo,q-1) +!!! +!!! if(p_s .eq. q_s) then +!!! Jsomop = IBCLR(Isomop,p-1) +!!! Jsomop = IBCLR(Jsomop,q-1) +!!! Jsomoq = Jsomop +!!! else +!!! Jsomop = IBCLR(Isomop,p-1) +!!! Jsomoq = IBCLR(Isomoq,q-1) +!!! endif +!!! +!!! Jdomoq = IBSET(Idomoq,q-1) +!!! +!!! if(POPCNT(Jsomoq).ge.MS)then +!!! kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) +!!! kend = idxI-1 +!!! else +!!! cycle +!!! endif +!!! else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then +!!! ! DOMO -> VMO +!!! !Jsomo = IBSET(Isomo,p-1) +!!! !Jsomo = IBSET(Jsomo,q-1) +!!! !Jdomo = IBCLR(Idomo,p-1) +!!! +!!! if(p_s .eq. q_s) then +!!! Jsomop = IBSET(Isomop,p-1) +!!! Jsomop = IBSET(Jsomop,q-1) +!!! Jsomoq = Jsomop +!!! else +!!! Jsomop = IBSET(Isomop,p-1) +!!! Jsomoq = IBSET(Jsomoq,q-1) +!!! endif +!!! +!!! Jdomop = IBCLR(Idomop,p-1) +!!! +!!! else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then +!!! ! DOMO -> SOMO +!!! !Jsomo = IBSET(Isomo,p-1) +!!! !Jsomo = IBCLR(Jsomo,q-1) +!!! !Jdomo = IBCLR(Idomo,p-1) +!!! !Jdomo = IBSET(Jdomo,q-1) +!!! if(p_s .eq. q_s) then +!!! Jsomop = IBSET(Isomop,p-1) +!!! Jsomop = IBCLR(Jsomop,q-1) +!!! Jsomoq = Jsomop +!!! +!!! Jdomop = IBCLR(Idomop,p-1) +!!! Jdomop = IBSET(Jdomop,q-1) +!!! Jdomoq = Jdomop +!!! else +!!! Jsomop = IBSET(Isomop,p-1) +!!! Jsomoq = IBCLR(Jsomoq,q-1) +!!! +!!! Jdomop = IBCLR(Idomop,p-1) +!!! Jdomoq = IBSET(Jdomoq,q-1) +!!! endif +!!! +!!! else +!!! print*,"Something went wrong in obtain_associated_alphaI" +!!! endif +!!! +!!! ! Save it to Jcfg +!!! Jcfg(p_s,1) = Jsomop +!!! Jcfg(q_s,1) = Jsomoq +!!! Jcfg(p_s,2) = Jdomop +!!! Jcfg(q_s,2) = Jdomoq +!!! +!!! ! SOMO +!!! !print *,i,j,"|",NalphaIcfg, Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) +!!! if(POPCNT(Jsomo) .ge. NSOMOMin) then +!!! NalphaIcfg += 1 +!!! alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1) +!!! !alphasIcfg_list(:,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) +!!! if(n_core_orb .le. 63)then +!!! alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jcfg(1,2),ISHFT(1_8,n_core_orb)-1) +!!! else +!!! n_core_orb_64 = n_core_orb +!!! do ii=1,N_int +!!! if(n_core_orb_64 .gt. 0)then +!!! alphasIcfg_list(ii,2,idxI,NalphaIcfg) = IOR(Jcfg(ii,2),ISHFT(1_8,n_core_orb_64)-1) +!!! else +!!! alphasIcfg_list(ii,2,idxI,NalphaIcfg) = Jcfg(ii,2) +!!! endif +!!! n_core_orb_64 = ISHFT(n_core_orb_64,-6) +!!! end do +!!! endif +!!! NalphaIcfg_list(idxI) = NalphaIcfg +!!! !print *,i,j,"2| nalpha=",NalphaIcfg, " somo=",Jcfg(1,1),Jcfg(2,1) +!!! endif +!!! endif +!!! end do +!!! end do +!!! +!!! ! Check if this Icfg has been previously generated as a mono +!!! ppExistsQ = .False. +!!! Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) +!!! Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) +!!! kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) +!!! ndiffDOMO = 0 +!!! do k = kstart, idxI-1 +!!! do ii=1,N_int +!!! diffSOMO = IEOR(Icfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k))) +!!! ndiffSOMO += POPCNT(diffSOMO) +!!! end do +!!! ! ndiffSOMO cannot be 0 (I /= k) +!!! ! if ndiffSOMO /= 2 then it has to be greater than 2 and hense +!!! ! this Icfg could not have been generated before. +!!! if (ndiffSOMO /= 2) cycle +!!! ndiffDOMO = 0 +!!! nxordiffSOMODOMO = 0 +!!! do ii=1,N_int +!!! diffDOMO = IEOR(Icfg(ii,2),iand(act_bitmask(ii,1),psi_configuration(ii,2,k))) +!!! xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) +!!! ndiffDOMO += POPCNT(diffDOMO) +!!! nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO) +!!! end do +!!! if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4)) then +!!! ppExistsQ = .TRUE. +!!! EXIT +!!! endif +!!! end do +!!! ! Diagonal part (pp,qq) +!!! if(nholes > 0 .AND. (.NOT. ppExistsQ))then +!!! ! SOMO +!!! if(POPCNT(Jsomo) .ge. NSOMOMin) then +!!! NalphaIcfg += 1 +!!! alphasIcfg_list(:,1,idxI,NalphaIcfg) = Icfg(:,1) +!!! alphasIcfg_list(:,2,idxI,NalphaIcfg) = Icfg(:,2) +!!! NalphaIcfg_list(idxI) = NalphaIcfg +!!! endif +!!! endif +!!! +!!! NalphaIcfg = 0 +!!! enddo ! end loop idxI +!!! call wall_time(t1) +!!! print *, 'Preparation : ', t1 - t0 +!!! +!!!END_PROVIDER + + BEGIN_PROVIDER [ integer(bit_kind), alphasIcfg_list , (N_int,2,N_configuration,mo_num*mo_num)] &BEGIN_PROVIDER [ integer, NalphaIcfg_list, (N_configuration) ] implicit none !use bitmasks @@ -12,6 +536,7 @@ use bitmasks integer :: idxI ! The id of the Ith CFG integer(bit_kind) :: Icfg(N_int,2) + integer(bit_kind) :: Jcfg(N_int,2) integer :: NalphaIcfg logical,dimension(:,:),allocatable :: tableUniqueAlphas integer :: listholes(mo_num) @@ -20,31 +545,32 @@ use bitmasks integer :: nvmos integer :: listvmos(mo_num) integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO - integer*8 :: Idomo - integer*8 :: Isomo - integer*8 :: Jdomo - integer*8 :: Jsomo - integer*8 :: diffSOMO - integer*8 :: diffDOMO - integer*8 :: xordiffSOMODOMO + integer(bit_kind) :: Idomo(N_int), Idomop(N_int), Idomoq(N_int) + integer(bit_kind) :: Isomo(N_int), Isomop(N_int), Isomoq(N_int) + integer(bit_kind) :: Jdomo(N_int), Jdomop(N_int), Jdomoq(N_int) + integer(bit_kind) :: Jsomo(N_int), Jsomop(N_int), Jsomoq(N_int) + !integer(bit_kind) :: diffDOMO(N_int), xordiffSOMODOMO(N_int), diffSOMO(N_int) + integer(bit_kind) :: diffDOMO, xordiffSOMODOMO, diffSOMO integer :: ndiffSOMO integer :: ndiffDOMO integer :: nxordiffSOMODOMO integer :: ndiffAll - integer :: i,ii - integer :: j,jj + integer :: i,ii,iii, iint, jint, ipos, jpos + integer :: j,jj, i_s, i_d integer :: k,kk integer :: kstart integer :: kend - integer :: Nsomo_I - integer :: hole - integer :: p - integer :: q + integer :: Nsomo_I, Nsomo_J + integer :: hole, n_core_orb_64 + integer :: p, pp, p_s + integer :: q, qq, q_s integer :: countalphas + integer :: countelec logical :: pqAlreadyGenQ logical :: pqExistsQ logical :: ppExistsQ integer*8 :: MS + integer :: listall(N_int*bit_kind_size), nelall double precision :: t0, t1 call wall_time(t0) @@ -57,45 +583,57 @@ use bitmasks do idxI = 1, N_configuration Icfg = psi_configuration(:,:,idxI) + Jcfg = psi_configuration(:,:,idxI) - Isomo = iand(act_bitmask(1,1),Icfg(1,1)) - Idomo = iand(act_bitmask(1,1),Icfg(1,2)) + !print *,"idxI=",idxI + do ii=1, N_int + Isomo(ii) = iand(act_bitmask(ii,1),psi_configuration(ii,1,idxI)) + Idomo(ii) = iand(act_bitmask(ii,2),psi_configuration(ii,2,idxI)) + !print *,Isomo(ii), Idomo(ii) + enddo ! find out all pq holes possible nholes = 0 - ! holes in SOMO - do ii = 1,n_act_orb - i = list_act(ii) - if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = i - holetype(nholes) = 1 - endif - end do - ! holes in DOMO - do ii = 1,n_act_orb - i = list_act(ii) - if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = i - holetype(nholes) = 2 - endif - end do + listholes=-1 + call bitstring_to_list(Isomo,listall,nelall,N_int) + + do iii=1,nelall + nholes += 1 + listholes(nholes) = listall(iii) + holetype(nholes) = 1 + end do + + Nsomo_I = nelall + + call bitstring_to_list(Idomo,listall,nelall,N_int) + + do iii=1,nelall + if(listall(iii) .gt. n_core_orb)then + nholes += 1 + listholes(nholes) = listall(iii) + holetype(nholes) = 2 + endif + end do ! find vmos listvmos = -1 vmotype = -1 nvmos = 0 - do ii = 1,n_act_orb - i = list_act(ii) - if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then - if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then + + ! Take into account N_int + do ii = 1, n_act_orb + iii = list_act(ii) + iint = shiftr(iii-1,bit_kind_shift) + 1 + ipos = iii-shiftl((iint-1),bit_kind_shift)-1 + + if(IAND(Idomo(iint),(IBSET(0_8,ipos))) .EQ. 0) then + if(IAND(Isomo(iint),(IBSET(0_8,ipos))) .EQ. 0) then nvmos += 1 - listvmos(nvmos) = i + listvmos(nvmos) = iii vmotype(nvmos) = 1 - else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then + else if(POPCNT(IAND(Isomo(iint),(IBSET(0_8,ipos)))) .EQ. 1) then nvmos += 1 - listvmos(nvmos) = i + listvmos(nvmos) = iii vmotype(nvmos) = 2 end if end if @@ -104,60 +642,63 @@ use bitmasks tableUniqueAlphas = .FALSE. ! Now find the allowed (p,q) excitations - Isomo = iand(act_bitmask(1,1),Icfg(1,1)) - Idomo = iand(act_bitmask(1,1),Icfg(1,2)) - Nsomo_I = POPCNT(Isomo) + do ii=1, N_int + !Isomo(ii) = iand(reunion_of_act_virt_bitmask(ii,1),psi_configuration(ii,1,idxI)) + !Idomo(ii) = iand(reunion_of_act_virt_bitmask(ii,2),psi_configuration(ii,2,idxI)) + Isomo(ii) = iand(act_bitmask(ii,1),psi_configuration(ii,1,idxI)) + Idomo(ii) = iand(act_bitmask(ii,2),psi_configuration(ii,2,idxI)) + !Isomo(ii) = psi_configuration(ii,1,idxI) + !Idomo(ii) = psi_configuration(ii,2,idxI) + Jsomo(ii) = Isomo(ii) + Jdomo(ii) = Idomo(ii) + enddo + if(Nsomo_I .EQ. 0) then kstart = 1 else kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)) endif + kstart = 1 kend = idxI-1 do i = 1,nholes - p = listholes(i) + pp = listholes(i) + iint = shiftr(pp-1,bit_kind_shift) + 1 + ipos = pp-shiftl((iint-1),bit_kind_shift)-1 + if(holetype(i) == 1)then + Jsomo(iint) = IBCLR(Jsomo(iint),ipos) + else if(holetype(i) == 2)then + Jdomo(iint) = IBCLR(Jdomo(iint),ipos) + Jsomo(iint) = IBSET(Jsomo(iint),ipos) + endif + do j = 1,nvmos - q = listvmos(j) - if(p .EQ. q) cycle - if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then - ! SOMO -> VMO - Jsomo = IBCLR(Isomo,p-1) - Jsomo = IBSET(Jsomo,q-1) - Jdomo = Idomo - kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) - kend = idxI-1 - else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then - ! SOMO -> SOMO - Jsomo = IBCLR(Isomo,p-1) - Jsomo = IBCLR(Jsomo,q-1) - Jdomo = IBSET(Idomo,q-1) - ! Check for Minimal alpha electrons (MS) - if(POPCNT(Jsomo).ge.MS)then - kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) - kend = idxI-1 - else - cycle - endif - else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then - ! DOMO -> VMO - Jsomo = IBSET(Isomo,p-1) - Jsomo = IBSET(Jsomo,q-1) - Jdomo = IBCLR(Idomo,p-1) - kstart = cfg_seniority_index(Nsomo_I) - kend = idxI-1 - else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then - ! DOMO -> SOMO - Jsomo = IBSET(Isomo,p-1) - Jsomo = IBCLR(Jsomo,q-1) - Jdomo = IBCLR(Idomo,p-1) - Jdomo = IBSET(Jdomo,q-1) - kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) - kend = idxI-1 - else - print*,"Something went wrong in obtain_associated_alphaI" + qq = listvmos(j) + if(pp.eq.qq) cycle + jint = shiftr(qq-1,bit_kind_shift) + 1 + jpos = qq-shiftl((jint-1),bit_kind_shift)-1 + if(vmotype(j) == 1)then + Jsomo(jint) = IBSET(Jsomo(jint),jpos) + else if(vmotype(j) == 2)then + Jdomo(jint) = IBSET(Jdomo(jint),jpos) + Jsomo(jint) = IBCLR(Jsomo(jint),jpos) endif + + Nsomo_J=0 + do ii=1, N_int + Jcfg(ii,1) = Jsomo(ii) + Jcfg(ii,2) = Jdomo(ii) + Nsomo_J += POPCNT(Jsomo(ii)) + enddo + ! Check for Minimal alpha electrons (MS) - if(POPCNT(Jsomo).lt.MS)then + if(Nsomo_J.lt.MS)then + if(vmotype(j) == 1)then + Jsomo(jint) = IBCLR(Jsomo(jint),jpos) + else if(vmotype(j) == 2)then + Jdomo(jint) = IBCLR(Jdomo(jint),jpos) + Jsomo(jint) = IBSET(Jsomo(jint),jpos) + endif cycle endif @@ -169,15 +710,21 @@ use bitmasks pqAlreadyGenQ = .FALSE. ! First check if it can be generated before do k = kstart, kend - diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k))) - ndiffSOMO = POPCNT(diffSOMO) - if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle - diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k))) - xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) - ndiffDOMO = POPCNT(diffDOMO) - nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) - nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO - !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then + ndiffSOMO = 0 + ndiffDOMO = 0 + nxordiffSOMODOMO = 0 + do ii = 1, N_int + diffSOMO = IEOR(Jcfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k))) + ndiffSOMO += POPCNT(diffSOMO) + diffDOMO = IEOR(Jcfg(ii,2),iand(act_bitmask(ii,2),psi_configuration(ii,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO += POPCNT(diffDOMO) + nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += POPCNT(diffSOMO) + POPCNT(diffDOMO) + end do + + if((ndiffSOMO .ne. 0) .and. (ndiffSOMO .ne. 2)) cycle + if((ndiffSOMO+ndiffDOMO) .EQ. 0) then pqAlreadyGenQ = .TRUE. ppExistsQ = .TRUE. @@ -189,86 +736,166 @@ use bitmasks endif end do - if(pqAlreadyGenQ) cycle + if(pqAlreadyGenQ) then + if(vmotype(j) == 1)then + Jsomo(jint) = IBCLR(Jsomo(jint),jpos) + else if(vmotype(j) == 2)then + Jdomo(jint) = IBCLR(Jdomo(jint),jpos) + Jsomo(jint) = IBSET(Jsomo(jint),jpos) + endif + cycle + endif pqExistsQ = .FALSE. + !print *, " ndiffSOMO=",ndiffSOMO, " ndiffDOMO=", ndiffDOMO, " nxordiffSOMODOMO=",nxordiffSOMODOMO, " p=",pp," q=",qq + if(.NOT. pqExistsQ) then - tableUniqueAlphas(p,q) = .TRUE. + tableUniqueAlphas(pp,qq) = .TRUE. + endif + + + if(vmotype(j) == 1)then + Jsomo(jint) = IBCLR(Jsomo(jint),jpos) + else if(vmotype(j) == 2)then + Jdomo(jint) = IBCLR(Jdomo(jint),jpos) + Jsomo(jint) = IBSET(Jsomo(jint),jpos) endif end do + if(holetype(i) == 1)then + Jsomo(iint) = IBSET(Jsomo(iint),ipos) + else if(holetype(i) == 2)then + Jdomo(iint) = IBSET(Jdomo(iint),ipos) + Jsomo(iint) = IBCLR(Jsomo(iint),ipos) + endif end do !print *,tableUniqueAlphas(:,:) - ! prune list of alphas - Isomo = Icfg(1,1) - Idomo = Icfg(1,2) - Jsomo = Icfg(1,1) - Jdomo = Icfg(1,2) + do ii=1, N_int + Isomo(ii) = iand(act_bitmask(ii,1),psi_configuration(ii,1,idxI)) + Idomo(ii) = iand(act_bitmask(ii,2),psi_configuration(ii,2,idxI)) + !Isomo(ii) = psi_configuration(ii,1,idxI) + !Idomo(ii) = psi_configuration(ii,2,idxI) + Jsomo(ii) = Isomo(ii) + Jdomo(ii) = Idomo(ii) + enddo + !print *, " Isomo=",Isomo(1), " Idomo=", Idomo(1) + + !countelec=0 + !do ii=1, N_int + ! countelec += POPCNT(Icfg(ii,1))*1 + POPCNT(Icfg(ii,2))*2 + !enddo + !if(countelec .ne. 14)then + ! print *," idxI=",idxI, "00countelec=",countelec, " bit_kind_size=",bit_kind_size, " nvmo=",nvmos," mo_num=",mo_num + ! stop + !endif + NalphaIcfg = 0 do i = 1, nholes - p = listholes(i) - do j = 1, nvmos - q = listvmos(j) - if(p .EQ. q) cycle - if(tableUniqueAlphas(p,q)) then - if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then - ! SOMO -> VMO - Jsomo = IBCLR(Isomo,p-1) - Jsomo = IBSET(Jsomo,q-1) - Jdomo = Idomo - else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then - ! SOMO -> SOMO - Jsomo = IBCLR(Isomo,p-1) - Jsomo = IBCLR(Jsomo,q-1) - Jdomo = IBSET(Idomo,q-1) - if(POPCNT(Jsomo).ge.MS)then - kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) - kend = idxI-1 - else - cycle - endif - else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then - ! DOMO -> VMO - Jsomo = IBSET(Isomo,p-1) - Jsomo = IBSET(Jsomo,q-1) - Jdomo = IBCLR(Idomo,p-1) - else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then - ! DOMO -> SOMO - Jsomo = IBSET(Isomo,p-1) - Jsomo = IBCLR(Jsomo,q-1) - Jdomo = IBCLR(Idomo,p-1) - Jdomo = IBSET(Jdomo,q-1) - else - print*,"Something went wrong in obtain_associated_alphaI" - endif + pp = listholes(i) + iint = shiftr(pp-1,bit_kind_shift) + 1 + ipos = pp-shiftl((iint-1),bit_kind_shift)-1 + if(holetype(i) == 1)then + Jsomo(iint) = IBCLR(Jsomo(iint),ipos) + else if(holetype(i) == 2)then + Jdomo(iint) = IBCLR(Jdomo(iint),ipos) + Jsomo(iint) = IBSET(Jsomo(iint),ipos) + endif - ! SOMO - !print *,i,j,"|",NalphaIcfg, Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) - if(POPCNT(Jsomo) .ge. NSOMOMin) then + do j = 1, nvmos + qq = listvmos(j) + if(pp.eq.qq) cycle + jint = shiftr(qq-1,bit_kind_shift) + 1 + jpos = qq-shiftl((jint-1),bit_kind_shift)-1 + if(vmotype(j) == 1)then + Jsomo(jint) = IBSET(Jsomo(jint),jpos) + else if(vmotype(j) == 2)then + Jdomo(jint) = IBSET(Jdomo(jint),jpos) + Jsomo(jint) = IBCLR(Jsomo(jint),jpos) + endif + + if(tableUniqueAlphas(pp,qq)) then + + Nsomo_J = 0 + countelec = 0 + do ii=1, N_int + Jcfg(ii,1) = Jsomo(ii) + Jcfg(ii,2) = Jdomo(ii) + Nsomo_J += POPCNT(Jsomo(ii)) + countelec += POPCNT(Jsomo(ii))*1 + POPCNT(Jdomo(ii))*2 + enddo + + if(Nsomo_J .ge. NSOMOMin) then + !print *," Idx = ",idxI, "p = ",pp, " q = ",qq," Jsomo=",Jsomo(1), " Jdomo=",IOR(Jdomo(1),ISHFT(1_8,n_core_orb)-1) NalphaIcfg += 1 - alphasIcfg_list(1,1,idxI,NalphaIcfg) = Jsomo - alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + !if(idxI.eq.8)then + ! print *," 1 Idx = ",idxI, " Nalpha=",NalphaIcfg, " n_core_orb=",n_core_orb + !endif + alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1) + alphasIcfg_list(:,2,idxI,NalphaIcfg) = Jcfg(:,2) + if(n_core_orb .le. 64)then + alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jcfg(1,2),ISHFT(1_8,n_core_orb)-1) + else + n_core_orb_64 = n_core_orb + do ii=1,N_int + if(n_core_orb_64 .gt. 0)then + alphasIcfg_list(ii,2,idxI,NalphaIcfg) = IOR(Jcfg(ii,2),ISHFT(1_8,n_core_orb_64)-1) + else + alphasIcfg_list(ii,2,idxI,NalphaIcfg) = Jcfg(ii,2) + endif + n_core_orb_64 = ISHFT(n_core_orb_64,-6) + end do + endif NalphaIcfg_list(idxI) = NalphaIcfg endif + !print *," ", NalphaIcfg, Jsomo(1), Jsomo(2), "|", Jdomo(1), Jdomo(2) + endif + + if(vmotype(j) == 1)then + Jsomo(jint) = IBCLR(Jsomo(jint),jpos) + else if(vmotype(j) == 2)then + Jdomo(jint) = IBCLR(Jdomo(jint),jpos) + Jsomo(jint) = IBSET(Jsomo(jint),jpos) endif end do + if(holetype(i) == 1)then + Jsomo(iint) = IBSET(Jsomo(iint),ipos) + else if(holetype(i) == 2)then + Jdomo(iint) = IBSET(Jdomo(iint),ipos) + Jsomo(iint) = IBCLR(Jsomo(iint),ipos) + endif end do ! Check if this Icfg has been previously generated as a mono ppExistsQ = .False. - Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) - Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) - kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) + do ii=1, N_int + Isomo(ii) = iand(act_bitmask(ii,1),psi_configuration(ii,1,idxI)) + Idomo(ii) = iand(act_bitmask(ii,2),psi_configuration(ii,2,idxI)) + enddo + !Icfg = psi_configuration(:,:,idxI) + + !kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) + kstart = 1 + ndiffDOMO = 0 do k = kstart, idxI-1 - diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k))) - ndiffSOMO = POPCNT(diffSOMO) + ndiffSOMO = 0 + do ii=1,N_int + diffSOMO = IEOR(Icfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k))) + ndiffSOMO += POPCNT(diffSOMO) + end do + ! ndiffSOMO cannot be 0 (I /= k) if idxI is a single ex + ! if ndiffSOMO /= 2 then it has to be greater than 2 and hense + ! this Icfg could not have been generated before. if (ndiffSOMO /= 2) cycle - diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k))) - xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) - ndiffDOMO = POPCNT(diffDOMO) - nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + ndiffDOMO = 0 + nxordiffSOMODOMO = 0 + do ii=1,N_int + diffDOMO = IEOR(Icfg(ii,2),iand(act_bitmask(ii,2),psi_configuration(ii,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO += POPCNT(diffDOMO) + nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO) + end do if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4)) then ppExistsQ = .TRUE. EXIT @@ -277,15 +904,17 @@ use bitmasks ! Diagonal part (pp,qq) if(nholes > 0 .AND. (.NOT. ppExistsQ))then ! SOMO - if(POPCNT(Jsomo) .ge. NSOMOMin) then + if(Nsomo_I .ge. NSOMOMin) then NalphaIcfg += 1 - alphasIcfg_list(1,1,idxI,NalphaIcfg) = Icfg(1,1) - alphasIcfg_list(1,2,idxI,NalphaIcfg) = Icfg(1,2) + alphasIcfg_list(:,1,idxI,NalphaIcfg) = Icfg(:,1) + alphasIcfg_list(:,2,idxI,NalphaIcfg) = Icfg(:,2) NalphaIcfg_list(idxI) = NalphaIcfg endif + !print *," ---> ", NalphaIcfg, Icfg(1,1), Icfg(2,1), "|", Icfg(1,2), Icfg(2,2) endif NalphaIcfg = 0 + enddo ! end loop idxI call wall_time(t1) print *, 'Preparation : ', t1 - t0 @@ -303,6 +932,7 @@ END_PROVIDER integer,intent(in) :: idxI ! The id of the Ith CFG integer(bit_kind),intent(in) :: Icfg(N_int,2) + integer(bit_kind) :: Jcfg(N_int,2) integer,intent(out) :: NalphaIcfg integer(bit_kind),intent(out) :: alphasIcfg(N_int,2,*) logical,dimension(:,:),allocatable :: tableUniqueAlphas @@ -312,74 +942,88 @@ END_PROVIDER integer :: nvmos integer :: listvmos(mo_num) integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO - integer*8 :: Idomo - integer*8 :: Isomo - integer*8 :: Jdomo - integer*8 :: Jsomo - integer*8 :: diffSOMO - integer*8 :: diffDOMO - integer*8 :: xordiffSOMODOMO + integer(bit_kind) :: Idomo(N_int), Idomop(N_int), Idomoq(N_int) + integer(bit_kind) :: Isomo(N_int), Isomop(N_int), Isomoq(N_int) + integer(bit_kind) :: Jdomo(N_int), Jdomop(N_int), Jdomoq(N_int) + integer(bit_kind) :: Jsomo(N_int), Jsomop(N_int), Jsomoq(N_int) + !integer(bit_kind) :: diffDOMO(N_int), xordiffSOMODOMO(N_int), diffSOMO(N_int) + integer(bit_kind) :: diffDOMO, xordiffSOMODOMO, diffSOMO integer :: ndiffSOMO integer :: ndiffDOMO integer :: nxordiffSOMODOMO integer :: ndiffAll integer :: i, ii integer :: j, jj + integer :: iii, iint, jint, ipos, jpos + integer :: i_s, i_d integer :: k, kk integer :: kstart integer :: kend - integer :: Nsomo_I - integer :: hole - integer :: p - integer :: q + integer :: Nsomo_I, Nsomo_J + integer :: hole, n_core_orb_64 + integer :: p, pp, p_s + integer :: q, qq, q_s integer :: countalphas logical :: pqAlreadyGenQ logical :: pqExistsQ logical :: ppExistsQ - Isomo = iand(act_bitmask(1,1),Icfg(1,1)) - Idomo = iand(act_bitmask(1,1),Icfg(1,2)) + integer :: listall(N_int*bit_kind_size), nelall + + do ii=1, N_int + Isomo(ii) = iand(act_bitmask(ii,1),Icfg(ii,1)) + Idomo(ii) = iand(act_bitmask(ii,2),Icfg(ii,2)) + enddo + !print*,"Input cfg" !call debug_spindet(Isomo,1) !call debug_spindet(Idomo,1) + alphasIcfg(:,:,1) = 0_bit_kind + ! find out all pq holes possible - nholes = 0 - ! holes in SOMO - do ii = 1,n_act_orb - i = list_act(ii) - if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = i - holetype(nholes) = 1 - endif - end do - ! holes in DOMO - do ii = 1,n_act_orb - i = list_act(ii) - if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = i - holetype(nholes) = 2 - endif - end do + nholes = 0 + call bitstring_to_list(Isomo,listall,nelall,N_int) + + do iii=1,nelall + nholes += 1 + listholes(nholes) = listall(iii) + holetype(nholes) = 1 + end do + + Nsomo_I = nelall + + call bitstring_to_list(Idomo,listall,nelall,N_int) + + do iii=1,nelall + if(listall(iii) .gt. n_core_orb)then + nholes += 1 + listholes(nholes) = listall(iii) + holetype(nholes) = 2 + endif + end do + ! find vmos - listvmos = -1 - vmotype = -1 - nvmos = 0 - do ii = 1,n_act_orb - i = list_act(ii) - !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) - if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0) then - nvmos += 1 - listvmos(nvmos) = i - vmotype(nvmos) = 1 - else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0 ) then - nvmos += 1 - listvmos(nvmos) = i - vmotype(nvmos) = 2 - end if - end do + ! Take into account N_int + nvmos=0 + do ii = 1, n_act_orb + iii = list_act(ii) + iint = shiftr(iii-1,bit_kind_shift) + 1 + ipos = iii-shiftl((iint-1),bit_kind_shift)-1 + + if(IAND(Idomo(iint),(IBSET(0_8,ipos))) .EQ. 0) then + if(IAND(Isomo(iint),(IBSET(0_8,ipos))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = iii + vmotype(nvmos) = 1 + else if(POPCNT(IAND(Isomo(iint),(IBSET(0_8,ipos)))) .EQ. 1) then + nvmos += 1 + listvmos(nvmos) = iii + vmotype(nvmos) = 2 + end if + end if + end do + !print *,"Nvmo=",nvmos !print *,listvmos @@ -388,10 +1032,15 @@ END_PROVIDER allocate(tableUniqueAlphas(mo_num,mo_num)) tableUniqueAlphas = .FALSE. + ! Now find the allowed (p,q) excitations + do ii=1, N_int + Isomo(ii) = iand(act_bitmask(ii,1),Icfg(ii,1)) + Idomo(ii) = iand(act_bitmask(ii,2),Icfg(ii,2)) + Jsomo(ii) = Isomo(ii) + Jdomo(ii) = Idomo(ii) + enddo + ! Now find the allowed (p,q) excitations - Isomo = iand(act_bitmask(1,1),Icfg(1,1)) - Idomo = iand(act_bitmask(1,1),Icfg(1,2)) - Nsomo_I = POPCNT(Isomo) if(Nsomo_I .EQ. 0) then kstart = 1 else @@ -411,41 +1060,40 @@ END_PROVIDER !enddo do i = 1,nholes - p = listholes(i) + pp = listholes(i) + iint = shiftr(pp-1,bit_kind_shift) + 1 + ipos = pp-shiftl((iint-1),bit_kind_shift)-1 + if(holetype(i) == 1)then + Jsomo(iint) = IBCLR(Jsomo(iint),ipos) + else if(holetype(i) == 2)then + Jdomo(iint) = IBCLR(Jdomo(iint),ipos) + endif + do j = 1,nvmos - q = listvmos(j) - if(p .EQ. q) cycle - if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then - ! SOMO -> VMO - Jsomo = IBCLR(Isomo,p-1) - Jsomo = IBSET(Jsomo,q-1) - Jdomo = Idomo - kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) - kend = idxI-1 - else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then - ! SOMO -> SOMO - Jsomo = IBCLR(Isomo,p-1) - Jsomo = IBCLR(Jsomo,q-1) - Jdomo = IBSET(Idomo,q-1) - kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) - kend = idxI-1 - else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then - ! DOMO -> VMO - Jsomo = IBSET(Isomo,p-1) - Jsomo = IBSET(Jsomo,q-1) - Jdomo = IBCLR(Idomo,p-1) - kstart = cfg_seniority_index(Nsomo_I) - kend = idxI-1 - else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then - ! DOMO -> SOMO - Jsomo = IBSET(Isomo,p-1) - Jsomo = IBCLR(Jsomo,q-1) - Jdomo = IBCLR(Idomo,p-1) - Jdomo = IBSET(Jdomo,q-1) - kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) - kend = idxI-1 - else - print*,"Something went wrong in obtain_associated_alphaI" + qq = listvmos(j) + jint = shiftr(qq-1,bit_kind_shift) + 1 + jpos = qq-shiftl((jint-1),bit_kind_shift)-1 + if(vmotype(j) == 1)then + Jsomo(jint) = IBSET(Jsomo(jint),jpos) + else if(vmotype(j) == 2)then + Jdomo(jint) = IBSET(Jdomo(jint),jpos) + endif + + do ii=1, N_int + Jcfg(ii,1) = Jsomo(ii) + Jcfg(ii,2) = Jdomo(ii) + enddo + + call bitstring_to_list(Jcfg,listall,nelall,N_int) + Nsomo_J = nelall + + if(pp .EQ. qq) then + if(vmotype(j) == 1)then + Jsomo(jint) = IBCLR(Jsomo(jint),jpos) + else if(vmotype(j) == 2)then + Jdomo(jint) = IBCLR(Jdomo(jint),jpos) + endif + cycle endif ! Again, we don't have to search from 1 @@ -456,14 +1104,19 @@ END_PROVIDER pqAlreadyGenQ = .FALSE. ! First check if it can be generated before do k = kstart, kend - diffSOMO = IEOR(Jsomo,iand(act_bitmask(1,1),psi_configuration(1,1,k))) - ndiffSOMO = POPCNT(diffSOMO) + ndiffSOMO = 0 + ndiffDOMO = 0 + nxordiffSOMODOMO = 0 + do ii = 1, N_int + diffSOMO = IEOR(Jcfg(ii,1),iand(reunion_of_act_virt_bitmask(ii,1),psi_configuration(ii,1,k))) + ndiffSOMO += POPCNT(diffSOMO) + diffDOMO = IEOR(Jcfg(ii,2),iand(reunion_of_act_virt_bitmask(ii,2),psi_configuration(ii,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO += POPCNT(diffDOMO) + nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += POPCNT(diffSOMO) + POPCNT(diffDOMO) + end do if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle - diffDOMO = IEOR(Jdomo,iand(act_bitmask(1,1),psi_configuration(1,2,k))) - xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) - ndiffDOMO = POPCNT(diffDOMO) - nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) - nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then if((ndiffSOMO+ndiffDOMO) .EQ. 0) then pqAlreadyGenQ = .TRUE. @@ -473,19 +1126,20 @@ END_PROVIDER if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then pqAlreadyGenQ = .TRUE. !EXIT - !ppExistsQ = .TRUE. - !print *,i,k,ndiffSOMO,ndiffDOMO - !call debug_spindet(Jsomo,1) - !call debug_spindet(Jdomo,1) - !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)),1) - !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)),1) EXIT endif end do !print *,"(,",p,",",q,")",pqAlreadyGenQ - if(pqAlreadyGenQ) cycle + if(pqAlreadyGenQ) then + if(vmotype(j) == 1)then + Jsomo(jint) = IBCLR(Jsomo(jint),jpos) + else if(vmotype(j) == 2)then + Jdomo(jint) = IBCLR(Jdomo(jint),jpos) + endif + cycle + endif pqExistsQ = .FALSE. ! now check if this exists in the selected list @@ -501,58 +1155,73 @@ END_PROVIDER !end do if(.NOT. pqExistsQ) then - tableUniqueAlphas(p,q) = .TRUE. + tableUniqueAlphas(pp,qq) = .TRUE. !print *,p,q !call debug_spindet(Jsomo,1) !call debug_spindet(Jdomo,1) endif + if(vmotype(j) == 1)then + Jsomo(jint) = IBCLR(Jsomo(jint),jpos) + else if(vmotype(j) == 2)then + Jdomo(jint) = IBCLR(Jdomo(jint),jpos) + endif end do + if(holetype(i) == 1)then + Jsomo(iint) = IBSET(Jsomo(iint),ipos) + else if(holetype(i) == 2)then + Jdomo(iint) = IBSET(Jdomo(iint),ipos) + endif end do !print *,tableUniqueAlphas(:,:) ! prune list of alphas - Isomo = Icfg(1,1) - Idomo = Icfg(1,2) - Jsomo = Icfg(1,1) - Jdomo = Icfg(1,2) + do ii=1, N_int + Isomo(ii) = iand(act_bitmask(ii,1),Icfg(ii,1)) + Idomo(ii) = iand(act_bitmask(ii,2),Icfg(ii,2)) + Jsomo(ii) = Isomo(ii) + Jdomo(ii) = Idomo(ii) + enddo + NalphaIcfg = 0 do i = 1, nholes - p = listholes(i) - do j = 1, nvmos - q = listvmos(j) - if(p .EQ. q) cycle - if(tableUniqueAlphas(p,q)) then - if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then - ! SOMO -> VMO - Jsomo = IBCLR(Isomo,p-1) - Jsomo = IBSET(Jsomo,q-1) - Jdomo = Idomo - else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then - ! SOMO -> SOMO - Jsomo = IBCLR(Isomo,p-1) - Jsomo = IBCLR(Jsomo,q-1) - Jdomo = IBSET(Idomo,q-1) - else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then - ! DOMO -> VMO - Jsomo = IBSET(Isomo,p-1) - Jsomo = IBSET(Jsomo,q-1) - Jdomo = IBCLR(Idomo,p-1) - else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then - ! DOMO -> SOMO - Jsomo = IBSET(Isomo,p-1) - Jsomo = IBCLR(Jsomo,q-1) - Jdomo = IBCLR(Idomo,p-1) - Jdomo = IBSET(Jdomo,q-1) - else - print*,"Something went wrong in obtain_associated_alphaI" - endif + pp = listholes(i) + iint = shiftr(pp-1,bit_kind_shift) + 1 + ipos = pp-shiftl((iint-1),bit_kind_shift)-1 + if(holetype(i) == 1)then + Jsomo(iint) = IBCLR(Jsomo(iint),ipos) + else if(holetype(i) == 2)then + Jdomo(iint) = IBCLR(Jdomo(iint),ipos) + endif + do j = 1, nvmos + qq = listvmos(j) + jint = shiftr(qq-1,bit_kind_shift) + 1 + jpos = qq-shiftl((jint-1),bit_kind_shift)-1 + if(vmotype(j) == 1)then + Jsomo(jint) = IBSET(Jsomo(jint),jpos) + else if(vmotype(j) == 2)then + Jdomo(jint) = IBSET(Jdomo(jint),jpos) + endif + if(pp .EQ. qq) cycle + if(tableUniqueAlphas(pp,qq)) then ! SOMO NalphaIcfg += 1 - !print *,i,j,"|",NalphaIcfg - alphasIcfg(1,1,NalphaIcfg) = Jsomo - alphasIcfg(1,2,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + alphasIcfg_list(:,1,idxI,NalphaIcfg) = Jcfg(:,1) + alphasIcfg_list(:,2,idxI,NalphaIcfg) = Jcfg(:,2) + if(n_core_orb .le. 64)then + alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jcfg(1,2),ISHFT(1_8,n_core_orb)-1) + else + n_core_orb_64 = n_core_orb + do ii=1,N_int + if(n_core_orb_64 .gt. 0)then + alphasIcfg_list(ii,2,idxI,NalphaIcfg) = IOR(Jcfg(ii,2),ISHFT(1_8,n_core_orb_64)-1) + else + alphasIcfg_list(ii,2,idxI,NalphaIcfg) = Jcfg(ii,2) + endif + n_core_orb_64 = ISHFT(n_core_orb_64,-6) + end do + endif !print *,"I = ",idxI, " Na=",NalphaIcfg," - ",Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) endif end do @@ -560,15 +1229,25 @@ END_PROVIDER ! Check if this Icfg has been previously generated as a mono ppExistsQ = .False. - Isomo = iand(act_bitmask(1,1),Icfg(1,1)) - Idomo = iand(act_bitmask(1,1),Icfg(1,2)) + !Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + !Idomo = iand(act_bitmask(1,2),Icfg(1,2)) do k = 1, idxI-1 - diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k))) - diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k))) - xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) - ndiffSOMO = POPCNT(diffSOMO) - ndiffDOMO = POPCNT(diffDOMO) - nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + do ii=1,N_int + diffSOMO = IEOR(Icfg(ii,1),iand(act_bitmask(ii,1),psi_configuration(ii,1,k))) + ndiffSOMO += POPCNT(diffSOMO) + end do + ! ndiffSOMO cannot be 0 (I /= k) + ! if ndiffSOMO /= 2 then it has to be greater than 2 and hense + ! this Icfg could not have been generated before. + if (ndiffSOMO /= 2) cycle + ndiffDOMO = 0 + nxordiffSOMODOMO = 0 + do ii=1,N_int + diffDOMO = IEOR(Icfg(ii,2),iand(act_bitmask(ii,2),psi_configuration(ii,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO += POPCNT(diffDOMO) + nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO) + end do if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then ppExistsQ = .TRUE. EXIT @@ -581,8 +1260,8 @@ END_PROVIDER !print *,p,q,"|",holetype(i),vmotype(j),NalphaIcfg !call debug_spindet(Idomo,1) !call debug_spindet(Jdomo,1) - alphasIcfg(1,1,NalphaIcfg) = Icfg(1,1) - alphasIcfg(1,2,NalphaIcfg) = Icfg(1,2) + alphasIcfg_list(:,1,idxI,NalphaIcfg) = Icfg(:,1) + alphasIcfg_list(:,2,idxI,NalphaIcfg) = Icfg(:,2) endif end subroutine diff --git a/src/csf/conversion.irp.f b/src/csf/conversion.irp.f index 494c3bfa..92c8e669 100644 --- a/src/csf/conversion.irp.f +++ b/src/csf/conversion.irp.f @@ -114,6 +114,7 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det) integer :: idx integer MS MS = elec_alpha_num-elec_beta_num + !print *,"size=",size(tmp_psi_coef_det,1)," ",size(tmp_psi_coef_det,2) countcsf = 0 diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f index 7d7ae09b..211d5af6 100644 --- a/src/csf/obtain_I_foralpha.irp.f +++ b/src/csf/obtain_I_foralpha.irp.f @@ -38,6 +38,7 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, integer :: holetype(mo_num) integer :: end_index integer :: Nsomo_I + integer :: listall(N_int*bit_kind_size), nelall ! ! 2 2 1 1 0 0 : 1 1 0 0 0 0 @@ -65,9 +66,12 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, ! Since CFGs are sorted wrt to seniority ! we don't have to search the full CFG list - Isomo = givenI(1,1) - Idomo = givenI(1,2) - Nsomo_I = POPCNT(Isomo) + Nsomo_I = 0 + do i=1,N_int + Isomo = givenI(i,1) + Idomo = givenI(i,2) + Nsomo_I += POPCNT(Isomo) + end do end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_I+6,elec_num))-1) if(end_index .LT. 0) end_index= N_configuration !end_index = N_configuration @@ -83,17 +87,24 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, ! idxs_connectedI(nconnectedI)=i ! cycle !endif - Isomo = givenI(1,1) - Idomo = givenI(1,2) - Jsomo = psi_configuration(1,1,i) - Jdomo = psi_configuration(1,2,i) - diffSOMO = IEOR(Isomo,Jsomo) - ndiffSOMO = POPCNT(diffSOMO) - diffDOMO = IEOR(Idomo,Jdomo) - xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) - ndiffDOMO = POPCNT(diffDOMO) - nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) - nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + + ndiffSOMO = 0 + ndiffDOMO = 0 + nxordiffSOMODOMO = 0 + do ii=1,N_int + Isomo = givenI(ii,1) + Idomo = givenI(ii,2) + Jsomo = psi_configuration(ii,1,i) + Jdomo = psi_configuration(ii,2,i) + diffSOMO = IEOR(Isomo,Jsomo) + ndiffSOMO += POPCNT(diffSOMO) + diffDOMO = IEOR(Idomo,Jdomo) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO += POPCNT(diffDOMO) + nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += POPCNT(diffSOMO) + POPCNT(diffDOMO) + end do + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then !------- ! MONO | @@ -144,25 +155,45 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, ! find out all pq holes possible nholes = 0 ! holes in SOMO - Isomo = psi_configuration(1,1,i) - Idomo = psi_configuration(1,2,i) - do iii = 1,n_act_orb - ii = list_act(iii) - if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = ii - holetype(nholes) = 1 - endif + !Isomo = psi_configuration(1,1,i) + !Idomo = psi_configuration(1,2,i) + !do iii = 1,n_act_orb + ! ii = list_act(iii) + ! if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then + ! nholes += 1 + ! listholes(nholes) = ii + ! holetype(nholes) = 1 + ! endif + !end do + + call bitstring_to_list(psi_configuration(1,1,i),listall,nelall,N_int) + + do iii=1,nelall + nholes += 1 + listholes(nholes) = listall(iii) + holetype(nholes) = 1 end do + ! holes in DOMO - do iii = 1,n_act_orb - ii = list_act(iii) - if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = ii - holetype(nholes) = 2 - endif + !do iii = 1,n_act_orb + ! ii = list_act(iii) + ! if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then + ! nholes += 1 + ! listholes(nholes) = ii + ! holetype(nholes) = 2 + ! endif + !end do + + call bitstring_to_list(psi_configuration(1,2,i),listall,nelall,N_int) + + do iii=1,nelall + if(listall(iii) .gt. n_core_orb)then + nholes += 1 + listholes(nholes) = listall(iii) + holetype(nholes) = 2 + endif end do + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)*nholes) endif end do @@ -199,6 +230,8 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI integer*8 :: Isomo integer*8 :: Jdomo integer*8 :: Jsomo + integer(bit_kind) :: Jcfg(N_int,2) + integer(bit_kind) :: Icfg(N_int,2) integer*8 :: IJsomo integer*8 :: diffSOMO integer*8 :: diffDOMO @@ -209,132 +242,261 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI integer :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes integer :: listholes(mo_num) integer :: holetype(mo_num) - integer :: end_index - integer :: Nsomo_alpha + integer :: end_index, ishift + integer :: Nsomo_alpha, pp,qq, nperm, iint, ipos integer*8 :: MS + integer :: exc(0:2,2,2), tz, m, n, high, low + integer :: listall(N_int*bit_kind_size), nelall + integer :: nconnectedExtradiag, nconnectedDiag + integer(bit_kind) :: hole, particle, tmp MS = elec_alpha_num-elec_beta_num + nconnectedExtradiag=0 + nconnectedDiag=0 nconnectedI = 0 end_index = N_configuration ! Since CFGs are sorted wrt to seniority ! we don't have to search the full CFG list - Isomo = Ialpha(1,1) - Idomo = Ialpha(1,2) - Nsomo_alpha = POPCNT(Isomo) + !Isomo = Ialpha(1,1) + !Idomo = Ialpha(1,2) + !Nsomo_alpha = POPCNT(Isomo) + Icfg = Ialpha + Nsomo_alpha = 0 + !print *," Ialpha=" + do ii=1,N_int + Isomo = Ialpha(ii,1) + Idomo = Ialpha(ii,2) + Nsomo_alpha += POPCNT(Isomo) + !print *,Isomo, Idomo, "Nsomo=",Nsomo_alpha + end do end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1) - if(end_index .LT. 0) end_index= N_configuration + if(end_index .LT. 0 .OR. end_index .lt. idxI) end_index= N_configuration end_index = N_configuration p = 0 q = 0 - if (N_int > 1) stop 'obtain_connected_i_foralpha : N_int > 1' + !if (N_int > 1) stop 'obtain_connected_i_foralpha : N_int > 1' do i=idxI,end_index - Isomo = Ialpha(1,1) - Idomo = Ialpha(1,2) - Jsomo = psi_configuration(1,1,i) - Jdomo = psi_configuration(1,2,i) ! Check for Minimal alpha electrons (MS) - if(POPCNT(Isomo).lt.MS)then + if(Nsomo_alpha .lt. MS)then cycle endif - diffSOMO = IEOR(Isomo,Jsomo) - ndiffSOMO = POPCNT(diffSOMO) - !if(idxI.eq.1)then - ! print *," \t idxI=",i," diffS=",ndiffSOMO," popJs=", POPCNT(Jsomo)," popIs=",POPCNT(Isomo) + + ndiffSOMO = 0 + ndiffDOMO = 0 + nxordiffSOMODOMO = 0 + nsomoJ=0 + nsomoalpha=0 + do ii=1,N_int + Isomo = Ialpha(ii,1) + Idomo = Ialpha(ii,2) + Jsomo = psi_configuration(ii,1,i) + Jdomo = psi_configuration(ii,2,i) + nsomoJ += POPCNT(Jsomo) + nsomoalpha += POPCNT(Isomo) + diffSOMO = IEOR(Isomo,Jsomo) + ndiffSOMO += POPCNT(diffSOMO) + diffDOMO = IEOR(Idomo,Jdomo) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO += POPCNT(diffDOMO) + nxordiffSOMODOMO += POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += POPCNT(diffSOMO) + POPCNT(diffDOMO) + end do + !if(idxI.eq.218)then + ! print *,"I=",idxI,"Nsomo_alpha=",Nsomo_alpha,"nxordiffSOMODOMO(4)=",nxordiffSOMODOMO, " ndiffSOMO(2)=",ndiffSOMO, " ndiffDOMO=",ndiffDOMO !endif - diffDOMO = IEOR(Idomo,Jdomo) - xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) - ndiffDOMO = POPCNT(diffDOMO) - nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) - nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + !Jcfg = psi_configuration(:,:,i) + !print *,"nxordiffSOMODOMO(4)=",nxordiffSOMODOMO, " ndiffSOMO(2)=",ndiffSOMO + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then select case(ndiffDOMO) case (0) ! SOMO -> VMO !print *,"obt SOMO -> VMO" extyp = 3 - IJsomo = IEOR(Isomo, Jsomo) -!IRP_IF WITHOUT_TRAILZ -! p = (popcnt(ieor( IAND(Isomo,IJsomo) , IAND(Isomo,IJsomo) -1))-1) + 1 -!IRP_ELSE - p = TRAILZ(IAND(Isomo,IJsomo)) + 1 -!IRP_ENDIF - IJsomo = IBCLR(IJsomo,p-1) -!IRP_IF WITHOUT_TRAILZ -! q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1 -!IRP_ELSE - q = TRAILZ(IJsomo) + 1 -!IRP_ENDIF + !if(N_int .eq. 1) then + ! IJsomo = IEOR(Isomo, Jsomo) + ! p = TRAILZ(IAND(Isomo,IJsomo)) + 1 + ! IJsomo = IBCLR(IJsomo,p-1) + ! q = TRAILZ(IJsomo) + 1 + ! !print *," p=",p," q=",q + ! !call get_single_excitation_cfg(Jcfg, Icfg, p, q, N_int) + !else + ! Find p + do ii=1,N_int + Isomo = Ialpha(ii,1) + Jsomo = psi_configuration(ii,1,i) + IJsomo = IEOR(Isomo, Jsomo) + if(popcnt(IAND(Isomo,IJsomo)) > 0)then + p = TRAILZ(IAND(Isomo,IJsomo)) + 1 + (ii-1) * bit_kind_size + EXIT + endif + end do + ! Find q + do ii=1,N_int + Isomo = Ialpha(ii,1) + Jsomo = psi_configuration(ii,1,i) + IJsomo = IEOR(Isomo, Jsomo) + iint = shiftr(p-1,bit_kind_shift) + 1 + ipos = p-shiftl((iint-1),bit_kind_shift) + if(iint .eq. ii)then + IJsomo = IBCLR(IJsomo,ipos-1) + endif + if(popcnt(IJsomo) > 0)then + q = TRAILZ(IJsomo) + 1 + (ii-1) * bit_kind_size + EXIT + endif + enddo + !endif + !assert ( p == pp) + !assert ( q == qq) + !print *," 1--- p=",p," q=",q case (1) ! DOMO -> VMO ! or ! SOMO -> SOMO - nsomoJ = POPCNT(Jsomo) - nsomoalpha = POPCNT(Isomo) if(nsomoJ .GT. nsomoalpha) then ! DOMO -> VMO !print *,"obt DOMO -> VMO" extyp = 2 -!IRP_IF WITHOUT_TRAILZ -! p = (popcnt(ieor( IEOR(Idomo,Jdomo),IEOR(Idomo,Jdomo) -1))-1) + 1 -!IRP_ELSE - p = TRAILZ(IEOR(Idomo,Jdomo)) + 1 -!IRP_ENDIF - Isomo = IEOR(Isomo, Jsomo) - Isomo = IBCLR(Isomo,p-1) -!IRP_IF WITHOUT_TRAILZ -! q = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 -!IRP_ELSE - q = TRAILZ(Isomo) + 1 -!IRP_ENDIF + !if(N_int.eq.1)then + ! p = TRAILZ(IEOR(Idomo,Jdomo)) + 1 + ! Isomo = IEOR(Isomo, Jsomo) + ! Isomo = IBCLR(Isomo,p-1) + ! q = TRAILZ(Isomo) + 1 + !else + + ! Find p + do ii=1,N_int + Isomo = Ialpha(ii,1) + Jsomo = psi_configuration(ii,1,i) + Idomo = Ialpha(ii,2) + Jdomo = psi_configuration(ii,2,i) + if(popcnt(IEOR(Idomo,Jdomo)) > 0)then + p = TRAILZ(IEOR(Idomo,Jdomo)) + 1 + (ii-1) * bit_kind_size + EXIT + endif + end do + ! Find q + do ii=1,N_int + Isomo = Ialpha(ii,1) + Jsomo = psi_configuration(ii,1,i) + IJsomo = IEOR(Isomo, Jsomo) + iint = shiftr(p-1,bit_kind_shift) + 1 + ipos = p-shiftl((iint-1),bit_kind_shift) + if(iint .eq. ii)then + IJsomo = IBCLR(IJsomo,ipos-1) + endif + if(popcnt(IJsomo) > 0)then + q = TRAILZ(IJsomo) + 1 + (ii-1) * bit_kind_size + EXIT + endif + end do + !endif + !assert ( p == pp) + !assert ( q == qq) else ! SOMO -> SOMO !print *,"obt SOMO -> SOMO" extyp = 1 -!IRP_IF WITHOUT_TRAILZ -! q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1 -!IRP_ELSE - q = TRAILZ(IEOR(Idomo,Jdomo)) + 1 -!IRP_ENDIF - Isomo = IEOR(Isomo, Jsomo) - Isomo = IBCLR(Isomo,q-1) -!IRP_IF WITHOUT_TRAILZ -! p = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 -!IRP_ELSE - p = TRAILZ(Isomo) + 1 -!IRP_ENDIF - ! Check for Minimal alpha electrons (MS) - !if(POPCNT(Isomo).lt.MS)then - ! cycle + !if(N_int.eq.1)then + ! q = TRAILZ(IEOR(Idomo,Jdomo)) + 1 + ! Isomo = IEOR(Isomo, Jsomo) + ! Isomo = IBCLR(Isomo,q-1) + ! p = TRAILZ(Isomo) + 1 + ! ! Check for Minimal alpha electrons (MS) + ! !if(POPCNT(Isomo).lt.MS)then + ! ! cycle + ! !endif + !else + ! Find p + !print *,"Ialpha somo=",Ialpha(1,1), Ialpha(2,1)," Ialpha domo=",Ialpha(1,2), Ialpha(2,2) + !print *,"J somo=",psi_configuration(1,1,i), psi_configuration(2,1,i)," J domo=",psi_configuration(1,2,i),& + !psi_configuration(2,2,i) + do ii=1,N_int + Isomo = Ialpha(ii,1) + Jsomo = psi_configuration(ii,1,i) + Idomo = Ialpha(ii,2) + Jdomo = psi_configuration(ii,2,i) + if(popcnt(IEOR(Idomo,Jdomo)) > 0)then + q = TRAILZ(IEOR(Idomo,Jdomo)) + 1 + (ii-1) * bit_kind_size + EXIT + endif + enddo + ! Find q + do ii=1,N_int + Isomo = Ialpha(ii,1) + Jsomo = psi_configuration(ii,1,i) + IJsomo = IEOR(Isomo, Jsomo) + iint = shiftr(q-1,bit_kind_shift) + 1 + ipos = q-shiftl((iint-1),bit_kind_shift) + if(iint .eq. ii)then + IJsomo = IBCLR(IJsomo,ipos-1) + endif + !print *,"ii=",ii," Isomo=",Isomo + if(popcnt(IJsomo) > 0)then + p = TRAILZ(IJsomo) + 1 + (ii-1) * bit_kind_size + EXIT + endif + enddo !endif - end if + !assert ( p == pp) + !assert ( q == qq) + endif + !print *," 2--- p=",p," q=",q case (2) ! DOMO -> SOMO !print *,"obt DOMO -> SOMO" extyp = 4 - IJsomo = IEOR(Isomo, Jsomo) -!IRP_IF WITHOUT_TRAILZ -! p = (popcnt(ieor( IAND(Jsomo,IJsomo), IAND(Jsomo,IJsomo)-1))-1) + 1 -!IRP_ELSE - p = TRAILZ(IAND(Jsomo,IJsomo)) + 1 -!IRP_ENDIF - IJsomo = IBCLR(IJsomo,p-1) -!IRP_IF WITHOUT_TRAILZ -! q = (popcnt(ieor( IJsomo , IJsomo -1))-1) + 1 -!IRP_ELSE - q = TRAILZ(IJsomo) + 1 -!IRP_ENDIF + !if(N_int.eq.1)then + ! IJsomo = IEOR(Isomo, Jsomo) + ! p = TRAILZ(IAND(Jsomo,IJsomo)) + 1 + ! IJsomo = IBCLR(IJsomo,p-1) + ! q = TRAILZ(IJsomo) + 1 + !else + ! Find p + do ii=1,N_int + Isomo = Ialpha(ii,1) + Jsomo = psi_configuration(ii,1,i) + Idomo = Ialpha(ii,2) + Jdomo = psi_configuration(ii,2,i) + IJsomo = IEOR(Isomo, Jsomo) + if(popcnt(IAND(Jsomo,IJsomo)) > 0)then + p = TRAILZ(IAND(Jsomo,IJsomo)) + 1 + (ii-1) * bit_kind_size + EXIT + endif + enddo + ! Find q + do ii=1,N_int + Isomo = Ialpha(ii,1) + Jsomo = psi_configuration(ii,1,i) + IJsomo = IEOR(Isomo, Jsomo) + iint = shiftr(p-1,bit_kind_shift) + 1 + ipos = p-shiftl((iint-1),bit_kind_shift) + if(iint .eq. ii)then + IJsomo = IBCLR(IJsomo,ipos-1) + endif + if(popcnt(IJsomo) > 0)then + q = TRAILZ(IJsomo) + 1 + (ii-1) * bit_kind_size + EXIT + endif + enddo + !endif + !assert ( p == pp) + !assert ( q == qq) + !print *," 3--- p=",p," q=",q case default print *,"something went wront in get connectedI" end select starti = psi_config_data(i,1) endi = psi_config_data(i,2) + nconnectedExtradiag+=1 nconnectedI += 1 - do k=1,N_int - connectedI(k,1,nconnectedI) = psi_configuration(k,1,i) - connectedI(k,2,nconnectedI) = psi_configuration(k,2,i) + do ii=1,N_int + connectedI(ii,1,nconnectedI) = psi_configuration(ii,1,i) + connectedI(ii,2,nconnectedI) = psi_configuration(ii,2,i) enddo idxs_connectedI(nconnectedI)=starti excitationIds(1,nconnectedI)=p @@ -343,28 +505,51 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI diagfactors(nconnectedI) = 1.0d0 else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then ! find out all pq holes possible + !print *,"I = ",i + !print *,"I somo= ",psi_configuration(1,1,i), " domo=", psi_configuration(1,2,i) + !print *,"alp somo= ",Ialpha(1,1), " domo=", Ialpha(1,2) nholes = 0 ! holes in SOMO - Isomo = psi_configuration(1,1,i) - Idomo = psi_configuration(1,2,i) - do iii = 1,n_act_orb - ii = list_act(iii) - if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = ii - holetype(nholes) = 1 - endif + !Isomo = psi_configuration(1,1,i) + !Idomo = psi_configuration(1,2,i) + !do iii = 1,n_act_orb + ! ii = list_act(iii) + ! if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then + ! nholes += 1 + ! listholes(nholes) = ii + ! holetype(nholes) = 1 + ! endif + !end do + call bitstring_to_list(psi_configuration(1,1,i),listall,nelall,N_int) + + do iii=1,nelall + nholes += 1 + listholes(nholes) = listall(iii) + holetype(nholes) = 1 end do + ! holes in DOMO - do iii = 1,n_act_orb - ii = list_act(iii) - if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = ii - holetype(nholes) = 2 - endif + !do iii = 1,n_act_orb + ! ii = list_act(iii) + ! if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then + ! nholes += 1 + ! listholes(nholes) = ii + ! holetype(nholes) = 2 + ! endif + !end do + nelall=0 + listall=0 + call bitstring_to_list(psi_configuration(1,2,i),listall,nelall,N_int) + + do iii=1,nelall + if(listall(iii) .gt. n_core_orb)then + nholes += 1 + listholes(nholes) = listall(iii) + holetype(nholes) = 2 + endif end do + do k=1,nholes p = listholes(k) q = p @@ -372,6 +557,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI if(holetype(k) .EQ. 1) then starti = psi_config_data(i,1) endi = psi_config_data(i,2) + nconnectedDiag+=1 nconnectedI += 1 connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) idxs_connectedI(nconnectedI)=starti @@ -382,6 +568,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI else starti = psi_config_data(i,1) endi = psi_config_data(i,2) + nconnectedDiag+=1 nconnectedI += 1 connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) idxs_connectedI(nconnectedI)=starti @@ -390,8 +577,10 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI excitationTypes(nconnectedI) = extyp diagfactors(nconnectedI) = 2.0d0 endif + !print *,excitationIds(1,nconnectedI), excitationIds(2,nconnectedI) enddo endif end do + !print *,"nconnectedExtradiag=",nconnectedExtradiag," nconnectedDiad=",nconnectedDiag end subroutine obtain_connected_I_foralpha diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 833fa7b0..c6644c4c 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -146,7 +146,6 @@ ncfgprev = cfg_seniority_index(i+2) end do !print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration - END_PROVIDER @@ -832,7 +831,7 @@ subroutine calculate_preconditioner_cfg(diag_energies) ! the configurations in psi_configuration ! returns : diag_energies : END_DOC - integer :: i,j,k,kk,l,p,q,noccp,noccq, ii, jj + integer :: i,j,k,kk,l,p,q,noccp,noccq, ii, jj, iii real*8,intent(out) :: diag_energies(n_CSF) integer :: nholes integer :: nvmos @@ -858,8 +857,8 @@ subroutine calculate_preconditioner_cfg(diag_energies) real*8, external :: mo_two_e_integral real*8 :: hpp real*8 :: meCC - real*8 :: ecore real*8 :: core_act_contrib + integer :: listall(N_int*bit_kind_size), nelall !PROVIDE h_core_ri PROVIDE core_fock_operator @@ -869,7 +868,6 @@ subroutine calculate_preconditioner_cfg(diag_energies) !print *,"Core energy=",core_energy," nucler rep=",nuclear_repulsion, " n_core_orb=",n_core_orb," n_act_orb=",n_act_orb," mo_num=",mo_num ! calculate core energy - !call get_core_energy(ecore) diag_energies = core_energy - nuclear_repulsion ! calculate the core energy @@ -877,11 +875,11 @@ subroutine calculate_preconditioner_cfg(diag_energies) do i=1,N_configuration - Isomo = psi_configuration(1,1,i) - Idomo = psi_configuration(1,2,i) - Icfg(1,1) = psi_configuration(1,1,i) - Icfg(1,2) = psi_configuration(1,2,i) - NSOMOI = getNSOMO(psi_configuration(:,:,i)) + !Isomo = psi_configuration(1,1,i) + !Idomo = psi_configuration(1,2,i) + !Icfg(1,1) = psi_configuration(1,1,i) + !Icfg(1,2) = psi_configuration(1,2,i) + !NSOMOI = getNSOMO(psi_configuration(:,:,i)) starti = psi_config_data(i,1) endi = psi_config_data(i,2) @@ -890,48 +888,63 @@ subroutine calculate_preconditioner_cfg(diag_energies) ! find out all pq holes possible nholes = 0 + listholes = -1 ! holes in SOMO - !do k = 1,mo_num - do kk = 1,n_act_orb - k = list_act(kk) - if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = k - holetype(nholes) = 1 - endif - enddo - ! holes in DOMO - !do k = n_core_orb+1,n_core_orb + n_act_orb - !do k = 1+n_core_inact_orb,n_core_orb+n_core_inact_act_orb - !do k = 1,mo_num - do kk = 1,n_act_orb - k = list_act(kk) - if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = k - holetype(nholes) = 2 - endif - enddo + !do kk = 1,n_act_orb + ! k = list_act(kk) + ! if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then + ! nholes += 1 + ! listholes(nholes) = k + ! holetype(nholes) = 1 + ! endif + !enddo + call bitstring_to_list(psi_configuration(1,1,i),listall,nelall,N_int) - ! find vmos - listvmos = -1 - vmotype = -1 - nvmos = 0 - !do k = n_core_orb+1,n_core_orb + n_act_orb - !do k = 1,mo_num - do kk = 1,n_act_orb - k = list_act(kk) - !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) - if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then - nvmos += 1 - listvmos(nvmos) = k - vmotype(nvmos) = 0 - else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then - nvmos += 1 - listvmos(nvmos) = k - vmotype(nvmos) = 1 - end if - enddo + do iii=1,nelall + nholes += 1 + listholes(nholes) = listall(iii) + holetype(nholes) = 1 + end do + + ! holes in DOMO + !do kk = 1,n_act_orb + ! k = list_act(kk) + ! if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then + ! nholes += 1 + ! listholes(nholes) = k + ! holetype(nholes) = 2 + ! endif + !enddo + call bitstring_to_list(psi_configuration(1,2,i),listall,nelall,N_int) + + do iii=1,nelall + if(listall(iii) .gt. n_core_orb)then + nholes += 1 + listholes(nholes) = listall(iii) + holetype(nholes) = 2 + endif + end do + + + !!! find vmos + !!listvmos = -1 + !!vmotype = -1 + !!nvmos = 0 + !!!do k = n_core_orb+1,n_core_orb + n_act_orb + !!!do k = 1,mo_num + !!do kk = 1,n_act_orb + !! k = list_act(kk) + !! !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) + !! if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then + !! nvmos += 1 + !! listvmos(nvmos) = k + !! vmotype(nvmos) = 0 + !! else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then + !! nvmos += 1 + !! listvmos(nvmos) = k + !! vmotype(nvmos) = 1 + !! end if + !!enddo !print *,"I=",i !call debug_spindet(psi_configuration(1,1,i),N_int) !call debug_spindet(psi_configuration(1,2,i),N_int) @@ -1221,27 +1234,30 @@ subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmod integer,intent(in) :: p,q integer,intent(in) :: extype integer,intent(out) :: pmodel,qmodel - !integer(bit_kind) :: Isomo(N_int) - !integer(bit_kind) :: Idomo(N_int) - !integer(bit_kind) :: Jsomo(N_int) - !integer(bit_kind) :: Jdomo(N_int) - integer*8 :: Isomo - integer*8 :: Idomo - integer*8 :: Jsomo - integer*8 :: Jdomo + integer(bit_kind) :: Isomo(N_int) + integer(bit_kind) :: Idomo(N_int) + integer(bit_kind) :: Jsomo(N_int) + integer(bit_kind) :: Jdomo(N_int) + !integer*8 :: Isomo + !integer*8 :: Idomo + !integer*8 :: Jsomo + !integer*8 :: Jdomo integer*8 :: mask - integer :: iint, ipos + integer :: iint, ipos, ii !integer(bit_kind) :: Isomotmp(N_int) !integer(bit_kind) :: Jsomotmp(N_int) integer*8 :: Isomotmp integer*8 :: Jsomotmp integer :: pos0,pos0prev + integer :: tmpp, tmpq ! TODO Flag (print) when model space indices is > 64 - Isomo = Ialpha(1,1) - Idomo = Ialpha(1,2) - Jsomo = Jcfg(1,1) - Jdomo = Jcfg(1,2) + do ii=1,N_int + Isomo(ii) = Ialpha(ii,1) + Idomo(ii) = Ialpha(ii,2) + Jsomo(ii) = Jcfg(ii,1) + Jdomo(ii) = Jcfg(ii,2) + end do pos0prev = 0 pmodel = p qmodel = q @@ -1255,40 +1271,155 @@ subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmod ! SOMO -> SOMO ! remove all domos !print *,"type -> SOMO -> SOMO" - mask = ISHFT(1_8,p) - 1 - Isomotmp = IAND(Isomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) - mask = ISHFT(1_8,q) - 1 - Isomotmp = IAND(Isomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + !mask = ISHFT(1_8,p) - 1 + !Isomotmp = IAND(Isomo,mask) + !pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + !mask = ISHFT(1_8,q) - 1 + !Isomotmp = IAND(Isomo,mask) + !qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + + iint = shiftr(p-1,bit_kind_shift) + 1 + ipos = p-shiftl((iint-1),bit_kind_shift)-1 + tmpp = 0 + !print *,"iint=",iint, " p=",p + do ii=1,iint-1 + !mask = ISHFT(1_bit_kind,-1)-1_bit_kind + !Isomotmp = IAND(Isomo(ii),mask) + !tmpp += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + tmpp += POPCNT(Isomo(ii)) + end do + mask = ISHFT(1_bit_kind,ipos+1) - 1 + Isomotmp = IAND(Isomo(iint),mask) + !pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + pmodel = tmpp + POPCNT(Isomotmp) + !print *,"iint=",iint, " ipos=",ipos,"pmodel=",pmodel, XOR(Isomotmp,mask),Isomo(iint) + + iint = shiftr(q-1,bit_kind_shift) + 1 + ipos = q-shiftl((iint-1),bit_kind_shift)-1 + tmpq = 0 + do ii=1,iint-1 + !mask = ISHFT(1_bit_kind,-1)-1_bit_kind + !Isomotmp = IAND(Isomo(ii),mask) + !tmpq += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + tmpq += POPCNT(Isomo(ii)) + end do + mask = ISHFT(1_bit_kind,ipos+1) - 1 + Isomotmp = IAND(Isomo(iint),mask) + !qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + qmodel = tmpq + POPCNT(Isomotmp) + !print *,"iint=",iint, " ipos=",ipos,"qmodel=",qmodel case (2) ! DOMO -> VMO ! remove all domos except one at p !print *,"type -> DOMO -> VMO" - mask = ISHFT(1_8,p) - 1 - Jsomotmp = IAND(Jsomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) - mask = ISHFT(1_8,q) - 1 - Jsomotmp = IAND(Jsomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + !mask = ISHFT(1_8,p) - 1 + !Jsomotmp = IAND(Jsomo,mask) + !pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + !mask = ISHFT(1_8,q) - 1 + !Jsomotmp = IAND(Jsomo,mask) + !qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + + iint = shiftr(p-1,bit_kind_shift) + 1 + ipos = p-shiftl((iint-1),bit_kind_shift)-1 + tmpp = 0 + do ii=1,iint-1 + !mask = ISHFT(1_bit_kind,-1)-1_bit_kind + !Jsomotmp = IAND(Jsomo(ii),mask) + !tmpp += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + tmpp += POPCNT(Jsomo(ii)) + end do + mask = ISHFT(1_bit_kind,ipos+1) - 1 + Jsomotmp = IAND(Jsomo(iint),mask) + !pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + pmodel = tmpp + POPCNT(Jsomotmp) + + iint = shiftr(q-1,bit_kind_shift) + 1 + ipos = q-shiftl((iint-1),bit_kind_shift)-1 + tmpq = 0 + do ii=1,iint-1 + !mask = ISHFT(1_bit_kind,-1)-1_bit_kind + !Jsomotmp = IAND(Jsomo(ii),mask) + !tmpq += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + tmpq += POPCNT(Jsomo(ii)) + end do + mask = ISHFT(1_bit_kind,ipos+1) - 1 + Jsomotmp = IAND(Jsomo(iint),mask) + !qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + qmodel = tmpq + POPCNT(Jsomotmp) case (3) ! SOMO -> VMO !print *,"type -> SOMO -> VMO" !Isomo = IEOR(Isomo,Jsomo) if(p.LT.q) then - mask = ISHFT(1_8,p) - 1 - Isomo = IAND(Isomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) - mask = ISHFT(1_8,q) - 1 - Jsomo = IAND(Jsomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1 + !mask = ISHFT(1_8,p) - 1 + !Isomo = IAND(Isomo,mask) + !pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + !mask = ISHFT(1_8,q) - 1 + !Jsomo = IAND(Jsomo,mask) + !qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1 + + iint = shiftr(p-1,bit_kind_shift) + 1 + ipos = p-shiftl((iint-1),bit_kind_shift)-1 + tmpp = 0 + do ii=1,iint-1 + !mask = ISHFT(1_bit_kind,-1)-1_bit_kind + !Isomotmp = IAND(Isomo(ii),mask) + !tmpp += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + tmpp += POPCNT(Isomo(ii)) + end do + mask = ISHFT(1_bit_kind,ipos+1) - 1 + Isomotmp = IAND(Isomo(iint),mask) + !pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + pmodel = tmpp + POPCNT(Isomotmp) + + iint = shiftr(q-1,bit_kind_shift) + 1 + ipos = q-shiftl((iint-1),bit_kind_shift)-1 + tmpq = 0 + do ii=1,iint-1 + !mask = ISHFT(1_bit_kind,-1)-1_bit_kind + !Jsomotmp = IAND(Jsomo(ii),mask) + !tmpq += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + tmpq += POPCNT(Jsomo(ii)) + end do + mask = ISHFT(1_bit_kind,ipos+1) - 1 + Jsomotmp = IAND(Jsomo(iint),mask) + !qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + 1 + qmodel = tmpq + POPCNT(Jsomotmp) + 1 else - mask = ISHFT(1_8,p) - 1 - Isomo = IAND(Isomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1 - mask = ISHFT(1_8,q) - 1 - Jsomo = IAND(Jsomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + !mask = ISHFT(1_8,p) - 1 + !Isomo = IAND(Isomo,mask) + !pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1 + !mask = ISHFT(1_8,q) - 1 + !Jsomo = IAND(Jsomo,mask) + !qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + + iint = shiftr(p-1,bit_kind_shift) + 1 + ipos = p-shiftl((iint-1),bit_kind_shift)-1 + tmpp = 0 + do ii=1,iint-1 + !mask = ISHFT(1_bit_kind,-1)-1_bit_kind + !Isomotmp = IAND(Isomo(ii),mask) + !tmpp += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + tmpp += POPCNT(Isomo(ii)) + end do + mask = ISHFT(1_bit_kind,ipos+1) - 1 + Isomotmp = IAND(Isomo(iint),mask) + !pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + 1 + pmodel = tmpp + POPCNT(Isomotmp) + 1 + + iint = shiftr(q-1,bit_kind_shift) + 1 + ipos = q-shiftl((iint-1),bit_kind_shift)-1 + tmpq = 0 + do ii=1,iint-1 + !mask = ISHFT(1_bit_kind,-1)-1_bit_kind + !Jsomotmp = IAND(Jsomo(ii),mask) + !tmpq += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + tmpq += POPCNT(Jsomo(ii)) + end do + mask = ISHFT(1_bit_kind,ipos+1) - 1 + Jsomotmp = IAND(Jsomo(iint),mask) + !qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + qmodel = tmpq + POPCNT(Jsomotmp) endif case (4) ! DOMO -> SOMO @@ -1296,19 +1427,75 @@ subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmod !print *,"type -> DOMO -> SOMO" !Isomo = IEOR(Isomo,Jsomo) if(p.LT.q) then - mask = ISHFT(1_8,p) - 1 - Jsomo = IAND(Jsomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) - mask = ISHFT(1_8,q) - 1 - Isomo = IAND(Isomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1 + !mask = ISHFT(1_8,p) - 1 + !Jsomo = IAND(Jsomo,mask) + !pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + !mask = ISHFT(1_8,q) - 1 + !Isomo = IAND(Isomo,mask) + !qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1 + + iint = shiftr(p-1,bit_kind_shift) + 1 + ipos = p-shiftl((iint-1),bit_kind_shift)-1 + tmpp = 0 + do ii=1,iint-1 + !mask = ISHFT(1_bit_kind,-1)-1_bit_kind + !Jsomotmp = IAND(Jsomo(ii),mask) + !tmpp += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + tmpp += POPCNT(Jsomo(ii)) + end do + mask = ISHFT(1_bit_kind,ipos+1) - 1 + Jsomotmp = IAND(Jsomo(iint),mask) + !pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + pmodel = tmpp + POPCNT(Jsomotmp) + + iint = shiftr(q-1,bit_kind_shift) + 1 + ipos = q-shiftl((iint-1),bit_kind_shift)-1 + tmpq = 0 + do ii=1,iint-1 + !mask = ISHFT(1_bit_kind,-1)-1_bit_kind + !Isomotmp = IAND(Isomo(ii),mask) + !tmpq += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + tmpq += POPCNT(Isomo(ii)) + end do + mask = ISHFT(1_bit_kind,ipos+1) - 1 + Isomotmp = IAND(Isomo(iint),mask) + !qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + 1 + qmodel = tmpq + POPCNT(Isomotmp) + 1 else - mask = ISHFT(1_8,p) - 1 - Jsomo = IAND(Jsomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1 - mask = ISHFT(1_8,q) - 1 - Isomo = IAND(Isomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + !mask = ISHFT(1_8,p) - 1 + !Jsomo = IAND(Jsomo,mask) + !pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1 + !mask = ISHFT(1_8,q) - 1 + !Isomo = IAND(Isomo,mask) + !qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + + iint = shiftr(p-1,bit_kind_shift) + 1 + ipos = p-shiftl((iint-1),bit_kind_shift)-1 + tmpp = 0 + do ii=1,iint-1 + !mask = ISHFT(1_bit_kind,-1)-1_bit_kind + !Jsomotmp = IAND(Jsomo(ii),mask) + !tmpp += POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + tmpp += POPCNT(Jsomo(ii)) + end do + mask = ISHFT(1_bit_kind,ipos+1) - 1 + Jsomotmp = IAND(Jsomo(iint),mask) + !pmodel = tmpp + POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + 1 + pmodel = tmpp + POPCNT(Jsomotmp) + 1 + + iint = shiftr(q-1,bit_kind_shift) + 1 + ipos = q-shiftl((iint-1),bit_kind_shift)-1 + tmpq = 0 + do ii=1,iint-1 + !mask = ISHFT(1_bit_kind,-1)-1_bit_kind + !Isomotmp = IAND(Isomo(ii),mask) + !tmpq += POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + tmpq += POPCNT(Isomo(ii)) + end do + mask = ISHFT(1_bit_kind,ipos+1) - 1 + Isomotmp = IAND(Isomo(iint),mask) + !qmodel = tmpq + POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + qmodel = tmpq + POPCNT(Isomotmp) endif case default print *,"something is wrong in convertOrbIdsToModelSpaceIds" @@ -1366,8 +1553,13 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze integer :: rowsTKI integer :: noccpp integer :: istart_cfg, iend_cfg, num_threads_max + integer :: iint, jint, ipos, jpos, Nsomo_I, iii integer :: nconnectedJ,nconnectedtotalmax,nconnectedmaxJ,maxnalphas,ntotJ - integer*8 :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta + integer*8 :: MS,Ialpha, Ibeta + integer(bit_kind) :: Isomo(N_INT) + integer(bit_kind) :: Idomo(N_INT) + integer(bit_kind) :: Jsomo(N_INT) + integer(bit_kind) :: Jdomo(N_INT) integer :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk real*8 :: norm_coef_cfg, fac2eints real*8 :: norm_coef_det @@ -1382,6 +1574,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze real*8,dimension(:),allocatable:: diag_energies real*8 :: tmpvar, tmptot real*8 :: core_act_contrib + integer :: listall(N_int*bit_kind_size), nelall + integer :: countelec integer(omp_lock_kind), allocatable :: lock(:) call omp_set_max_active_levels(1) @@ -1410,8 +1604,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !nconnectedtotalmax = 1000 !nconnectedmaxJ = 1000 maxnalphas = elec_num*mo_num - Icfg(1,1) = psi_configuration(1,1,1) - Icfg(1,2) = psi_configuration(1,2,1) + Icfg(:,1) = psi_configuration(:,1,1) + Icfg(:,2) = psi_configuration(:,2,1) allocate(listconnectedJ(N_INT,2,max(sze,10000))) allocate(idslistconnectedJ(max(sze,10000))) call obtain_connected_J_givenI(1, Icfg, listconnectedJ, idslistconnectedJ, nconnectedmaxJ, nconnectedtotalmax) @@ -1443,6 +1637,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !$OMP shared(istart_cfg, iend_cfg, psi_configuration, mo_num, psi_config_data,& !$OMP N_int, N_st, psi_out, psi_in, h_core_ri, core_energy, h_act_ri, AIJpqContainer,& !$OMP pp, sze, NalphaIcfg_list,alphasIcfg_list, bit_tmp, & + !$OMP qq, iint, jint, ipos, jpos, nelall, listall, Nsomo_I, countelec,& !$OMP AIJpqMatrixDimsList, diag_energies, n_CSF, lock, NBFmax,nconnectedtotalmax, nconnectedmaxJ,maxnalphas,& !$OMP n_core_orb, n_act_orb, list_act, n, list_core, list_core_is_built,core_act_contrib, num_threads_max,& !$OMP n_core_orb_is_built, mo_integrals_map, mo_integrals_map_is_built) @@ -1465,11 +1660,13 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze ! else ! cycle - Icfg(1,1) = psi_configuration(1,1,i) - Icfg(1,2) = psi_configuration(1,2,i) - Isomo = Icfg(1,1) - Idomo = Icfg(1,2) - NSOMOI = getNSOMO(Icfg) + do ii=1,N_INT + Icfg(ii,1) = psi_configuration(ii,1,i) + Icfg(ii,2) = psi_configuration(ii,2,i) + Isomo(ii) = Icfg(ii,1) + Idomo(ii) = Icfg(ii,2) + enddo + NSOMOI = getNSOMO(Icfg) ! find out all pq holes possible nholes = 0 @@ -1479,42 +1676,86 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze ! list_core_inact ! bitmasks !do k = 1,mo_num - do kk = 1,n_act_orb - k = list_act(kk) - if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = k - holetype(nholes) = 1 - endif - enddo - ! holes in DOMO - !do k = 1,mo_num - do kk = 1,n_act_orb - k = list_act(kk) - if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then - nholes += 1 - listholes(nholes) = k - holetype(nholes) = 2 - endif - enddo + ! do kk = 1,n_act_orb + ! k = list_act(kk) + ! if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then + ! nholes += 1 + ! listholes(nholes) = k + ! holetype(nholes) = 1 + ! endif + ! enddo + ! ! holes in DOMO + ! !do k = 1,mo_num + ! do kk = 1,n_act_orb + ! k = list_act(kk) + ! if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then + ! nholes += 1 + ! listholes(nholes) = k + ! holetype(nholes) = 2 + ! endif + ! enddo + + ! ! find vmos + ! do kk = 1,n_act_orb + ! k = list_act(kk) + ! !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) + ! if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then + ! nvmos += 1 + ! listvmos(nvmos) = k + ! vmotype(nvmos) = 0 + ! else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then + ! nvmos += 1 + ! listvmos(nvmos) = k + ! vmotype(nvmos) = 1 + ! end if + ! enddo + + ! find out all pq holes possible + nholes = 0 + call bitstring_to_list(Isomo,listall,nelall,N_int) + + do iii=1,nelall + nholes += 1 + listholes(nholes) = listall(iii) + holetype(nholes) = 1 + end do + + Nsomo_I = nelall + + call bitstring_to_list(Idomo,listall,nelall,N_int) + + do iii=1,nelall + if(listall(iii) .gt. n_core_orb)then + nholes += 1 + listholes(nholes) = listall(iii) + holetype(nholes) = 2 + endif + end do + - ! find vmos listvmos = -1 vmotype = -1 nvmos = 0 - do kk = 1,n_act_orb - k = list_act(kk) - !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) - if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then - nvmos += 1 - listvmos(nvmos) = k - vmotype(nvmos) = 0 - else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then - nvmos += 1 - listvmos(nvmos) = k - vmotype(nvmos) = 1 - end if - enddo + ! find vmos + ! Take into account N_int + do ii = 1, n_act_orb + iii = list_act(ii) + iint = shiftr(iii-1,bit_kind_shift) + 1 + ipos = iii-shiftl((iint-1),bit_kind_shift)-1 + + if(IAND(Idomo(iint),(IBSET(0_8,ipos))) .EQ. 0) then + if(IAND(Isomo(iint),(IBSET(0_8,ipos))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = iii + vmotype(nvmos) = 1 + else if(POPCNT(IAND(Isomo(iint),(IBSET(0_8,ipos)))) .EQ. 1) then + nvmos += 1 + listvmos(nvmos) = iii + vmotype(nvmos) = 2 + end if + end if + end do + ! Icsf ids @@ -1533,16 +1774,31 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze extype = excitationTypes_single(j) ! Off diagonal terms call convertOrbIdsToModelSpaceIds(Icfg, singlesI(1,1,j), p, q, extype, pmodel, qmodel) - Jsomo = singlesI(1,1,j) - Jdomo = singlesI(1,2,j) + do ii=1,N_INT + Jsomo(ii) = singlesI(1,1,j) + Jdomo(ii) = singlesI(1,2,j) + enddo + + ! Get actual p pos + pp = p + iint = shiftr(pp-1,bit_kind_shift) + 1 + ipos = pp-shiftl((iint-1),bit_kind_shift)-1 + + ! Get actual q pos + qq = q + jint = shiftr(qq-1,bit_kind_shift) + 1 + jpos = qq-shiftl((jint-1),bit_kind_shift)-1 ! Add the hole on J - if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + !if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + if(POPCNT(IAND(Jsomo(jint),IBSET(0_8,jpos))) .EQ. 1 .AND. POPCNT(IAND(Isomo(jint),IBSET(0_8,jpos))) .EQ. 0) then nholes += 1 listholes(nholes) = q holetype(nholes) = 1 endif - if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + !if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + if((POPCNT(IAND(Jdomo(jint),IBSET(0_8,jpos))) .EQ. 1 .AND. POPCNT(IAND(Idomo(jint),IBSET(0_8,jpos))) .EQ. 0) .AND.& + POPCNT(IAND(Isomo(jint),IBSET(0_8,jpos))) .EQ. 0) then nholes += 1 listholes(nholes) = q holetype(nholes) = 2 @@ -1578,10 +1834,12 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze enddo ! Undo setting in listholes - if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + !if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + if(POPCNT(IAND(Jsomo(jint),IBSET(0_8,jpos))) .EQ. 1 .AND. POPCNT(IAND(Isomo(jint),IBSET(0_8,jpos))) .EQ. 0) then nholes -= 1 endif - if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + if((POPCNT(IAND(Jdomo(jint),IBSET(0_8,jpos))) .EQ. 1 .AND. POPCNT(IAND(Idomo(jint),IBSET(0_8,jpos))) .EQ. 0) .AND.& + POPCNT(IAND(Isomo(jint),IBSET(0_8,jpos))) .EQ. 0) then nholes -= 1 endif enddo @@ -1593,6 +1851,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze deallocate(excitationTypes_single) !print *," singles part psi(1,1)=",psi_out(1,1) + !do i=1,n_CSF + ! print *,"i=",i," psi(i)=",psi_out(1,i) + !enddo allocate(listconnectedJ(N_INT,2,max(sze,10000))) allocate(alphas_Icfg(N_INT,2,max(sze,10000))) @@ -1607,7 +1868,6 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !!!====================!!! !!! Double Excitations !!! !!!====================!!! - ! Loop over all selected configurations !$OMP DO SCHEDULE(static) do i = istart_cfg,iend_cfg @@ -1617,8 +1877,10 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze ! else ! cycle - Icfg(1,1) = psi_configuration(1,1,i) - Icfg(1,2) = psi_configuration(1,2,i) + do ii=1,N_INT + Icfg(ii,1) = psi_configuration(ii,1,i) + Icfg(ii,2) = psi_configuration(ii,2,i) + enddo starti = psi_config_data(i,1) endi = psi_config_data(i,2) @@ -1629,14 +1891,15 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze Nalphas_Icfg = NalphaIcfg_list(i) alphas_Icfg(1:n_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg) - if(Nalphas_Icfg .GT. maxnalphas) then - print *,"Nalpha > maxnalpha" - endif + !if(Nalphas_Icfg .GT. maxnalphas) then + ! print *,"Nalpha > maxnalpha" + !endif - call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ) + !call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ) ! TODO : remove doubly excited for return - !print *,"I=",i," isomo=",psi_configuration(1,1,i)," idomo=",psi_configuration(1,2,i), " psiout=",psi_out(1,5) + !print *,"I=",i,"isomo=",psi_configuration(1,1,i),psi_configuration(2,1,i),POPCNT(psi_configuration(1,1,i)),POPCNT(psi_configuration(2,1,i)),& + !"idomo=",psi_configuration(1,2,i),psi_configuration(2,2,i),POPCNT(psi_configuration(1,2,i)),POPCNT(psi_configuration(2,2,i)), "Nalphas_Icfg=",Nalphas_Icfg do k = 1,Nalphas_Icfg ! Now generate all singly excited with respect to a given alpha CFG @@ -1647,15 +1910,18 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze call obtain_connected_I_foralpha(i, alphas_Icfg(1,1,k), connectedI_alpha, idxs_connectedI_alpha, & nconnectedI, excitationIds, excitationTypes, diagfactors) + !if(i .EQ. 218) then + ! print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),alphas_Icfg(2,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' & + ! kcfgDOMO=',alphas_Icfg(1,2,k),alphas_Icfg(2,2,k),' ',POPCNT(alphas_Icfg(1,2,k)), " NconnectedI=",nconnectedI + ! !print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' & + ! !kcfgDOMO=',alphas_Icfg(1,2,k),' ',POPCNT(alphas_Icfg(1,2,k)), " NconnectedI=",nconnectedI + !endif + if(nconnectedI .EQ. 0) then cycle endif - !if(i .EQ. 1) then - ! print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' kcfgDOMO=',alphas_Icfg(1,2,k),' ',POPCNT(alphas_Icfg(1,2,k)) - !endif - ! Here we do 2x the loop. One to count for the size of the matrix, then we compute. totcolsTKI = 0 rowsTKI = -1 @@ -1665,15 +1931,30 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze p = excitationIds(1,j) q = excitationIds(2,j) extype = excitationTypes(j) + !print *,"K=",k,"j=",j, "countelec=",countelec," p=",p," q=",q, " extype=",extype, "NSOMOalpha=",NSOMOalpha," NSOMOI=",NSOMOI, "alphas_Icfg(1,1,k)=",alphas_Icfg(1,1,k), & + !alphas_Icfg(2,1,k), " domo=",alphas_Icfg(1,2,k), alphas_Icfg(2,2,k), " connected somo=",connectedI_alpha(1,1,j), & + !connectedI_alpha(2,1,j), " domo=",connectedI_alpha(1,2,j), connectedI_alpha(2,2,j) call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) ! for E_pp E_rs and E_ppE_rr case rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + !if(i.eq.218)then + ! print *,"j=",j," k=",k,"p=",p,"q=",q,"NSOMOalpha=",NSOMOalpha, "pmodel=",pmodel,"qmodel=",qmodel, "extype=",extype,& + ! "conn somo=",connectedI_alpha(1,1,j),connectedI_alpha(2,1,j),& + ! "conn domo=",connectedI_alpha(1,2,j),connectedI_alpha(2,2,j) + ! do m=1,colsikpq + ! print *,idxs_connectedI_alpha(j)+m-1 + ! enddo + !endif !print *,"j=",j," Nsomo=",NSOMOalpha," rowsikpq=",rowsikpq," colsikpq=",colsikpq, " p=",pmodel," q=",qmodel, " extyp=",extype totcolsTKI += colsikpq rowsTKI = rowsikpq enddo + !if(i.eq.1)then + ! print *,"n_st=",n_st,"rowsTKI=",rowsTKI, " nconnectedI=",nconnectedI, & + ! "totcolsTKI=",totcolsTKI + !endif allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF ! Initialize the integral container ! dims : (totcolsTKI, nconnectedI) @@ -1703,10 +1984,10 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) & * psi_in(kk,idxs_connectedI_alpha(j)+m-1) enddo - !if(i.eq.1) then - ! print *,AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) - !endif enddo + !if(i.eq.1) then + ! print *,"j=",j,"psi_in=",psi_in(1,idxs_connectedI_alpha(j)+m-1) + !endif enddo diagfactors_0 = diagfactors(j)*0.5d0 @@ -1745,16 +2026,24 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze rowsTKI = rowsikpq CCmattmp = 0.d0 + !if(i.eq.1)then + ! print *,"\t n_st=",n_st," colsikpq=",colsikpq," rowsTKI=",rowsTKI,& + ! " | ",size(TKIGIJ,1),size(AIJpqContainer,1),size(CCmattmp,1) + !endif call dgemm('N','N', n_st, colsikpq, rowsTKI, 1.d0, & TKIGIJ(1,1,j), size(TKIGIJ,1), & AIJpqContainer(1,1,pmodel,qmodel,extype,NSOMOalpha), & size(AIJpqContainer,1), 0.d0, & CCmattmp, size(CCmattmp,1) ) + !print *,"j=",j,"colsikpq=",colsikpq, "sizeTIG=",size(TKIGIJ,1),"sizeaijpq=",size(AIJpqContainer,1) do m = 1,colsikpq call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1)) do kk = 1,n_st psi_out(kk,idxs_connectedI_alpha(j)+m-1) += CCmattmp(kk,m) + !if(dabs(CCmattmp(kk,m)).gt.1e-10)then + ! print *, CCmattmp(kk,m), " | ",idxs_connectedI_alpha(j)+m-1 + !end if enddo call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1)) enddo @@ -1789,6 +2078,10 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !$OMP END DO !$OMP END PARALLEL + !print *," ----- " + !do i=1,sze + ! print *,"i=",i," psi_out(i)=",psi_out(1,i) + !end do call omp_set_max_active_levels(4) deallocate(diag_energies) diff --git a/src/dav_general_mat/test_dav.irp.f b/src/dav_general_mat/test_dav.irp.f.example similarity index 100% rename from src/dav_general_mat/test_dav.irp.f rename to src/dav_general_mat/test_dav.irp.f.example diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f index 659602a1..8e12b9c8 100644 --- a/src/davidson/diagonalization_hcfg.irp.f +++ b/src/davidson/diagonalization_hcfg.irp.f @@ -112,6 +112,8 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N double precision, allocatable :: U(:,:), U_csf(:,:), overlap(:,:) double precision, allocatable :: tmpU(:,:), tmpW(:,:) double precision, pointer :: W(:,:), W_csf(:,:) + !double precision, pointer :: W2(:,:), W_csf2(:,:) + !double precision, allocatable :: U2(:,:), U_csf2(:,:) logical :: disk_based double precision :: energy_shift(N_st_diag_in*davidson_sze_max) @@ -234,12 +236,15 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax)) + !allocate(W2(sze,N_st_diag),W_csf2(sze_csf,N_st_diag*itermax)) endif allocate( & ! Large U(sze,N_st_diag), & + !U2(sze,N_st_diag), & U_csf(sze_csf,N_st_diag*itermax), & + !U_csf2(sze_csf,N_st_diag*itermax), & ! Small h(N_st_diag*itermax,N_st_diag*itermax), & @@ -325,7 +330,7 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N enddo enddo !tmpU =0.0d0 - !tmpU(1,2)=1.0d0 + !tmpU(1,1)=1.0d0 double precision :: irp_rdtsc double precision :: ticks_0, ticks_1 integer*8 :: irp_imax @@ -348,9 +353,9 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N !call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1)) !do i=1,sze_csf ! print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1)) - ! if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then - ! print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1)) - ! endif + ! !if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then + ! ! print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1)) + ! !endif !end do !stop deallocate(tmpW) diff --git a/src/determinants/determinants.irp.f b/src/determinants/determinants.irp.f index eceab58c..cb6c14a2 100644 --- a/src/determinants/determinants.irp.f +++ b/src/determinants/determinants.irp.f @@ -329,6 +329,7 @@ END_PROVIDER BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_bit, (N_int,2,psi_det_size) ] &BEGIN_PROVIDER [ double precision, psi_coef_sorted_bit, (psi_det_size,N_states) ] +&BEGIN_PROVIDER [ integer, psi_det_sorted_bit_order, (psi_det_size) ] implicit none BEGIN_DOC ! Determinants on which we apply $\langle i|H|psi \rangle$ for perturbation. @@ -337,8 +338,8 @@ END_PROVIDER ! function. END_DOC - call sort_dets_by_det_search_key(N_det, psi_det, psi_coef, size(psi_coef,1), & - psi_det_sorted_bit, psi_coef_sorted_bit, N_states) + call sort_dets_by_det_search_key_ordered(N_det, psi_det, psi_coef, size(psi_coef,1), & + psi_det_sorted_bit, psi_coef_sorted_bit, N_states, psi_det_sorted_bit_order) END_PROVIDER @@ -1005,3 +1006,48 @@ BEGIN_PROVIDER [ double precision, psi_det_Hii, (N_det) ] END_PROVIDER +subroutine sort_dets_by_det_search_key_ordered(Ndet, det_in, coef_in, sze, det_out, coef_out, N_st, iorder) + use bitmasks + implicit none + integer, intent(in) :: Ndet, N_st, sze + integer(bit_kind), intent(in) :: det_in (N_int,2,sze) + double precision , intent(in) :: coef_in(sze,N_st) + integer(bit_kind), intent(out) :: det_out (N_int,2,sze) + double precision , intent(out) :: coef_out(sze,N_st) + integer, intent(out) :: iorder(sze) + BEGIN_DOC + ! Determinants are sorted according to their :c:func:`det_search_key`. + ! Useful to accelerate the search of a random determinant in the wave + ! function. + ! + ! /!\ The first dimension of coef_out and coef_in need to be psi_det_size + ! + END_DOC + integer :: i,j,k + integer*8, allocatable :: bit_tmp(:) + integer*8, external :: det_search_key + + allocate ( bit_tmp(Ndet) ) + + do i=1,Ndet + iorder(i) = i + !$DIR FORCEINLINE + bit_tmp(i) = det_search_key(det_in(1,1,i),N_int) + enddo + call i8sort(bit_tmp,iorder,Ndet) + !DIR$ IVDEP + do i=1,Ndet + do j=1,N_int + det_out(j,1,i) = det_in(j,1,iorder(i)) + det_out(j,2,i) = det_in(j,2,iorder(i)) + enddo + do k=1,N_st + coef_out(i,k) = coef_in(iorder(i),k) + enddo + enddo + + deallocate(bit_tmp) + +end + + diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f index a34608f9..b9710fd1 100644 --- a/src/determinants/slater_rules.irp.f +++ b/src/determinants/slater_rules.irp.f @@ -83,7 +83,7 @@ subroutine get_excitation(det1,det2,exc,degree,phase,Nint) ! exc(1,1,1) = q ! exc(1,2,1) = p - ! T^alpha_pq : exc(0,1,2) = 1 + ! T^beta_pq : exc(0,1,2) = 1 ! exc(0,2,2) = 1 ! exc(1,1,2) = q ! exc(1,2,2) = p @@ -434,6 +434,98 @@ subroutine get_single_excitation(det1,det2,exc,phase,Nint) end +subroutine get_single_excitation_cfg(cfg1,cfg2,p,q,Nint) + use bitmasks + implicit none + BEGIN_DOC + ! Returns the excitation operator between two singly excited configurations. + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: cfg1(Nint,2) + integer(bit_kind), intent(in) :: cfg2(Nint,2) + integer, intent(out) :: p, q + integer :: tz + integer :: l, ispin, idx_hole, idx_particle, ishift + integer :: nperm + integer :: i,j,k,m,n + integer :: high, low + integer :: a,b,c,d + integer(bit_kind) :: hole, particle, tmp + integer :: exc(0:2,2,2) + + ASSERT (Nint > 0) + nperm = 0 + p = 0 + q = 0 + exc(0,1,1) = 0 + exc(0,2,1) = 0 + exc(0,1,2) = 0 + exc(0,2,2) = 0 + do ispin = 1,2 + ishift = 1-bit_kind_size + do l=1,Nint + ishift = ishift + bit_kind_size + if (cfg1(l,ispin) == cfg2(l,ispin)) then + cycle + endif + tmp = xor( cfg1(l,ispin), cfg2(l,ispin) ) + particle = iand(tmp, cfg2(l,ispin)) + hole = iand(tmp, cfg1(l,ispin)) + if (particle /= 0_bit_kind) then + tz = trailz(particle) + exc(0,2,ispin) = 1 + exc(1,2,ispin) = tz+ishift + !print *,"part ",tz+ishift, " ispin=",ispin + endif + if (hole /= 0_bit_kind) then + tz = trailz(hole) + exc(0,1,ispin) = 1 + exc(1,1,ispin) = tz+ishift + !print *,"hole ",tz+ishift, " ispin=",ispin + endif + + if ( iand(exc(0,1,ispin),exc(0,2,ispin)) /= 1) then ! exc(0,1,ispin)/=1 and exc(0,2,ispin) /= 1 + cycle + endif + + high = max(exc(1,1,ispin), exc(1,2,ispin))-1 + low = min(exc(1,1,ispin), exc(1,2,ispin)) + + ASSERT (low >= 0) + ASSERT (high > 0) + + k = shiftr(high,bit_kind_shift)+1 + j = shiftr(low,bit_kind_shift)+1 + m = iand(high,bit_kind_size-1) + n = iand(low,bit_kind_size-1) + + if (j==k) then + nperm = nperm + popcnt(iand(cfg1(j,ispin), & + iand( shiftl(1_bit_kind,m)-1_bit_kind, & + not(shiftl(1_bit_kind,n))+1_bit_kind)) ) + else + nperm = nperm + popcnt( & + iand(cfg1(j,ispin), & + iand(not(0_bit_kind), & + (not(shiftl(1_bit_kind,n)) + 1_bit_kind) ))) & + + popcnt(iand(cfg1(k,ispin), & + (shiftl(1_bit_kind,m) - 1_bit_kind ) )) + + do i=j+1,k-1 + nperm = nperm + popcnt(cfg1(i,ispin)) + end do + + endif + + ! Set p and q + q = max(exc(1,1,1),exc(1,1,2)) + p = max(exc(1,2,1),exc(1,2,2)) + return + + enddo + enddo +end + subroutine bitstring_to_list_ab( string, list, n_elements, Nint) use bitmasks implicit none diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f index f4123c85..8cbf9dd0 100644 --- a/src/hartree_fock/scf.irp.f +++ b/src/hartree_fock/scf.irp.f @@ -83,16 +83,12 @@ subroutine run() PROVIDE scf_algorithm - if(scf_algorithm .eq. "DIIS_MO") then - call Roothaan_Hall_SCF_MO() - elseif(scf_algorithm .eq. "DIIS_MODIF") then - call Roothaan_Hall_SCF_MODIF() - elseif(scf_algorithm .eq. "DIIS") then + if(scf_algorithm .eq. "DIIS") then call Roothaan_Hall_SCF() elseif(scf_algorithm .eq. "Simple") then call Roothaan_Hall_SCF_Simple() else - print *, ' not implemented yet:', scf_algorithm + print *, scf_algorithm, ' not implemented yet' endif call ezfio_set_hartree_fock_energy(SCF_energy) diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index 56d8cf28..411d2d4e 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -38,7 +38,7 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] print*, 'MO integrals provided' return else - PROVIDE ao_two_e_integrals_in_map + PROVIDE ao_two_e_integrals_in_map endif print *, '' @@ -245,18 +245,16 @@ subroutine add_integrals_to_map(mask_ijkl) return endif - double precision :: accu_bis - accu_bis = 0.d0 call wall_time(wall_1) - size_buffer = min( (qp_max_mem/(nproc*5)),mo_num*mo_num*mo_num) + size_buffer = min(mo_num*mo_num*mo_num,8000000) print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' !$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, & !$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,& !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & - !$OMP wall_0,thread_num,accu_bis) & + !$OMP wall_0,thread_num) & !$OMP DEFAULT(NONE) & !$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l, & !$OMP mo_coef_transp, & @@ -434,10 +432,10 @@ subroutine add_integrals_to_map(mask_ijkl) !$OMP END DO NOWAIT deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3) - integer :: index_needed - - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) + if (n_integrals > 0) then + call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& + real(mo_integrals_threshold,integral_kind)) + endif deallocate(buffer_i, buffer_value) !$OMP END PARALLEL call map_merge(mo_integrals_map) @@ -527,12 +525,10 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) call wall_time(wall_1) call cpu_time(cpu_1) - double precision :: accu_bis - accu_bis = 0.d0 !$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, & !$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,& !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & - !$OMP wall_0,thread_num,accu_bis) & + !$OMP wall_0,thread_num) & !$OMP DEFAULT(NONE) & !$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k, & !$OMP mo_coef_transp, & @@ -730,8 +726,6 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) !$OMP END DO NOWAIT deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3) - integer :: index_needed - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& real(mo_integrals_threshold,integral_kind)) deallocate(buffer_i, buffer_value) diff --git a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f index ca00b816..5e7ef7e9 100644 --- a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f +++ b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f @@ -306,7 +306,7 @@ subroutine test_int2_grad1_u12_ao() call num_int2_grad1_u12_ao(i, j, ipoint, integ) - i_exc = int2_grad1_u12_ao(1,i,j,ipoint) + i_exc = int2_grad1_u12_ao(i,j,ipoint,1) i_num = integ(1) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then @@ -318,7 +318,7 @@ subroutine test_int2_grad1_u12_ao() acc_tot += acc_ij normalz += dabs(i_num) - i_exc = int2_grad1_u12_ao(2,i,j,ipoint) + i_exc = int2_grad1_u12_ao(i,j,ipoint,2) i_num = integ(2) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then @@ -330,7 +330,7 @@ subroutine test_int2_grad1_u12_ao() acc_tot += acc_ij normalz += dabs(i_num) - i_exc = int2_grad1_u12_ao(3,i,j,ipoint) + i_exc = int2_grad1_u12_ao(i,j,ipoint,3) i_num = integ(3) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then @@ -382,7 +382,7 @@ subroutine test_int2_u_grad1u_total_j1b2() call num_int2_u_grad1u_total_j1b2(i, j, ipoint, integ) - i_exc = x * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(1,i,j,ipoint) + i_exc = x * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,1) i_num = integ(1) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then @@ -394,7 +394,7 @@ subroutine test_int2_u_grad1u_total_j1b2() acc_tot += acc_ij normalz += dabs(i_num) - i_exc = y * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(2,i,j,ipoint) + i_exc = y * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,2) i_num = integ(2) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then @@ -406,7 +406,7 @@ subroutine test_int2_u_grad1u_total_j1b2() acc_tot += acc_ij normalz += dabs(i_num) - i_exc = z * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(3,i,j,ipoint) + i_exc = z * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,3) i_num = integ(3) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index c941b427..ff3d11f3 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -70,9 +70,9 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi gradu_squared_u_ij_mu(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint) & + tmp2 * int2_u2_j1b2 (i,j,ipoint) & - + tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(1,i,j,ipoint) & - + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(2,i,j,ipoint) & - + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(3,i,j,ipoint) + + tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(i,j,ipoint,1) & + + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(i,j,ipoint,2) & + + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(i,j,ipoint,3) enddo enddo enddo @@ -104,11 +104,11 @@ END_PROVIDER ! --- -!BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)] +!BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_num, ao_num)] ! ! BEGIN_DOC ! ! -! ! tc_grad_square_ao(k,i,l,j) = -1/2 +! ! tc_grad_square_ao_loop(k,i,l,j) = -1/2 ! ! ! END_DOC ! @@ -142,8 +142,8 @@ END_PROVIDER ! do l = 1, ao_num ! do i = 1, ao_num ! do k = 1, ao_num -! tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) -! !write(11,*) tc_grad_square_ao(k,i,l,j) +! tc_grad_square_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) +! !write(11,*) tc_grad_square_ao_loop(k,i,l,j) ! enddo ! enddo ! enddo @@ -155,19 +155,23 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)] +BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_num, ao_num)] BEGIN_DOC ! - ! tc_grad_square_ao(k,i,l,j) = -1/2 + ! tc_grad_square_ao_loop(k,i,l,j) = 1/2 ! END_DOC implicit none integer :: ipoint, i, j, k, l double precision :: weight1, ao_ik_r, ao_i_r + double precision :: time0, time1 double precision, allocatable :: ac_mat(:,:,:,:), bc_mat(:,:,:,:) + print*, ' providing tc_grad_square_ao_loop ...' + call wall_time(time0) + allocate(ac_mat(ao_num,ao_num,ao_num,ao_num)) ac_mat = 0.d0 allocate(bc_mat(ao_num,ao_num,ao_num,ao_num)) @@ -177,10 +181,12 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao weight1 = final_weight_at_r_vector(ipoint) do i = 1, ao_num - ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i) + !ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i) + ao_i_r = weight1 * aos_in_r_array(i,ipoint) do k = 1, ao_num - ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k) + !ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k) + ao_ik_r = ao_i_r * aos_in_r_array(k,ipoint) do j = 1, ao_num do l = 1, ao_num @@ -196,7 +202,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao do l = 1, ao_num do i = 1, ao_num do k = 1, ao_num - tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + bc_mat(k,i,l,j) + tc_grad_square_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + bc_mat(k,i,l,j) enddo enddo enddo @@ -205,6 +211,9 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao deallocate(ac_mat) deallocate(bc_mat) + call wall_time(time1) + print*, ' Wall time for tc_grad_square_ao_loop = ', time1 - time0 + END_PROVIDER ! --- @@ -329,9 +338,9 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, tmp9 = int2_u_grad1u_j1b2(i,j,ipoint) - u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(1,i,j,ipoint) & - + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(2,i,j,ipoint) & - + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(3,i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(i,j,ipoint,1) & + + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(i,j,ipoint,2) & + + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(i,j,ipoint,3) enddo enddo enddo @@ -343,3 +352,86 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)] + + BEGIN_DOC + ! + ! tc_grad_square_ao(k,i,l,j) = 1/2 + ! + END_DOC + + implicit none + integer :: ipoint, i, j, k, l + double precision :: weight1, ao_ik_r, ao_i_r + double precision :: time0, time1 + double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:) + + print*, ' providing tc_grad_square_ao ...' + call wall_time(time0) + + allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) + + b_mat = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + tmp = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, l, ipoint) & + !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do l = 1, ao_num + tmp(l,j,ipoint) = u12sq_j1bsq(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(l,j,ipoint) + 0.5d0 * grad12_j12(l,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + ac_mat = 0.d0 + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & + , 1.d0, ac_mat, ao_num*ao_num) + deallocate(tmp, b_mat) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l) & + !$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(ac_mat) + + call wall_time(time1) + print*, ' Wall time for tc_grad_square_ao = ', time1 - time0 + +END_PROVIDER + +! --- diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f index 14749082..180c9588 100644 --- a/src/non_h_ints_mu/grad_squared_manu.irp.f +++ b/src/non_h_ints_mu/grad_squared_manu.irp.f @@ -10,51 +10,75 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu implicit none integer :: ipoint, i, j, k, l double precision :: weight1, ao_ik_r, ao_i_r,contrib,contrib2 - double precision, allocatable :: ac_mat(:,:,:,:), bc_mat(:,:,:,:) - double precision :: wall1, wall0 + double precision :: time0, time1 + double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:) + + print*, ' providing tc_grad_square_ao_test ...' + call wall_time(time0) + provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test - call wall_time(wall0) - allocate(ac_mat(ao_num,ao_num,ao_num,ao_num)) - ac_mat = 0.d0 - allocate(bc_mat(ao_num,ao_num,ao_num,ao_num)) - bc_mat = 0.d0 + allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) - do ipoint = 1, n_points_final_grid - weight1 = final_weight_at_r_vector(ipoint) - - do j = 1, ao_num - do l = 1, ao_num - contrib = u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) - contrib2=grad12_j12_test(l,j,ipoint) - do i = 1, ao_num - ao_i_r = weight1 * aos_in_r_array(i,ipoint) - - do k = 1, ao_num - ao_ik_r = ao_i_r * aos_in_r_array(k,ipoint) - - ac_mat(k,i,l,j) += ao_ik_r * contrib - bc_mat(k,i,l,j) += ao_ik_r * contrib2 - enddo - enddo + b_mat = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) enddo enddo enddo + !$OMP END DO + !$OMP END PARALLEL + tmp = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, l, ipoint) & + !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do l = 1, ao_num + tmp(l,j,ipoint) = u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) + 0.5d0 * grad12_j12_test(l,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ac_mat = 0.d0 + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & + , 1.d0, ac_mat, ao_num*ao_num) + deallocate(tmp, b_mat) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l) & + !$OMP SHARED (ac_mat, tc_grad_square_ao_test, ao_num) + !$OMP DO SCHEDULE (static) do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num do k = 1, ao_num - tc_grad_square_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + bc_mat(k,i,l,j) + tc_grad_square_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) enddo enddo enddo enddo - call wall_time(wall1) - print*,'wall time for tc_grad_square_ao_test',wall1 - wall0 + !$OMP END DO + !$OMP END PARALLEL deallocate(ac_mat) - deallocate(bc_mat) + + call wall_time(time1) + print*, ' Wall time for tc_grad_square_ao_test = ', time1 - time0 END_PROVIDER @@ -88,6 +112,7 @@ BEGIN_PROVIDER [ double precision, u12sq_j1bsq_test, (ao_num, ao_num, n_points_f END_PROVIDER +! --- BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao_num, n_points_final_grid) ] @@ -99,8 +124,9 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao double precision :: time0, time1 double precision, external :: overlap_gauss_r12_ao - provide int2_u_grad1u_x_j1b2_test print*, ' providing u12_grad1_u12_j1b_grad1_j1b_test ...' + + provide int2_u_grad1u_x_j1b2_test call wall_time(time0) do ipoint = 1, n_points_final_grid @@ -126,9 +152,9 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao tmp9 = int2_u_grad1u_j1b2_test(i,j,ipoint) - u12_grad1_u12_j1b_grad1_j1b_test(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2_test(1,i,j,ipoint) & - + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2_test(2,i,j,ipoint) & - + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2_test(3,i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,1) & + + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,2) & + + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,3) enddo enddo enddo @@ -192,3 +218,4 @@ BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_fi END_PROVIDER ! --- + diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index a304324c..854789bd 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -1,22 +1,21 @@ - ! --- -BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_points_final_grid)] +BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)] BEGIN_DOC ! - ! int2_grad1_u12_ao(:,i,j,ipoint) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) ! ! where r1 = r(ipoint) ! ! if J(r1,r2) = u12: ! - ! int2_grad1_u12_ao(:,i,j,ipoint) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2) ! = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] ! ! if J(r1,r2) = u12 x v1 x v2 ! - ! int2_grad1_u12_ao(:,i,j,ipoint) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] + ! int2_grad1_u12_ao(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) @@ -27,8 +26,12 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_poin implicit none integer :: ipoint, i, j + double precision :: time0, time1 double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 + print*, ' providing int2_grad1_u12_ao ...' + call wall_time(time0) + PROVIDE j1b_type if(j1b_type .eq. 3) then @@ -49,9 +52,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_poin tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) - int2_grad1_u12_ao(1,i,j,ipoint) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(1,i,j,ipoint) - tmp2 * tmp_x - int2_grad1_u12_ao(2,i,j,ipoint) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(2,i,j,ipoint) - tmp2 * tmp_y - int2_grad1_u12_ao(3,i,j,ipoint) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(3,i,j,ipoint) - tmp2 * tmp_z + int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x + int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y + int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z enddo enddo enddo @@ -67,9 +70,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_poin do i = 1, ao_num tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint) - int2_grad1_u12_ao(1,i,j,ipoint) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(1,i,j,ipoint) - int2_grad1_u12_ao(2,i,j,ipoint) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(2,i,j,ipoint) - int2_grad1_u12_ao(3,i,j,ipoint) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(3,i,j,ipoint) + int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,1) + int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,2) + int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,3) enddo enddo enddo @@ -78,6 +81,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_poin endif + call wall_time(time1) + print*, ' Wall time for int2_grad1_u12_ao = ', time1 - time0 + END_PROVIDER ! --- @@ -94,7 +100,7 @@ BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_poin ! ! int1_grad2_u12_ao(:,i,j,ipoint) = +0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r1) \phi_j(r1) ! = -0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] - ! = -int2_grad1_u12_ao(:,i,j,ipoint) + ! = -int2_grad1_u12_ao(i,j,ipoint,:) ! ! if J(r1,r2) = u12 x v1 x v2 ! @@ -131,9 +137,9 @@ BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_poin tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) - int1_grad2_u12_ao(1,i,j,ipoint) = -tmp1 * x + tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(1,i,j,ipoint) - tmp2 * tmp_x - int1_grad2_u12_ao(2,i,j,ipoint) = -tmp1 * y + tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(2,i,j,ipoint) - tmp2 * tmp_y - int1_grad2_u12_ao(3,i,j,ipoint) = -tmp1 * z + tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(3,i,j,ipoint) - tmp2 * tmp_z + int1_grad2_u12_ao(1,i,j,ipoint) = -tmp1 * x + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x + int1_grad2_u12_ao(2,i,j,ipoint) = -tmp1 * y + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y + int1_grad2_u12_ao(3,i,j,ipoint) = -tmp1 * z + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z enddo enddo enddo @@ -148,11 +154,11 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)] +BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_loop, (ao_num, ao_num, ao_num, ao_num)] BEGIN_DOC ! - ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij > + ! tc_grad_and_lapl_ao_loop(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij > ! ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) ! @@ -165,8 +171,12 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, double precision :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z double precision :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz double precision :: ao_j_r, ao_l_r, ao_l_dx, ao_l_dy, ao_l_dz + double precision :: time0, time1 double precision, allocatable :: ac_mat(:,:,:,:) + print*, ' providing tc_grad_and_lapl_ao_loop ...' + call wall_time(time0) + allocate(ac_mat(ao_num,ao_num,ao_num,ao_num)) ac_mat = 0.d0 @@ -176,24 +186,32 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) do i = 1, ao_num - ao_i_r = weight1 * aos_in_r_array_transp (ipoint,i) - ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3) + !ao_i_r = weight1 * aos_in_r_array_transp (ipoint,i) + !ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1) + !ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2) + !ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3) + ao_i_r = weight1 * aos_in_r_array (i,ipoint) + ao_i_dx = weight1 * aos_grad_in_r_array(i,ipoint,1) + ao_i_dy = weight1 * aos_grad_in_r_array(i,ipoint,2) + ao_i_dz = weight1 * aos_grad_in_r_array(i,ipoint,3) do k = 1, ao_num - ao_k_r = aos_in_r_array_transp(ipoint,k) + !ao_k_r = aos_in_r_array_transp(ipoint,k) + ao_k_r = aos_in_r_array(k,ipoint) - tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) - tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) - tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) + !tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) + !tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) + !tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) + tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array(k,ipoint,1) + tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array(k,ipoint,2) + tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array(k,ipoint,3) do j = 1, ao_num do l = 1, ao_num - contrib_x = int2_grad1_u12_ao(1,l,j,ipoint) * tmp_x - contrib_y = int2_grad1_u12_ao(2,l,j,ipoint) * tmp_y - contrib_z = int2_grad1_u12_ao(3,l,j,ipoint) * tmp_z + contrib_x = int2_grad1_u12_ao(l,j,ipoint,1) * tmp_x + contrib_y = int2_grad1_u12_ao(l,j,ipoint,2) * tmp_y + contrib_z = int2_grad1_u12_ao(l,j,ipoint,3) * tmp_z ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z enddo @@ -223,9 +241,9 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ! do i = 1, ao_num ! do k = 1, ao_num - ! contrib_x = int2_grad1_u12_ao(1,k,i,ipoint) * tmp_x - ! contrib_y = int2_grad1_u12_ao(2,k,i,ipoint) * tmp_y - ! contrib_z = int2_grad1_u12_ao(3,k,i,ipoint) * tmp_z + ! contrib_x = int2_grad1_u12_ao(k,i,ipoint,1) * tmp_x + ! contrib_y = int2_grad1_u12_ao(k,i,ipoint,2) * tmp_y + ! contrib_z = int2_grad1_u12_ao(k,i,ipoint,3) * tmp_z ! ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z ! enddo @@ -240,8 +258,8 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, do l = 1, ao_num do i = 1, ao_num do k = 1, ao_num - tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) - !tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j) + tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + !tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) enddo enddo enddo @@ -249,6 +267,92 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, deallocate(ac_mat) + call wall_time(time1) + print*, ' Wall time for tc_grad_and_lapl_ao_loop = ', time1 - time0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)] + + BEGIN_DOC + ! + ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij > + ! + ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! + ! This is obtained by integration by parts. + ! + END_DOC + + implicit none + integer :: ipoint, i, j, k, l, m + double precision :: weight1, ao_k_r, ao_i_r + double precision :: time0, time1 + double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:,:) + + print*, ' providing tc_grad_and_lapl_ao ...' + call wall_time(time0) + + allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num)) + + b_mat = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) + b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) + b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ac_mat = 0.d0 + do m = 1, 3 + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & + , 1.d0, ac_mat, ao_num*ao_num) + + enddo + deallocate(b_mat) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l) & + !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao, ao_num) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + !tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(ac_mat) + + call wall_time(time1) + print*, ' Wall time for tc_grad_and_lapl_ao = ', time1 - time0 + END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f index a2287f66..4d85e061 100644 --- a/src/non_h_ints_mu/new_grad_tc_manu.irp.f +++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f @@ -1,20 +1,20 @@ -BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (3, ao_num, ao_num, n_points_final_grid)] +BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_points_final_grid, 3)] BEGIN_DOC ! - ! int2_grad1_u12_ao_test(:,i,j,ipoint) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) ! ! where r1 = r(ipoint) ! ! if J(r1,r2) = u12: ! - ! int2_grad1_u12_ao_test(:,i,j,ipoint) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2) + ! int2_grad1_u12_ao_test(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2) ! = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] ! ! if J(r1,r2) = u12 x v1 x v2 ! - ! int2_grad1_u12_ao_test(:,i,j,ipoint) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] + ! int2_grad1_u12_ao_test(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) @@ -25,8 +25,12 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (3, ao_num, ao_num, n implicit none integer :: ipoint, i, j + double precision :: time0, time1 double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 + print*, ' providing int2_grad1_u12_ao_test ...' + call wall_time(time0) + PROVIDE j1b_type if(j1b_type .eq. 3) then @@ -43,14 +47,13 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (3, ao_num, ao_num, n do j = 1, ao_num do i = 1, ao_num -! if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-12)cycle tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint) tmp2 = v_ij_u_cst_mu_j1b_test(i,j,ipoint) - int2_grad1_u12_ao_test(1,i,j,ipoint) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b_test(1,i,j,ipoint) - tmp2 * tmp_x - int2_grad1_u12_ao_test(2,i,j,ipoint) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b_test(2,i,j,ipoint) - tmp2 * tmp_y - int2_grad1_u12_ao_test(3,i,j,ipoint) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b_test(3,i,j,ipoint) - tmp2 * tmp_z + int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,1) - tmp2 * tmp_x + int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,2) - tmp2 * tmp_y + int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,3) - tmp2 * tmp_z enddo enddo enddo @@ -66,9 +69,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (3, ao_num, ao_num, n do i = 1, ao_num tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint) - int2_grad1_u12_ao_test(1,i,j,ipoint) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(1,i,j,ipoint) - int2_grad1_u12_ao_test(2,i,j,ipoint) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(2,i,j,ipoint) - int2_grad1_u12_ao_test(3,i,j,ipoint) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(3,i,j,ipoint) + int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,1) + int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,2) + int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,3) enddo enddo enddo @@ -77,8 +80,13 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (3, ao_num, ao_num, n endif + call wall_time(time1) + print*, ' Wall time for int2_grad1_u12_ao_test = ', time1 - time0 + END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_num, ao_num)] BEGIN_DOC @@ -92,48 +100,57 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ END_DOC implicit none - integer :: ipoint, i, j, k, l + integer :: ipoint, i, j, k, l, m double precision :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z double precision :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz - double precision, allocatable :: ac_mat(:,:,:,:) - double precision :: wall0, wall1 + double precision :: time0, time1 + double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:,:) + + print*, ' providing tc_grad_and_lapl_ao_test ...' + call wall_time(time0) provide int2_grad1_u12_ao_test - call wall_time(wall0) - allocate(ac_mat(ao_num,ao_num,ao_num,ao_num)) - ac_mat = 0.d0 + + allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num)) - do ipoint = 1, n_points_final_grid - weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) - do j = 1, ao_num - do l = 1, ao_num - contrib_x = int2_grad1_u12_ao_test(1,l,j,ipoint) - contrib_y = int2_grad1_u12_ao_test(2,l,j,ipoint) - contrib_z = int2_grad1_u12_ao_test(3,l,j,ipoint) - do i = 1, ao_num - ao_i_r = weight1 * aos_in_r_array (i,ipoint) - ao_i_dx = weight1 * aos_grad_in_r_array_transp(1,i,ipoint) - ao_i_dy = weight1 * aos_grad_in_r_array_transp(2,i,ipoint) - ao_i_dz = weight1 * aos_grad_in_r_array_transp(3,i,ipoint) - - do k = 1, ao_num - ao_k_r = aos_in_r_array(k,ipoint) - - tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp(1,k,ipoint) - tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp(2,k,ipoint) - tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp(3,k,ipoint) + b_mat = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid - tmp_x *= contrib_x - tmp_y *= contrib_y - tmp_z *= contrib_z + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) - ac_mat(k,i,l,j) += tmp_x + tmp_y + tmp_z - enddo - enddo + b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) + b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) + b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) enddo enddo enddo - + !$OMP END DO + !$OMP END PARALLEL + + ac_mat = 0.d0 + do m = 1, 3 + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_ao_test(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & + , 1.d0, ac_mat, ao_num*ao_num) + + enddo + deallocate(b_mat) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l) & + !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao_test, ao_num) + !$OMP DO SCHEDULE (static) do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num @@ -143,11 +160,15 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ enddo enddo enddo + !$OMP END DO + !$OMP END PARALLEL - call wall_time(wall1) - print*,'wall time for tc_grad_and_lapl_ao_test',wall1 - wall0 deallocate(ac_mat) + call wall_time(time1) + print*, ' Wall time for tc_grad_and_lapl_ao_test = ', time1 - time0 + END_PROVIDER ! --- + diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index bdd5e5ac..81747553 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -7,6 +7,7 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao integer :: i, j, k, l double precision :: wall1, wall0 + print *, ' providing ao_tc_int_chemist ...' call wall_time(wall0) if(test_cycle_tc)then @@ -36,6 +37,7 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_nu integer :: i, j, k, l double precision :: wall1, wall0 + print *, ' providing ao_tc_int_chemist_test ...' call wall_time(wall0) do j = 1, ao_num @@ -47,8 +49,10 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_nu enddo enddo enddo + call wall_time(wall1) print *, ' wall time for ao_tc_int_chemist_test ', wall1 - wall0 + END_PROVIDER ! --- diff --git a/src/non_hermit_dav/biorthog.irp.f b/src/non_hermit_dav/biorthog.irp.f index b621206a..78fddf54 100644 --- a/src/non_hermit_dav/biorthog.irp.f +++ b/src/non_hermit_dav/biorthog.irp.f @@ -444,8 +444,8 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei endif call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .true.) - !call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec) - !call impose_biorthog_lu(n, n_real_eigv, leigvec, reigvec) + !call impose_biorthog_qr(n, n_real_eigv, thr_d, thr_nd, leigvec, reigvec) + !call impose_biorthog_lu(n, n_real_eigv, thr_d, thr_nd, leigvec, reigvec) ! --- @@ -611,7 +611,7 @@ subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigva enddo accu_nd = dsqrt(accu_nd) - if(accu_nd .lt. 1d-8) then + if(accu_nd .lt. thresh_biorthog_nondiag) then ! L x R is already bi-orthogonal print *, ' L & T bi-orthogonality: ok' @@ -623,7 +623,7 @@ subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigva print *, ' L & T bi-orthogonality: not imposed yet' print *, ' accu_nd = ', accu_nd - call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec) + call impose_biorthog_qr(n, n_real_eigv, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec) deallocate( S ) endif @@ -633,7 +633,7 @@ subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigva return -end +end subroutine non_hrmt_bieig_random_diag ! --- @@ -961,7 +961,7 @@ subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval) enddo accu_nd = dsqrt(accu_nd) - if( accu_nd .lt. 1d-8 ) then + if(accu_nd .lt. thresh_biorthog_nondiag) then ! L x R is already bi-orthogonal !print *, ' L & T bi-orthogonality: ok' @@ -973,7 +973,7 @@ subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval) !print *, ' L & T bi-orthogonality: not imposed yet' !print *, ' accu_nd = ', accu_nd - call impose_biorthog_qr(n, n, leigvec, reigvec) + call impose_biorthog_qr(n, n, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec) deallocate( S ) endif diff --git a/src/non_hermit_dav/new_routines.irp.f b/src/non_hermit_dav/new_routines.irp.f index 07ac5917..4dea5f66 100644 --- a/src/non_hermit_dav/new_routines.irp.f +++ b/src/non_hermit_dav/new_routines.irp.f @@ -132,9 +132,9 @@ subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_ei !!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY ! check bi-orthogonality - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.) + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) print *, ' accu_nd bi-orthog = ', accu_nd - if( accu_nd .lt. 1d-10 ) then + if(accu_nd .lt. thresh_biorthog_nondiag) then print *, ' bi-orthogonality: ok' else print *, ' ' @@ -149,14 +149,14 @@ subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_ei deallocate(S_nh_inv_half) call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.) - if( accu_nd .lt. 1d-10 ) then + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) + if(accu_nd .lt. thresh_biorthog_nondiag) then print *, ' bi-orthogonality: ok' else print*,'New vectors not bi-orthonormals at ',accu_nd call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.) - if( accu_nd .lt. 1d-10 ) then + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) + if(accu_nd .lt. thresh_biorthog_nondiag) then print *, ' bi-orthogonality: ok' else print*,'New vectors not bi-orthonormals at ',accu_nd @@ -200,10 +200,10 @@ subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_ei shift_current = max(1.d-10,shift_current) print*,'Thr for eigenvectors = ',shift_current call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.) - call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, .false.) + call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) print *, ' accu_nd bi-orthog = ', accu_nd - if( accu_nd .lt. 1d-10 ) then + if(accu_nd .lt. thresh_biorthog_nondiag) then print *, ' bi-orthogonality: ok' else print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog' @@ -354,14 +354,14 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e !!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY ! check bi-orthogonality - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.) + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) print *, ' accu_nd bi-orthog = ', accu_nd - if( accu_nd .lt. 1d-10 ) then + if(accu_nd .lt. thresh_biorthog_nondiag) then print *, ' bi-orthogonality: ok' else print *, ' ' print *, ' bi-orthogonality: not imposed yet' - if(complex_root)then + if(complex_root) then print *, ' ' print *, ' ' print *, ' orthog between degen eigenvect' @@ -369,9 +369,9 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) ! orthogonalization of reigvec call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) ! orthogonalization of leigvec - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S) + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - if( accu_nd .lt. 1d-10 ) then + if(accu_nd .lt. thresh_biorthog_nondiag) then print *, ' bi-orthogonality: ok' else print*,'New vectors not bi-orthonormals at ', accu_nd @@ -387,8 +387,8 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e print*,'S^{-1/2} exists !!' call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization endif - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.) - if( accu_nd .lt. 1d-10 ) then + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) + if(accu_nd .lt. thresh_biorthog_nondiag) then print *, ' bi-orthogonality: ok' else print*,'New vectors not bi-orthonormals at ',accu_nd @@ -431,10 +431,10 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e shift_current = max(1.d-10,shift_current) print*,'Thr for eigenvectors = ',shift_current call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.) - call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, .false.) + call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) print *, ' accu_nd bi-orthog = ', accu_nd - if( accu_nd .lt. 1d-10 ) then + if(accu_nd .lt. thresh_biorthog_nondiag) then print *, ' bi-orthogonality: ok' else print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog' @@ -472,6 +472,7 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval) double precision :: accu,thr_cut double precision, allocatable :: S_nh_inv_half(:,:) logical :: complex_root + double precision :: thr_norm=1d0 thr_cut = 1.d-15 @@ -580,9 +581,9 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval) !!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY ! check bi-orthogonality - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S) + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) print *, ' accu_nd bi-orthog = ', accu_nd - if( accu_nd .lt. 1d-10 ) then + if(accu_nd .lt. thresh_biorthog_nondiag) then print *, ' bi-orthogonality: ok' else print *, ' ' @@ -593,9 +594,9 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval) print *, ' ' ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, leigvec_tmp, reigvec_tmp) - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S) + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) print*,'accu_nd = ',accu_nd - if( accu_nd .lt. 1d-10 ) then + if(accu_nd .lt. thresh_biorthog_nondiag) then print *, ' bi-orthogonality: ok' else print*,'New vectors not bi-orthonormals at ',accu_nd @@ -608,8 +609,8 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval) call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization endif endif - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S) - if( accu_nd .lt. 1d-10 ) then + call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) + if(accu_nd .lt. thresh_biorthog_nondiag) then print *, ' bi-orthogonality: ok' else print*,'New vectors not bi-orthonormals at ',accu_nd @@ -651,11 +652,11 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval) print*,'Checking for final reigvec/leigvec' shift_current = max(1.d-10,shift_current) print*,'Thr for eigenvectors = ',shift_current - call check_EIGVEC(n, n, A, eigval, leigvec, reigvec,shift_current) - call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S) + call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.) + call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) print *, ' accu_nd bi-orthog = ', accu_nd - if( accu_nd .lt. 1d-10 ) then + if(accu_nd .lt. thresh_biorthog_nondiag) then print *, ' bi-orthogonality: ok' else print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog' diff --git a/src/scf_utils/diagonalize_fock.irp.f b/src/scf_utils/diagonalize_fock.irp.f index a6f19c05..008344f2 100644 --- a/src/scf_utils/diagonalize_fock.irp.f +++ b/src/scf_utils/diagonalize_fock.irp.f @@ -20,6 +20,12 @@ BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_num) enddo enddo + !print *, ' Fock_matrix_MO :' + !do i = 1, mo_num + ! write(*, '(100(f15.7, 2x))') (Fock_matrix_MO(j,i), j = 1, mo_num) + !enddo + + if(frozen_orb_scf)then integer :: iorb,jorb do i = 1, n_core_orb @@ -89,6 +95,10 @@ BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_num) call dsyevd( 'V', 'U', mo_num, F, & size(F,1), diag, work, lwork, iwork, liwork, info) deallocate(iwork) + !print*, ' Fock eigval:' + !do i = 1, mo_num + ! print *, diag(i) + !enddo if (info /= 0) then diff --git a/src/scf_utils/diis.irp.f b/src/scf_utils/diis.irp.f index 00d4addb..63a847ce 100644 --- a/src/scf_utils/diis.irp.f +++ b/src/scf_utils/diis.irp.f @@ -248,3 +248,71 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO_a, (AO_num, AO_num)] + + implicit none + double precision, allocatable :: scratch(:,:) + + allocate(scratch(AO_num, AO_num)) + + call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0 & + , Fock_Matrix_AO_alpha, size(Fock_Matrix_AO_alpha, 1), SCF_density_matrix_ao_alpha, size(SCF_Density_Matrix_AO_alpha, 1) & + , 0.d0, scratch, size(scratch, 1) ) + + call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0 & + , scratch, size(scratch, 1), AO_Overlap, size(AO_Overlap, 1) & + , 0.d0, FPS_SPF_Matrix_AO_a, size(FPS_SPF_Matrix_AO_a, 1) ) + + call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0 & + , AO_Overlap, size(AO_Overlap, 1), SCF_density_matrix_ao_alpha, size(SCF_density_matrix_ao_alpha, 1) & + , 0.d0, scratch, size(scratch, 1) ) + + call dgemm( 'N', 'N', AO_num, AO_num, AO_num, -1.d0 & + , scratch, size(scratch, 1), Fock_Matrix_AO_alpha, size(Fock_Matrix_AO_alpha, 1) & + , 1.d0, FPS_SPF_Matrix_AO_a, size(FPS_SPF_Matrix_AO_a, 1) ) + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO_b, (AO_num, AO_num)] + + implicit none + double precision, allocatable :: scratch(:,:) + + allocate(scratch(AO_num, AO_num)) + + call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0 & + , Fock_Matrix_AO_beta, size(Fock_Matrix_AO_beta, 1), SCF_density_matrix_ao_beta, size(SCF_Density_Matrix_AO_beta, 1) & + , 0.d0, scratch, size(scratch, 1) ) + + call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0 & + , scratch, size(scratch, 1), AO_Overlap, size(AO_Overlap, 1) & + , 0.d0, FPS_SPF_Matrix_AO_b, size(FPS_SPF_Matrix_AO_b, 1) ) + + call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0 & + , AO_Overlap, size(AO_Overlap, 1), SCF_density_matrix_ao_beta, size(SCF_density_matrix_ao_beta, 1) & + , 0.d0, scratch, size(scratch, 1) ) + + call dgemm( 'N', 'N', AO_num, AO_num, AO_num, -1.d0 & + , scratch, size(scratch, 1), Fock_Matrix_AO_beta, size(Fock_Matrix_AO_beta, 1) & + , 1.d0, FPS_SPF_Matrix_AO_b, size(FPS_SPF_Matrix_AO_b, 1) ) + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO_a, (mo_num, mo_num)] + implicit none + call ao_to_mo(FPS_SPF_Matrix_AO_a, size(FPS_SPF_Matrix_AO_a, 1), FPS_SPF_Matrix_MO_a, size(FPS_SPF_Matrix_MO_a, 1)) +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO_b, (mo_num, mo_num)] + implicit none + call ao_to_mo(FPS_SPF_Matrix_AO_b, size(FPS_SPF_Matrix_AO_b, 1), FPS_SPF_Matrix_MO_b, size(FPS_SPF_Matrix_MO_b, 1)) +END_PROVIDER + +! --- + diff --git a/src/scf_utils/fock_matrix.irp.f b/src/scf_utils/fock_matrix.irp.f index 9a95caa1..baefcd6c 100644 --- a/src/scf_utils/fock_matrix.irp.f +++ b/src/scf_utils/fock_matrix.irp.f @@ -15,7 +15,7 @@ ! ! Rcc = Acc Fcc^a + Bcc Fcc^b ! Roo = Aoo Foo^a + Boo Foo^b - ! Rcc = Avv Fvv^a + Bvv Fvv^b + ! Rvv = Avv Fvv^a + Bvv Fvv^b ! Fcv = (F^a + F^b)/2 ! ! F^a: Fock matrix alpha (MO), F^b: Fock matrix beta (MO) @@ -267,3 +267,5 @@ BEGIN_PROVIDER [ double precision, SCF_energy ] END_PROVIDER +! --- + diff --git a/src/scf_utils/rh_scf_simple.irp.f b/src/scf_utils/rh_scf_simple.irp.f index 59b12749..cd7ba66f 100644 --- a/src/scf_utils/rh_scf_simple.irp.f +++ b/src/scf_utils/rh_scf_simple.irp.f @@ -63,35 +63,34 @@ END_DOC energy_SCF = SCF_energy Delta_energy_SCF = energy_SCF - energy_SCF_previous - double precision :: level_shift_save - level_shift_save = level_shift - mo_coef_save(1:ao_num,1:mo_num) = mo_coef(1:ao_num,1:mo_num) - do while (Delta_energy_SCF > 0.d0) - mo_coef(1:ao_num,1:mo_num) = mo_coef_save - if (level_shift <= .1d0) then - level_shift = 1.d0 - else - level_shift = level_shift * 3.0d0 - endif - TOUCH mo_coef level_shift - mo_coef(1:ao_num,1:mo_num) = eigenvectors_Fock_matrix_MO(1:ao_num,1:mo_num) - if(frozen_orb_scf)then - call reorder_core_orb - call initialize_mo_coef_begin_iteration - endif - TOUCH mo_coef - Delta_energy_SCF = SCF_energy - energy_SCF_previous - energy_SCF = SCF_energy - if (level_shift-level_shift_save > 40.d0) then - level_shift = level_shift_save * 4.d0 - SOFT_TOUCH level_shift - exit - endif + !double precision :: level_shift_save + !level_shift_save = level_shift + !mo_coef_save(1:ao_num,1:mo_num) = mo_coef(1:ao_num,1:mo_num) + !do while (Delta_energy_SCF > 0.d0) + ! mo_coef(1:ao_num,1:mo_num) = mo_coef_save + ! if (level_shift <= .1d0) then + ! level_shift = 1.d0 + ! else + ! level_shift = level_shift * 3.0d0 + ! endif + ! TOUCH mo_coef level_shift + ! mo_coef(1:ao_num,1:mo_num) = eigenvectors_Fock_matrix_MO(1:ao_num,1:mo_num) + ! if(frozen_orb_scf)then + ! call reorder_core_orb + ! call initialize_mo_coef_begin_iteration + ! endif + ! TOUCH mo_coef + ! Delta_energy_SCF = SCF_energy - energy_SCF_previous + ! energy_SCF = SCF_energy + ! if (level_shift-level_shift_save > 40.d0) then + ! level_shift = level_shift_save * 4.d0 + ! SOFT_TOUCH level_shift + ! exit + ! endif + !enddo + !level_shift = level_shift * 0.5d0 + !SOFT_TOUCH level_shift - enddo - - level_shift = level_shift * 0.5d0 - SOFT_TOUCH level_shift energy_SCF_previous = energy_SCF ! Print results at the end of each iteration @@ -100,7 +99,7 @@ END_DOC iteration_SCF, energy_SCF, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS if(Delta_energy_SCF < 0.d0) then - call save_mos + call save_mos() endif if(qp_stop()) exit diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 45522079..9ec61ced 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -86,10 +86,9 @@ END_DOC iteration_SCF,dim_DIIS & ) - Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0 - Fock_matrix_AO_beta = Fock_matrix_AO*0.5d0 + Fock_matrix_AO_alpha = Fock_matrix_AO!*0.5d0 + Fock_matrix_AO_beta = Fock_matrix_AO!*0.5d0 TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta - endif MO_coef = eigenvectors_Fock_matrix_MO @@ -100,18 +99,14 @@ END_DOC TOUCH MO_coef -! Calculate error vectors - - max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO)) - ! SCF energy energy_SCF = SCF_energy Delta_energy_SCF = energy_SCF - energy_SCF_previous if ( (SCF_algorithm == 'DIIS').and.(Delta_energy_SCF > 0.d0) ) then Fock_matrix_AO(1:ao_num,1:ao_num) = Fock_matrix_DIIS (1:ao_num,1:ao_num,index_dim_DIIS) - Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0 - Fock_matrix_AO_beta = Fock_matrix_AO*0.5d0 + Fock_matrix_AO_alpha = Fock_matrix_AO!*0.5d0 + Fock_matrix_AO_beta = Fock_matrix_AO!*0.5d0 TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta endif @@ -147,6 +142,9 @@ END_DOC SOFT_TOUCH level_shift energy_SCF_previous = energy_SCF +! Calculate error vectors + max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO)) + ! Print results at the end of each iteration write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)') & diff --git a/src/tc_bi_ortho/tc_som.irp.f b/src/tc_bi_ortho/tc_som.irp.f new file mode 100644 index 00000000..291c52ef --- /dev/null +++ b/src/tc_bi_ortho/tc_som.irp.f @@ -0,0 +1,70 @@ +! --- + +program tc_som + + BEGIN_DOC + ! TODO : Put the documentation of the program here + END_DOC + + implicit none + + print *, ' starting ...' + print *, ' do not forget to do tc-scf first' + + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 +! my_n_pt_r_grid = 10 ! small grid for quick debug +! my_n_pt_a_grid = 26 ! small grid for quick debug + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + PROVIDE mu_erf + print *, ' mu = ', mu_erf + PROVIDE j1b_type + print *, ' j1b_type = ', j1b_type + print *, j1b_pen + + read_wf = .true. + touch read_wf + + call main() + +end + +! --- + +subroutine main() + + implicit none + integer :: i, i_HF, degree + double precision :: hmono_1, htwoe_1, hthree_1, htot_1 + double precision :: hmono_2, htwoe_2, hthree_2, htot_2 + double precision :: U_SOM + + PROVIDE N_int N_det + + do i = 1, N_det + call get_excitation_degree(HF_bitmask, psi_det(1,1,i), degree, N_int) + if(degree == 0) then + i_HF = i + exit + endif + enddo + print *, ' HF determinants:', i_HF + print *, ' N_det :', N_det + + U_SOM = 0.d0 + do i = 1, N_det + if(i == i_HF) cycle + call htilde_mu_mat_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) + call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) + U_SOM += htot_1 * htot_2 + enddo + U_SOM = 0.5d0 * U_SOM + print *, ' U_SOM = ', U_SOM + + return +end subroutine main + +! --- + diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 26d75ad4..fabc3d14 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -134,7 +134,31 @@ default: False type: integer doc: nb of Gaussians used to fit Jastrow fcts interface: ezfio,provider,ocaml -default: 6 +default: 20 + +[tcscf_algorithm] +type: character*(32) +doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS] +interface: ezfio,provider,ocaml +default: Simple + +[test_cycle_tc] +type: logical +doc: If |true|, the integrals of the three-body jastrow are computed with cycles +interface: ezfio,provider,ocaml +default: False + +[thresh_biorthog_diag] +type: Threshold +doc: Threshold to determine if diagonal elements of the bi-orthogonal condition L.T x R are close enouph to 1 +interface: ezfio,provider,ocaml +default: 1.e-6 + +[thresh_biorthog_nondiag] +type: Threshold +doc: Threshold to determine if non-diagonal elements of L.T x R are close enouph to 0 +interface: ezfio,provider,ocaml +default: 1.e-6 [max_dim_diis_tcscf] type: integer @@ -154,21 +178,9 @@ doc: Energy shift on the virtual MOs to improve TCSCF convergence interface: ezfio,provider,ocaml default: 0. -[tcscf_algorithm] -type: character*(32) -doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS] -interface: ezfio,provider,ocaml -default: Simple - [im_thresh_tcscf] type: Threshold doc: Thresholds on the Imag part of energy interface: ezfio,provider,ocaml default: 1.e-7 -[test_cycle_tc] -type: logical -doc: If |true|, the integrals of the three-body jastrow are computed with cycles -interface: ezfio,provider,ocaml -default: False - diff --git a/src/tc_scf/diago_bi_ort_tcfock.irp.f b/src/tc_scf/diago_bi_ort_tcfock.irp.f index 9c571f8a..726169d9 100644 --- a/src/tc_scf/diago_bi_ort_tcfock.irp.f +++ b/src/tc_scf/diago_bi_ort_tcfock.irp.f @@ -13,14 +13,10 @@ integer :: n_real_tc integer :: i, j, k, l double precision :: accu_d, accu_nd, accu_tmp - double precision :: thr_d, thr_nd double precision :: norm double precision, allocatable :: eigval_right_tmp(:) double precision, allocatable :: F_tmp(:,:) - thr_d = 1d-6 - thr_nd = 1d-6 - allocate( eigval_right_tmp(mo_num), F_tmp(mo_num,mo_num) ) PROVIDE Fock_matrix_tc_mo_tot @@ -38,13 +34,13 @@ F_tmp(i,i) += level_shift_tcscf enddo - call non_hrmt_bieig( mo_num, F_tmp, thr_d, thr_nd & - , fock_tc_leigvec_mo, fock_tc_reigvec_mo & + call non_hrmt_bieig( mo_num, F_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag & + , fock_tc_leigvec_mo, fock_tc_reigvec_mo & , n_real_tc, eigval_right_tmp ) !if(max_ov_tc_scf)then - ! call non_hrmt_fock_mat( mo_num, F_tmp, thr_d, thr_nd & - ! , fock_tc_leigvec_mo, fock_tc_reigvec_mo & + ! call non_hrmt_fock_mat( mo_num, F_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag & + ! , fock_tc_leigvec_mo, fock_tc_reigvec_mo & ! , n_real_tc, eigval_right_tmp ) !else ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, F_tmp & @@ -88,16 +84,16 @@ else accu_tmp = overlap_fock_tc_eigvec_mo(k,i) accu_nd += accu_tmp * accu_tmp - if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thr_nd)then + if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thresh_biorthog_nondiag)then print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i) endif endif enddo enddo accu_nd = dsqrt(accu_nd) / accu_d - if(accu_nd .gt. thr_nd) then + if(accu_nd .gt. thresh_biorthog_nondiag) then print *, ' bi-orthog failed' - print *, ' accu_nd MO = ', accu_nd, thr_nd + print *, ' accu_nd MO = ', accu_nd, thresh_biorthog_nondiag print *, ' overlap_fock_tc_eigvec_mo = ' do i = 1, mo_num write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:) @@ -107,14 +103,14 @@ ! --- - if(dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thr_d) then + if(dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thresh_biorthog_diag) then print *, ' mo_num = ', mo_num - print *, ' accu_d MO = ', accu_d, thr_d + print *, ' accu_d MO = ', accu_d, thresh_biorthog_diag print *, ' normalizing vectors ...' do i = 1, mo_num norm = dsqrt(dabs(overlap_fock_tc_eigvec_mo(i,i))) - if(norm .gt. thr_d) then + if(norm .gt. thresh_biorthog_diag) then do k = 1, mo_num fock_tc_reigvec_mo(k,i) *= 1.d0/norm fock_tc_leigvec_mo(k,i) *= 1.d0/norm @@ -137,16 +133,16 @@ else accu_tmp = overlap_fock_tc_eigvec_mo(k,i) accu_nd += accu_tmp * accu_tmp - if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thr_nd)then + if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thresh_biorthog_nondiag)then print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i) endif endif enddo enddo accu_nd = dsqrt(accu_nd) / accu_d - if(accu_nd .gt. thr_nd) then + if(accu_nd .gt. thresh_biorthog_diag) then print *, ' bi-orthog failed' - print *, ' accu_nd MO = ', accu_nd, thr_nd + print *, ' accu_nd MO = ', accu_nd, thresh_biorthog_nondiag print *, ' overlap_fock_tc_eigvec_mo = ' do i = 1, mo_num write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:) @@ -177,6 +173,7 @@ END_PROVIDER double precision :: accu, accu_d double precision, allocatable :: tmp(:,:) + PROVIDE mo_l_coef mo_r_coef ! ! MO_R x R call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0 & diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index 048255f6..fccfd837 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -310,46 +310,6 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)] deallocate(f_tmp) !$OMP END PARALLEL -! TODO -! !$OMP PARALLEL DEFAULT (NONE) & -! !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, & -! !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & -! !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a) -! !$OMP DO -! do g = 1, ao_num -! do e = 1, ao_num -! dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) -! dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) -! dm_ge = dm_ge_a + dm_ge_b -! do d = 1, ao_num -! do k = 1, ao_num -! dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) -! dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) -! dm_dk = dm_dk_a + dm_dk_b -! do mu = 1, ao_num -! do nu = 1, ao_num -! call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) -! call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) -! call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) -! call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) -! call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) -! call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) -! fock_3e_uhf_ao_a(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & -! + dm_ge_a * dm_dk_a * i_mugd_eknu & -! + dm_ge_a * dm_dk_a * i_mugd_knue & -! - dm_ge_a * dm_dk * i_mugd_enuk & -! - dm_ge * dm_dk_a * i_mugd_kenu & -! - dm_ge_a * dm_dk_a * i_mugd_nuke & -! - dm_ge_b * dm_dk_b * i_mugd_nuke ) -! enddo -! enddo -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! !$OMP END PARALLEL - call wall_time(tf) print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti @@ -436,44 +396,6 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)] deallocate(f_tmp) !$OMP END PARALLEL -! TODO -! !$OMP PARALLEL DO DEFAULT (NONE) & -! !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, & -! !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & -! !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b) -! do g = 1, ao_num -! do e = 1, ao_num -! dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) -! dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) -! dm_ge = dm_ge_a + dm_ge_b -! do d = 1, ao_num -! do k = 1, ao_num -! dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) -! dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) -! dm_dk = dm_dk_a + dm_dk_b -! do mu = 1, ao_num -! do nu = 1, ao_num -! call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) -! call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) -! call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) -! call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) -! call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) -! call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) -! fock_3e_uhf_ao_b(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & -! + dm_ge_b * dm_dk_b * i_mugd_eknu & -! + dm_ge_b * dm_dk_b * i_mugd_knue & -! - dm_ge_b * dm_dk * i_mugd_enuk & -! - dm_ge * dm_dk_b * i_mugd_kenu & -! - dm_ge_b * dm_dk_b * i_mugd_nuke & -! - dm_ge_a * dm_dk_a * i_mugd_nuke ) -! enddo -! enddo -! enddo -! enddo -! enddo -! enddo -! !$OMP END PARALLEL DO - call wall_time(tf) print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f index 5981791c..6796666d 100644 --- a/src/tc_scf/fock_tc.irp.f +++ b/src/tc_scf/fock_tc.irp.f @@ -1,35 +1,45 @@ ! --- - BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_alpha, (ao_num, ao_num)] -&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_beta , (ao_num, ao_num)] - BEGIN_DOC -! two_e_tc_non_hermit_integral_alpha(k,i) = -! -! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions - END_DOC + BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_alpha, (ao_num, ao_num)] +&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_beta , (ao_num, ao_num)] + + BEGIN_DOC + ! + ! two_e_tc_non_hermit_integral_seq_alpha(k,i) = + ! + ! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions + ! + END_DOC + implicit none integer :: i, j, k, l double precision :: density, density_a, density_b + double precision :: t0, t1 - two_e_tc_non_hermit_integral_alpha = 0.d0 - two_e_tc_non_hermit_integral_beta = 0.d0 + !print*, ' providing two_e_tc_non_hermit_integral_seq ...' + !call wall_time(t0) + + two_e_tc_non_hermit_integral_seq_alpha = 0.d0 + two_e_tc_non_hermit_integral_seq_beta = 0.d0 - !! TODO :: parallelization properly done do i = 1, ao_num do k = 1, ao_num -!!$OMP PARALLEL & -!!$OMP DEFAULT (NONE) & -!!$OMP PRIVATE (j,l,density_a,density_b,density) & -!!$OMP SHARED (i,k,ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,ao_non_hermit_term_chemist) & -!!$OMP SHARED (two_e_tc_non_hermit_integral_alpha,two_e_tc_non_hermit_integral_beta) -!!$OMP DO SCHEDULE (dynamic) do j = 1, ao_num do l = 1, ao_num density_a = TCSCF_density_matrix_ao_alpha(l,j) density_b = TCSCF_density_matrix_ao_beta (l,j) - density = density_a + density_b + density = density_a + density_b + + !! rho(l,j) * < k l| T | i j> + !two_e_tc_non_hermit_integral_seq_alpha(k,i) += density * ao_two_e_tc_tot(l,j,k,i) + !! rho(l,j) * < k l| T | i j> + !two_e_tc_non_hermit_integral_seq_beta (k,i) += density * ao_two_e_tc_tot(l,j,k,i) + !! rho_a(l,j) * < l k| T | i j> + !two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i) + !! rho_b(l,j) * < l k| T | i j> + !two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i) !! rho(l,j) * < k l| T | i j> !two_e_tc_non_hermit_integral_alpha(k,i) += density * ao_two_e_tc_tot(l,j,k,i) @@ -41,32 +51,106 @@ !two_e_tc_non_hermit_integral_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i) ! rho(l,j) * < k l| T | i j> - two_e_tc_non_hermit_integral_alpha(k,i) += density * ao_two_e_tc_tot(k,i,l,j) + two_e_tc_non_hermit_integral_seq_alpha(k,i) += density * ao_two_e_tc_tot(k,i,l,j) ! rho(l,j) * < k l| T | i j> - two_e_tc_non_hermit_integral_beta (k,i) += density * ao_two_e_tc_tot(k,i,l,j) + two_e_tc_non_hermit_integral_seq_beta (k,i) += density * ao_two_e_tc_tot(k,i,l,j) ! rho_a(l,j) * < k l| T | j i> - two_e_tc_non_hermit_integral_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i) + two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i) ! rho_b(l,j) * < k l| T | j i> - two_e_tc_non_hermit_integral_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i) + two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i) enddo enddo -!!$OMP END DO -!!$OMP END PARALLEL enddo enddo + !call wall_time(t1) + !print*, ' wall time for two_e_tc_non_hermit_integral_seq after = ', t1 - t0 + +END_PROVIDER + +! --- + + BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_alpha, (ao_num, ao_num)] +&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_beta , (ao_num, ao_num)] + + BEGIN_DOC + ! + ! two_e_tc_non_hermit_integral_alpha(k,i) = + ! + ! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions + ! + END_DOC + + implicit none + integer :: i, j, k, l + double precision :: density, density_a, density_b, I_coul, I_kjli + double precision :: t0, t1 + double precision, allocatable :: tmp_a(:,:), tmp_b(:,:) + + !print*, ' providing two_e_tc_non_hermit_integral ...' + !call wall_time(t0) + + two_e_tc_non_hermit_integral_alpha = 0.d0 + two_e_tc_non_hermit_integral_beta = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli) & + !$OMP SHARED (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, & + !$OMP two_e_tc_non_hermit_integral_alpha, two_e_tc_non_hermit_integral_beta) + + allocate(tmp_a(ao_num,ao_num), tmp_b(ao_num,ao_num)) + tmp_a = 0.d0 + tmp_b = 0.d0 + + !$OMP DO + do j = 1, ao_num + do l = 1, ao_num + density_a = TCSCF_density_matrix_ao_alpha(l,j) + density_b = TCSCF_density_matrix_ao_beta (l,j) + density = density_a + density_b + do i = 1, ao_num + do k = 1, ao_num + + I_coul = density * ao_two_e_tc_tot(k,i,l,j) + I_kjli = ao_two_e_tc_tot(k,j,l,i) + + tmp_a(k,i) += I_coul - density_a * I_kjli + tmp_b(k,i) += I_coul - density_b * I_kjli + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do i = 1, ao_num + do j = 1, ao_num + two_e_tc_non_hermit_integral_alpha(j,i) += tmp_a(j,i) + two_e_tc_non_hermit_integral_beta (j,i) += tmp_b(j,i) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp_a, tmp_b) + !$OMP END PARALLEL + + !call wall_time(t1) + !print*, ' wall time for two_e_tc_non_hermit_integral after = ', t1 - t0 + END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_alpha, (ao_num, ao_num)] - implicit none + BEGIN_DOC - ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the AO basis + ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the AO basis END_DOC - Fock_matrix_tc_ao_alpha = ao_one_e_integrals_tc_tot & - + two_e_tc_non_hermit_integral_alpha + + implicit none + + Fock_matrix_tc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha END_PROVIDER @@ -75,12 +159,12 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_beta, (ao_num, ao_num)] BEGIN_DOC - ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis + ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis END_DOC + implicit none - Fock_matrix_tc_ao_beta = ao_one_e_integrals_tc_tot & - + two_e_tc_non_hermit_integral_beta + Fock_matrix_tc_ao_beta = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_beta END_PROVIDER @@ -171,25 +255,38 @@ END_PROVIDER do i = 1, elec_beta_num ! doc --> SOMO do k = elec_beta_num+1, elec_alpha_num - grad_non_hermit_left += dabs(Fock_matrix_tc_mo_tot(k,i)) - grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k)) + grad_non_hermit_left = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i))) + grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k))) + !grad_non_hermit_left += dabs(Fock_matrix_tc_mo_tot(k,i)) + !grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k)) + !grad_non_hermit_left += Fock_matrix_tc_mo_tot(k,i) * Fock_matrix_tc_mo_tot(k,i) + !grad_non_hermit_right += Fock_matrix_tc_mo_tot(i,k) * Fock_matrix_tc_mo_tot(i,k) enddo enddo do i = 1, elec_beta_num ! doc --> virt do k = elec_alpha_num+1, mo_num - grad_non_hermit_left += dabs(Fock_matrix_tc_mo_tot(k,i)) - grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k)) + grad_non_hermit_left = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i))) + grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k))) + !grad_non_hermit_left += dabs(Fock_matrix_tc_mo_tot(k,i)) + !grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k)) + grad_non_hermit_left += Fock_matrix_tc_mo_tot(k,i) * Fock_matrix_tc_mo_tot(k,i) + grad_non_hermit_right += Fock_matrix_tc_mo_tot(i,k) * Fock_matrix_tc_mo_tot(i,k) enddo enddo do i = elec_beta_num+1, elec_alpha_num ! SOMO --> virt do k = elec_alpha_num+1, mo_num - grad_non_hermit_left += dabs(Fock_matrix_tc_mo_tot(k,i)) - grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k)) + grad_non_hermit_left = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i))) + grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k))) + !grad_non_hermit_left += dabs(Fock_matrix_tc_mo_tot(k,i)) + !grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k)) + grad_non_hermit_left += Fock_matrix_tc_mo_tot(k,i) * Fock_matrix_tc_mo_tot(k,i) + grad_non_hermit_right += Fock_matrix_tc_mo_tot(i,k) * Fock_matrix_tc_mo_tot(i,k) enddo enddo + !grad_non_hermit = dsqrt(grad_non_hermit_left) + dsqrt(grad_non_hermit_right) grad_non_hermit = grad_non_hermit_left + grad_non_hermit_right END_PROVIDER diff --git a/src/tc_scf/fock_three.irp.f b/src/tc_scf/fock_three.irp.f index 3901f707..424eeffd 100644 --- a/src/tc_scf/fock_three.irp.f +++ b/src/tc_scf/fock_three.irp.f @@ -79,6 +79,8 @@ BEGIN_PROVIDER [double precision, diag_three_elem_hf] double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231 double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb + PROVIDE mo_l_coef mo_r_coef + !print *, ' providing diag_three_elem_hf' if(.not. three_body_h_tc) then diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f new file mode 100644 index 00000000..306c78b3 --- /dev/null +++ b/src/tc_scf/rh_tcscf_diis.irp.f @@ -0,0 +1,362 @@ +! --- + +subroutine rh_tcscf_diis() + + implicit none + + integer :: i, j, it + integer :: dim_DIIS, index_dim_DIIS + double precision :: etc_tot, etc_1e, etc_2e, etc_3e, e_save, e_delta + double precision :: tc_grad, g_save, g_delta, g_delta_th + double precision :: level_shift_save, rate_th + double precision :: t0, t1 + double precision :: er_DIIS, er_delta, er_save, er_delta_th + double precision, allocatable :: F_DIIS(:,:,:), E_DIIS(:,:,:) + double precision, allocatable :: mo_r_coef_save(:,:), mo_l_coef_save(:,:) + + logical, external :: qp_stop + + it = 0 + e_save = 0.d0 + dim_DIIS = 0 + g_delta_th = 1d0 + er_delta_th = 1d0 + rate_th = 100.d0 !0.01d0 !0.2d0 + + allocate(mo_r_coef_save(ao_num,mo_num), mo_l_coef_save(ao_num,mo_num)) + mo_l_coef_save = 0.d0 + mo_r_coef_save = 0.d0 + + allocate(F_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF), E_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF)) + F_DIIS = 0.d0 + E_DIIS = 0.d0 + + call write_time(6) + + ! --- + + PROVIDE level_shift_TCSCF + PROVIDE mo_l_coef mo_r_coef + + write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & + '====', '================', '================', '================', '================', '================' & + , '================', '================', '================', '====', '========' + + write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & + ' it ', ' SCF TC Energy ', ' E(1e) ', ' E(2e) ', ' E(3e) ', ' energy diff ' & + , ' gradient ', ' DIIS error ', ' level shift ', 'DIIS', ' WT (m)' + + write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & + '====', '================', '================', '================', '================', '================' & + , '================', '================', '================', '====', '========' + + + ! first iteration (HF orbitals) + call wall_time(t0) + + etc_tot = TC_HF_energy + etc_1e = TC_HF_one_e_energy + etc_2e = TC_HF_two_e_energy + etc_3e = 0.d0 + if(three_body_h_tc) then + etc_3e = diag_three_elem_hf + endif + tc_grad = grad_non_hermit + er_DIIS = maxval(abs(FQS_SQF_mo)) + e_delta = dabs(etc_tot - e_save) + + e_save = etc_tot + g_save = tc_grad + er_save = er_DIIS + + call wall_time(t1) + write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & + it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 + + ! --- + + PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot + + do while((tc_grad .gt. dsqrt(thresh_tcscf)) .and. (er_DIIS .gt. threshold_DIIS_nonzero_TCSCF)) + + call wall_time(t0) + + it += 1 + if(it > n_it_TCSCF_max) then + print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max + stop + endif + + dim_DIIS = min(dim_DIIS+1, max_dim_DIIS_TCSCF) + + ! --- + + if(dabs(e_delta) > 1.d-12) then + + index_dim_DIIS = mod(dim_DIIS-1, max_dim_DIIS_TCSCF) + 1 + do j = 1, ao_num + do i = 1, ao_num + F_DIIS(i,j,index_dim_DIIS) = Fock_matrix_tc_ao_tot(i,j) + E_DIIS(i,j,index_dim_DIIS) = FQS_SQF_ao (i,j) + enddo + enddo + + call extrapolate_TC_Fock_matrix(E_DIIS, F_DIIS, Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), it, dim_DIIS) + + call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) & + , Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) ) + TOUCH Fock_matrix_tc_mo_tot fock_matrix_tc_diag_mo_tot + endif + + ! --- + + mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num) + mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num) + !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) + !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + TOUCH mo_l_coef mo_r_coef + + ! --- + + g_delta = grad_non_hermit - g_save + er_delta = maxval(abs(FQS_SQF_mo)) - er_save + + !if((g_delta > rate_th * g_delta_th) .and. (er_delta > rate_th * er_delta_th) .and. (it > 1)) then + if((g_delta > rate_th * g_delta_th) .and. (it > 1)) then + !if((g_delta > 0.d0) .and. (it > 1)) then + + Fock_matrix_tc_ao_tot(1:ao_num,1:ao_num) = F_DIIS(1:ao_num,1:ao_num,index_dim_DIIS) + call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) & + , Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) ) + TOUCH Fock_matrix_tc_mo_tot fock_matrix_tc_diag_mo_tot + + mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num) + mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num) + !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) + !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + TOUCH mo_l_coef mo_r_coef + + endif + + ! --- + + g_delta = grad_non_hermit - g_save + er_delta = maxval(abs(FQS_SQF_mo)) - er_save + + mo_l_coef_save(1:ao_num,1:mo_num) = mo_l_coef(1:ao_num,1:mo_num) + mo_r_coef_save(1:ao_num,1:mo_num) = mo_r_coef(1:ao_num,1:mo_num) + + !do while((g_delta > rate_th * g_delta_th) .and. (er_delta > rate_th * er_delta_th) .and. (it > 1)) + do while((g_delta > rate_th * g_delta_th) .and. (it > 1)) + print *, ' big or bad step : ', g_delta, rate_th * g_delta_th + + mo_l_coef(1:ao_num,1:mo_num) = mo_l_coef_save(1:ao_num,1:mo_num) + mo_r_coef(1:ao_num,1:mo_num) = mo_r_coef_save(1:ao_num,1:mo_num) + if(level_shift_TCSCF <= .1d0) then + level_shift_TCSCF = 1.d0 + else + level_shift_TCSCF = level_shift_TCSCF * 3.0d0 + endif + TOUCH mo_l_coef mo_r_coef level_shift_TCSCF + + mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num) + mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num) + !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) + !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + TOUCH mo_l_coef mo_r_coef + + g_delta = grad_non_hermit - g_save + er_delta = maxval(abs(FQS_SQF_mo)) - er_save + + if(level_shift_TCSCF - level_shift_save > 40.d0) then + level_shift_TCSCF = level_shift_save * 4.d0 + SOFT_TOUCH level_shift_TCSCF + exit + endif + + dim_DIIS = 0 + enddo + + ! --- + + level_shift_TCSCF = level_shift_TCSCF * 0.5d0 + SOFT_TOUCH level_shift_TCSCF + + etc_tot = TC_HF_energy + etc_1e = TC_HF_one_e_energy + etc_2e = TC_HF_two_e_energy + etc_3e = 0.d0 + if(three_body_h_tc) then + etc_3e = diag_three_elem_hf + endif + tc_grad = grad_non_hermit + er_DIIS = maxval(abs(FQS_SQF_mo)) + e_delta = dabs(etc_tot - e_save) + g_delta = tc_grad - g_save + er_delta = er_DIIS - er_save + + e_save = etc_tot + g_save = tc_grad + level_shift_save = level_shift_TCSCF + er_save = er_DIIS + + g_delta_th = dabs(tc_grad) ! g_delta) + er_delta_th = dabs(er_DIIS) !er_delta) + + call wall_time(t1) + write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & + it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 + + if(g_delta .lt. 0.d0) then + call ezfio_set_tc_scf_bitc_energy(etc_tot) + call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) + call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + endif + + if(qp_stop()) exit + enddo + + ! --- + + print *, ' TCSCF DIIS converged !' + call print_energy_and_mos() + + call write_time(6) + + deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, E_DIIS) + + call ezfio_set_tc_scf_bitc_energy(TC_HF_energy) + call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) + call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + +end + +! --- + +subroutine extrapolate_TC_Fock_matrix(E_DIIS, F_DIIS, F_ao, size_F_ao, it, dim_DIIS) + + BEGIN_DOC + ! + ! Compute the extrapolated Fock matrix using the DIIS procedure + ! + ! e = \sum_i c_i e_i and \sum_i c_i = 1 + ! ==> lagrange multiplier with L = |e|^2 - \lambda (\sum_i c_i = 1) + ! + END_DOC + + implicit none + + integer, intent(in) :: it, size_F_ao + integer, intent(inout) :: dim_DIIS + double precision, intent(in) :: F_DIIS(ao_num,ao_num,dim_DIIS) + double precision, intent(in) :: E_DIIS(ao_num,ao_num,dim_DIIS) + double precision, intent(inout) :: F_ao(size_F_ao,ao_num) + + double precision, allocatable :: B_matrix_DIIS(:,:), X_vector_DIIS(:), C_vector_DIIS(:) + + integer :: i, j, k, l, i_DIIS, j_DIIS + integer :: lwork + double precision :: rcond, ferr, berr + integer, allocatable :: iwork(:) + double precision, allocatable :: scratch(:,:) + + if(dim_DIIS < 1) then + return + endif + + allocate( B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), X_vector_DIIS(dim_DIIS+1) & + , C_vector_DIIS(dim_DIIS+1), scratch(ao_num,ao_num) ) + + ! Compute the matrices B and X + B_matrix_DIIS(:,:) = 0.d0 + do j = 1, dim_DIIS + j_DIIS = min(dim_DIIS, mod(it-j, max_dim_DIIS_TCSCF)+1) + + do i = 1, dim_DIIS + i_DIIS = min(dim_DIIS, mod(it-i, max_dim_DIIS_TCSCF)+1) + + ! Compute product of two errors vectors + do l = 1, ao_num + do k = 1, ao_num + B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + E_DIIS(k,l,i_DIIS) * E_DIIS(k,l,j_DIIS) + enddo + enddo + + enddo + enddo + + ! Pad B matrix and build the X matrix + + C_vector_DIIS(:) = 0.d0 + do i = 1, dim_DIIS + B_matrix_DIIS(i,dim_DIIS+1) = -1.d0 + B_matrix_DIIS(dim_DIIS+1,i) = -1.d0 + enddo + C_vector_DIIS(dim_DIIS+1) = -1.d0 + + deallocate(scratch) + + ! Estimate condition number of B + integer :: info + double precision :: anorm + integer, allocatable :: ipiv(:) + double precision, allocatable :: AF(:,:) + double precision, external :: dlange + + lwork = max((dim_DIIS+1)**2, (dim_DIIS+1)*5) + allocate(AF(dim_DIIS+1,dim_DIIS+1)) + allocate(ipiv(2*(dim_DIIS+1)), iwork(2*(dim_DIIS+1)) ) + allocate(scratch(lwork,1)) + scratch(:,1) = 0.d0 + + anorm = dlange('1', dim_DIIS+1, dim_DIIS+1, B_matrix_DIIS, size(B_matrix_DIIS, 1), scratch(1,1)) + + AF(:,:) = B_matrix_DIIS(:,:) + call dgetrf(dim_DIIS+1, dim_DIIS+1, AF, size(AF, 1), ipiv, info) + if(info /= 0) then + dim_DIIS = 0 + return + endif + + call dgecon('1', dim_DIIS+1, AF, size(AF, 1), anorm, rcond, scratch, iwork, info) + if(info /= 0) then + dim_DIIS = 0 + return + endif + + if(rcond < 1.d-14) then + dim_DIIS = 0 + return + endif + + ! solve the linear system C = B x X + + X_vector_DIIS = C_vector_DIIS + call dgesv(dim_DIIS+1, 1, B_matrix_DIIS, size(B_matrix_DIIS, 1), ipiv , X_vector_DIIS, size(X_vector_DIIS, 1), info) + + deallocate(scratch, AF, iwork) + if(info < 0) then + stop ' bug in TC-DIIS' + endif + + ! Compute extrapolated Fock matrix + + !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED) if (ao_num > 200) + do j = 1, ao_num + do i = 1, ao_num + F_ao(i,j) = 0.d0 + enddo + do k = 1, dim_DIIS + if(dabs(X_vector_DIIS(k)) < 1.d-10) cycle + do i = 1,ao_num + ! FPE here + F_ao(i,j) = F_ao(i,j) + X_vector_DIIS(k) * F_DIIS(i,j,dim_DIIS-k+1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + +end + +! --- + diff --git a/src/tc_scf/rh_tcscf_simple.irp.f b/src/tc_scf/rh_tcscf_simple.irp.f new file mode 100644 index 00000000..30798e3d --- /dev/null +++ b/src/tc_scf/rh_tcscf_simple.irp.f @@ -0,0 +1,129 @@ +! --- + +subroutine rh_tcscf_simple() + + implicit none + integer :: i, j, it, dim_DIIS + double precision :: t0, t1 + double precision :: e_save, e_delta, rho_delta + double precision :: etc_tot, etc_1e, etc_2e, etc_3e, tc_grad + double precision :: er_DIIS + double precision, allocatable :: rho_old(:,:), rho_new(:,:) + + allocate(rho_old(ao_num,ao_num), rho_new(ao_num,ao_num)) + + it = 0 + e_save = 0.d0 + dim_DIIS = 0 + + ! --- + + if(.not. bi_ortho) then + print *, ' grad_hermit = ', grad_hermit + call save_good_hermit_tc_eigvectors + TOUCH mo_coef + call save_mos + endif + + ! --- + + if(bi_ortho) then + + PROVIDE level_shift_tcscf + PROVIDE mo_l_coef mo_r_coef + + write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & + '====', '================', '================', '================', '================', '================' & + , '================', '================', '================', '====', '========' + + write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & + ' it ', ' SCF TC Energy ', ' E(1e) ', ' E(2e) ', ' E(3e) ', ' energy diff ' & + , ' gradient ', ' DIIS error ', ' level shift ', 'DIIS', ' WT (m)' + + write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & + '====', '================', '================', '================', '================', '================' & + , '================', '================', '================', '====', '========' + + + ! first iteration (HF orbitals) + call wall_time(t0) + + etc_tot = TC_HF_energy + etc_1e = TC_HF_one_e_energy + etc_2e = TC_HF_two_e_energy + etc_3e = 0.d0 + if(three_body_h_tc) then + etc_3e = diag_three_elem_hf + endif + tc_grad = grad_non_hermit + er_DIIS = maxval(abs(FQS_SQF_mo)) + e_delta = dabs(etc_tot - e_save) + e_save = etc_tot + + call wall_time(t1) + write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & + it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 + + do while(tc_grad .gt. dsqrt(thresh_tcscf)) + call wall_time(t0) + + it += 1 + if(it > n_it_tcscf_max) then + print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max + stop + endif + + mo_l_coef = fock_tc_leigvec_ao + mo_r_coef = fock_tc_reigvec_ao + call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) + call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + TOUCH mo_l_coef mo_r_coef + + etc_tot = TC_HF_energy + etc_1e = TC_HF_one_e_energy + etc_2e = TC_HF_two_e_energy + etc_3e = 0.d0 + if(three_body_h_tc) then + etc_3e = diag_three_elem_hf + endif + tc_grad = grad_non_hermit + er_DIIS = maxval(abs(FQS_SQF_mo)) + e_delta = dabs(etc_tot - e_save) + e_save = etc_tot + + call ezfio_set_tc_scf_bitc_energy(etc_tot) + + call wall_time(t1) + write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & + it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 + enddo + + else + + do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. (it.lt.n_it_tcscf_max) ) + print*,'grad_hermit = ',grad_hermit + it += 1 + print *, 'iteration = ', it + print *, '***' + print *, 'TC HF total energy = ', TC_HF_energy + print *, 'TC HF 1 e energy = ', TC_HF_one_e_energy + print *, 'TC HF 2 e energy = ', TC_HF_two_e_energy + print *, 'TC HF 3 body = ', diag_three_elem_hf + print *, '***' + print *, '' + call save_good_hermit_tc_eigvectors + TOUCH mo_coef + call save_mos + enddo + + endif + + print *, ' TCSCF Simple converged !' + call print_energy_and_mos() + + deallocate(rho_old, rho_new) + +end + +! --- + diff --git a/src/tc_scf/rotate_tcscf_orbitals.irp.f b/src/tc_scf/rotate_tcscf_orbitals.irp.f index d53991ed..fc4a7935 100644 --- a/src/tc_scf/rotate_tcscf_orbitals.irp.f +++ b/src/tc_scf/rotate_tcscf_orbitals.irp.f @@ -260,14 +260,10 @@ subroutine fix_right_to_one() integer :: i, j, m, n, mm, tot_deg double precision :: accu_d, accu_nd double precision :: de_thr, ei, ej, de - double precision :: thr_d, thr_nd integer, allocatable :: deg_num(:) double precision, allocatable :: R0(:,:), L0(:,:), W(:,:), e0(:) double precision, allocatable :: R(:,:), L(:,:), S(:,:), Stmp(:,:), tmp(:,:) - thr_d = 1d-7 - thr_nd = 1d-7 - n = ao_num m = mo_num @@ -340,7 +336,7 @@ subroutine fix_right_to_one() ! --- call impose_weighted_orthog_svd(n, mm, W, R) - call impose_weighted_biorthog_qr(n, mm, thr_d, thr_nd, R, W, L) + call impose_weighted_biorthog_qr(n, mm, thresh_biorthog_diag, thresh_biorthog_nondiag, R, W, L) ! --- @@ -353,7 +349,7 @@ subroutine fix_right_to_one() endif enddo - call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thr_d, thr_nd, .true.) + call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thresh_biorthog_diag, thresh_biorthog_nondiag, .true.) deallocate(W, deg_num) diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 15264768..596ae500 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -259,7 +259,7 @@ subroutine orthog_functions(m, n, coef, overlap) double precision, intent(in) :: overlap(m,m) double precision, intent(inout) :: coef(m,n) double precision, allocatable :: stmp(:,:) - integer :: j + integer :: j, k allocate(stmp(n,n)) call build_s_matrix(m, n, coef, coef, overlap, stmp) @@ -270,7 +270,13 @@ subroutine orthog_functions(m, n, coef, overlap) call impose_orthog_svd_overlap(m, n, coef, overlap) call build_s_matrix(m, n, coef, coef, overlap, stmp) do j = 1, n - coef(1,:m) *= 1.d0/dsqrt(stmp(j,j)) + ! --- + ! TODO: MANU check ici + !coef(1,:m) *= 1.d0/dsqrt(stmp(j,j)) + do k = 1, m + coef(k,j) *= 1.d0/dsqrt(stmp(j,j)) + enddo + ! --- enddo call build_s_matrix(m, n, coef, coef, overlap, stmp) diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index fd11c48e..deaf8d82 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -1,7 +1,9 @@ +! --- + program tc_scf BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC implicit none @@ -15,14 +17,20 @@ program tc_scf ! my_n_pt_a_grid = 26 ! small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call create_guess() - call orthonormalize_mos() + PROVIDE mu_erf + print *, ' mu = ', mu_erf + PROVIDE j1b_type + print *, ' j1b_type = ', j1b_type + print *, j1b_pen + + !call create_guess() + !call orthonormalize_mos() PROVIDE tcscf_algorithm if(tcscf_algorithm == 'DIIS') then - call rh_tcscf() + call rh_tcscf_diis() elseif(tcscf_algorithm == 'Simple') then - call simple_tcscf() + call rh_tcscf_simple() else print *, ' not implemented yet', tcscf_algorithm stop @@ -35,11 +43,7 @@ end ! --- -subroutine create_guess - - BEGIN_DOC - ! Create a MO guess if no MOs are present in the EZFIO directory - END_DOC +subroutine create_guess() implicit none logical :: exists @@ -48,19 +52,16 @@ subroutine create_guess !call ezfio_has_mo_basis_mo_coef(exists) exists = .false. - if (.not.exists) then + if(.not.exists) then mo_label = 'Guess' - if (mo_guess_type == "HCore") then + if(mo_guess_type == "HCore") then mo_coef = ao_ortho_lowdin_coef call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10) TOUCH mo_coef - call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals, & - size(mo_one_e_integrals,1), & - size(mo_one_e_integrals,2), & - mo_label,1,.false.) - call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef,1), 1.d-10) + call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals, size(mo_one_e_integrals, 1), size(mo_one_e_integrals, 2), mo_label, 1, .false.) + call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10) SOFT_TOUCH mo_coef - else if (mo_guess_type == "Huckel") then + elseif (mo_guess_type == "Huckel") then call huckel_guess else print *, 'Unrecognized MO guess type : '//mo_guess_type @@ -72,198 +73,3 @@ subroutine create_guess end subroutine create_guess ! --- - -subroutine simple_tcscf() - - implicit none - integer :: i, j, it - double precision :: e_save, e_delta, rho_delta - double precision, allocatable :: rho_old(:,:), rho_new(:,:) - - allocate(rho_old(ao_num,ao_num), rho_new(ao_num,ao_num)) - - it = 0 - print*,'iteration = ', it - - !print*,'grad_hermit = ', grad_hermit - print*,'***' - print*,'TC HF total energy = ', TC_HF_energy - print*,'TC HF 1 e energy = ', TC_HF_one_e_energy - print*,'TC HF 2 e energy = ', TC_HF_two_e_energy - if(three_body_h_tc) then - print*,'TC HF 3 body = ', diag_three_elem_hf - endif - print*,'***' - e_delta = 10.d0 - e_save = 0.d0 !TC_HF_energy - rho_delta = 10.d0 - - - if(bi_ortho)then - - mo_l_coef = fock_tc_leigvec_ao - mo_r_coef = fock_tc_reigvec_ao - rho_old = TCSCF_bi_ort_dm_ao - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - TOUCH mo_l_coef mo_r_coef - - else - - print *, ' grad_hermit = ', grad_hermit - call save_good_hermit_tc_eigvectors - TOUCH mo_coef - call save_mos - - endif - - ! --- - - if(bi_ortho) then - - !do while(e_delta .gt. dsqrt(thresh_tcscf)) ) - !do while(e_delta .gt. thresh_tcscf) ) - !do while(rho_delta .gt. thresh_tcscf) ) - !do while(grad_non_hermit_right .gt. dsqrt(thresh_tcscf)) - do while(grad_non_hermit .gt. dsqrt(thresh_tcscf)) - - it += 1 - if(it > n_it_tcscf_max) then - print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max - stop - endif - - - print *, ' ***' - print *, ' iteration = ', it - - print *, ' TC HF total energy = ', TC_HF_energy - print *, ' TC HF 1 e energy = ', TC_HF_one_e_energy - print *, ' TC HF 2 non hermit = ', TC_HF_two_e_energy - if(three_body_h_tc) then - print *, ' TC HF 3 body = ', diag_three_elem_hf - endif - e_delta = dabs(TC_HF_energy - e_save) - - print *, ' delta E = ', e_delta - print *, ' gradient = ', grad_non_hermit - print *, ' max TC DIIS error = ', maxval(abs(FQS_SQF_mo)) - - !print *, ' gradient= ', grad_non_hermit_right - - !rho_new = TCSCF_bi_ort_dm_ao - !!print*, rho_new - !rho_delta = 0.d0 - !do i = 1, ao_num - ! do j = 1, ao_num - ! rho_delta += dabs(rho_new(j,i) - rho_old(j,i)) - ! enddo - !enddo - !print *, ' rho_delta =', rho_delta - !rho_old = rho_new - - e_save = TC_HF_energy - mo_l_coef = fock_tc_leigvec_ao - mo_r_coef = fock_tc_reigvec_ao - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - TOUCH mo_l_coef mo_r_coef - call ezfio_set_tc_scf_bitc_energy(TC_HF_energy) - - call test_fock_3e_uhf_mo() - - print *, ' ***' - print *, '' - - enddo - - else - do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. it .lt. n_it_tcscf_max ) - print*,'grad_hermit = ',grad_hermit - it += 1 - print *, 'iteration = ', it - print *, '***' - print *, 'TC HF total energy = ', TC_HF_energy - print *, 'TC HF 1 e energy = ', TC_HF_one_e_energy - print *, 'TC HF 2 e energy = ', TC_HF_two_e_energy - print *, 'TC HF 3 body = ', diag_three_elem_hf - print *, '***' - print *, '' - call save_good_hermit_tc_eigvectors - TOUCH mo_coef - call save_mos - - enddo - - endif - - print *, ' TCSCF Simple converged !' - call print_energy_and_mos() - - deallocate(rho_old, rho_new) - -end subroutine simple_tcscf - -! --- - -subroutine test_fock_3e_uhf_mo() - - implicit none - integer :: i, j - double precision :: diff_tot, diff_ij, thr_ih, norm - - thr_ih = 1d-12 - - PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth - PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_b - - ! --- - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - - diff_ij = dabs(fock_3e_uhf_mo_a(j,i) - fock_a_tot_3e_bi_orth(j,i)) - if(diff_ij .gt. thr_ih) then - !print *, ' difference on ', j, i - !print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i) - !print *, ' UHF : ', fock_3e_uhf_mo_a (j,i) - !stop - endif - - norm += dabs(fock_a_tot_3e_bi_orth(j,i)) - diff_tot += diff_ij - enddo - enddo - print *, ' diff on F_a = ', diff_tot / norm - print *, ' norm_a = ', norm - print *, ' ' - - ! --- - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - - diff_ij = dabs(fock_3e_uhf_mo_b(j,i) - fock_b_tot_3e_bi_orth(j,i)) - if(diff_ij .gt. thr_ih) then - !print *, ' difference on ', j, i - !print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i) - !print *, ' UHF : ', fock_3e_uhf_mo_b (j,i) - !stop - endif - - norm += dabs(fock_b_tot_3e_bi_orth(j,i)) - diff_tot += diff_ij - enddo - enddo - print *, ' diff on F_b = ', diff_tot/norm - print *, ' norm_b = ', norm - print *, ' ' - - ! --- - -end subroutine test_fock_3e_uhf_mo() - diff --git a/src/tc_scf/tc_scf_dm.irp.f b/src/tc_scf/tc_scf_dm.irp.f index 1f054a30..90719f47 100644 --- a/src/tc_scf/tc_scf_dm.irp.f +++ b/src/tc_scf/tc_scf_dm.irp.f @@ -1,8 +1,11 @@ ! --- BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num) ] + implicit none + if(bi_ortho) then + PROVIDE mo_l_coef mo_r_coef TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta else TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta @@ -12,8 +15,11 @@ END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num) ] + implicit none - if(bi_ortho)then + + if(bi_ortho) then + PROVIDE mo_l_coef mo_r_coef TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha else TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha diff --git a/src/tc_scf/tc_scf_energy.irp.f b/src/tc_scf/tc_scf_energy.irp.f index c60ce761..611b8b4c 100644 --- a/src/tc_scf/tc_scf_energy.irp.f +++ b/src/tc_scf/tc_scf_energy.irp.f @@ -10,6 +10,8 @@ implicit none integer :: i, j + PROVIDE mo_l_coef mo_r_coef + TC_HF_energy = nuclear_repulsion TC_HF_one_e_energy = 0.d0 TC_HF_two_e_energy = 0.d0 diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index ae88dac3..a14c4126 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -25,7 +25,7 @@ program test_ints !! OK !call routine_v_ij_erf_rk_cst_mu_j1b !! OK -! call routine_x_v_ij_erf_rk_cst_mu_tmp_j1b +! call routine_x_v_ij_erf_rk_cst_mu_j1b !! OK ! call routine_v_ij_u_cst_mu_j1b @@ -43,10 +43,15 @@ program test_ints ! call test_ao_tc_int_chemist ! call test_grid_points_ao ! call test_tc_scf - call test_int_gauss + !call test_int_gauss !call test_fock_3e_uhf_ao() - call test_fock_3e_uhf_mo() + !call test_fock_3e_uhf_mo() + + !call test_tc_grad_and_lapl_ao() + !call test_tc_grad_square_ao() + + call test_two_e_tc_non_hermit_integral() end @@ -56,14 +61,8 @@ subroutine test_tc_scf implicit none integer :: i ! provide int2_u_grad1u_x_j1b2_test - provide x_v_ij_erf_rk_cst_mu_tmp_j1b_test -! do i = 1, ng_fit_jast -! print*,expo_gauss_1_erf_x_2(i),coef_gauss_1_erf_x_2(i) -! enddo -! provide tc_grad_square_ao_test -! provide tc_grad_and_lapl_ao_test -! provide int2_u_grad1u_x_j1b2_test -! provide x_v_ij_erf_rk_cst_mu_tmp_j1b_test + provide x_v_ij_erf_rk_cst_mu_j1b_test +! provide x_v_ij_erf_rk_cst_mu_j1b_test ! print*,'TC_HF_energy = ',TC_HF_energy ! print*,'grad_non_hermit = ',grad_non_hermit end @@ -212,7 +211,7 @@ subroutine routine_v_ij_erf_rk_cst_mu_j1b end -subroutine routine_x_v_ij_erf_rk_cst_mu_tmp_j1b +subroutine routine_x_v_ij_erf_rk_cst_mu_j1b implicit none integer :: i,j,ipoint,k,l,m double precision :: weight,accu_relat, accu_abs, contrib @@ -242,8 +241,8 @@ subroutine routine_x_v_ij_erf_rk_cst_mu_tmp_j1b do i = 1, ao_num do j = 1, ao_num do m = 1, 3 - array(j,i,l,k) += x_v_ij_erf_rk_cst_mu_tmp_j1b_test(m,j,i,ipoint) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += x_v_ij_erf_rk_cst_mu_tmp_j1b(m,j,i,ipoint) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight + array(j,i,l,k) += x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight + array_ref(j,i,l,k) += x_v_ij_erf_rk_cst_mu_j1b (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight enddo enddo enddo @@ -500,8 +499,8 @@ subroutine routine_int2_u_grad1u_x_j1b2 do i = 1, ao_num do j = 1, ao_num do m = 1, 3 - array(j,i,l,k) += int2_u_grad1u_x_j1b2_test(m,j,i,ipoint) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += int2_u_grad1u_x_j1b2(m,j,i,ipoint) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight + array(j,i,l,k) += int2_u_grad1u_x_j1b2_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight + array_ref(j,i,l,k) += int2_u_grad1u_x_j1b2 (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight enddo enddo enddo @@ -708,7 +707,7 @@ subroutine test_fock_3e_uhf_mo() ! --- -end subroutine test_fock_3e_uhf_mo() +end subroutine test_fock_3e_uhf_mo ! --- @@ -776,9 +775,9 @@ subroutine test_grid_points_ao icount_bad = 0 icount_full = 0 do ipoint = 1, n_points_final_grid -! if(dabs(int2_u_grad1u_x_j1b2_test(1,j,i,ipoint)) & -! + dabs(int2_u_grad1u_x_j1b2_test(2,j,i,ipoint)) & -! + dabs(int2_u_grad1u_x_j1b2_test(2,j,i,ipoint)) ) +! if(dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,1)) & +! + dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,2)) & +! + dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,3)) ) ! if(dabs(int2_u2_j1b2_test(j,i,ipoint)).gt.thr)then ! icount += 1 ! endif @@ -848,3 +847,157 @@ subroutine test_int_gauss end +! --- + +subroutine test_tc_grad_and_lapl_ao() + + implicit none + integer :: i, j, k, l + double precision :: diff_tot, diff, thr_ih, norm + + thr_ih = 1d-10 + + PROVIDE tc_grad_and_lapl_ao tc_grad_and_lapl_ao_loop + + norm = 0.d0 + diff_tot = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + + diff = dabs(tc_grad_and_lapl_ao_loop(l,k,j,i) - tc_grad_and_lapl_ao(l,k,j,i)) + if(diff .gt. thr_ih) then + print *, ' difference on ', l, k, j, i + print *, ' loops : ', tc_grad_and_lapl_ao_loop(l,k,j,i) + print *, ' lapack: ', tc_grad_and_lapl_ao (l,k,j,i) + !stop + endif + + norm += dabs(tc_grad_and_lapl_ao_loop(l,k,j,i)) + diff_tot += diff + enddo + enddo + enddo + enddo + + print *, ' diff tot = ', diff_tot / norm + print *, ' norm = ', norm + print *, ' ' + + return + +end + +! --- + +subroutine test_tc_grad_square_ao() + + implicit none + integer :: i, j, k, l + double precision :: diff_tot, diff, thr_ih, norm + + thr_ih = 1d-10 + + PROVIDE tc_grad_square_ao tc_grad_square_ao_loop + + norm = 0.d0 + diff_tot = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + + diff = dabs(tc_grad_square_ao_loop(l,k,j,i) - tc_grad_square_ao(l,k,j,i)) + if(diff .gt. thr_ih) then + print *, ' difference on ', l, k, j, i + print *, ' loops : ', tc_grad_square_ao_loop(l,k,j,i) + print *, ' lapack: ', tc_grad_square_ao (l,k,j,i) + !stop + endif + + norm += dabs(tc_grad_square_ao_loop(l,k,j,i)) + diff_tot += diff + enddo + enddo + enddo + enddo + + print *, ' diff tot = ', diff_tot / norm + print *, ' norm = ', norm + print *, ' ' + + return + +end + +! --- + +subroutine test_two_e_tc_non_hermit_integral() + + implicit none + integer :: i, j + double precision :: diff_tot, diff, thr_ih, norm + + thr_ih = 1d-10 + + PROVIDE two_e_tc_non_hermit_integral_beta two_e_tc_non_hermit_integral_alpha + PROVIDE two_e_tc_non_hermit_integral_seq_beta two_e_tc_non_hermit_integral_seq_alpha + + ! --- + + norm = 0.d0 + diff_tot = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + + diff = dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i) - two_e_tc_non_hermit_integral_alpha(j,i)) + if(diff .gt. thr_ih) then + print *, ' difference on ', j, i + print *, ' seq : ', two_e_tc_non_hermit_integral_seq_alpha(j,i) + print *, ' // : ', two_e_tc_non_hermit_integral_alpha (j,i) + !stop + endif + + norm += dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i)) + diff_tot += diff + enddo + enddo + + print *, ' diff tot a = ', diff_tot / norm + print *, ' norm a = ', norm + print *, ' ' + + ! --- + + norm = 0.d0 + diff_tot = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + + diff = dabs(two_e_tc_non_hermit_integral_seq_beta(j,i) - two_e_tc_non_hermit_integral_beta(j,i)) + if(diff .gt. thr_ih) then + print *, ' difference on ', j, i + print *, ' seq : ', two_e_tc_non_hermit_integral_seq_beta(j,i) + print *, ' // : ', two_e_tc_non_hermit_integral_beta (j,i) + !stop + endif + + norm += dabs(two_e_tc_non_hermit_integral_seq_beta(j,i)) + diff_tot += diff + enddo + enddo + + print *, ' diff tot b = ', diff_tot / norm + print *, ' norm b = ', norm + print *, ' ' + + ! --- + + return + +end + +! --- + +>>>>>>> 92a4e33f8a21717cab0c0e4f8412ed6903afb04a diff --git a/src/tools/print_he_energy.irp.f b/src/tools/print_he_energy.irp.f index 87488fba..8daa2b8b 100644 --- a/src/tools/print_he_energy.irp.f +++ b/src/tools/print_he_energy.irp.f @@ -7,8 +7,8 @@ program print_he_energy call print_overlap() - call print_energy1() - call print_energy2() + !call print_energy1() + !call print_energy2() end diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 38a8cad2..1e33c7dc 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1136,7 +1136,6 @@ subroutine ortho_svd(A,LDA,m,n) end -! QR to orthonormalize CSFs does not work :-( !subroutine ortho_qr_withB(A,LDA,B,m,n) ! implicit none ! BEGIN_DOC @@ -1223,7 +1222,7 @@ end ! ! !deallocate(WORK,TAU) !end - +! !subroutine ortho_qr_csf(A, LDA, B, m, n) bind(C, name="ortho_qr_csf") ! use iso_c_binding ! integer(c_int32_t), value :: LDA @@ -1234,6 +1233,7 @@ end ! call ortho_qr_withB(A,LDA,B,m,n) !end subroutine ortho_qr_csf + subroutine ortho_qr(A,LDA,m,n) implicit none BEGIN_DOC @@ -1697,7 +1697,7 @@ subroutine restore_symmetry(m,n,A,LDA,thresh) ! TODO: Costs O(n^4), but can be improved to (2 n^2 * log(n)): ! - copy all values in a 1D array ! - sort 1D array - ! - average nearby elements + ! - average nearby elements ! - for all elements, find matching value in the sorted 1D array allocate(done(m,n)) @@ -1800,7 +1800,7 @@ end ! A_tmp(i,k) = A(i,k) ! enddo ! enddo -! +! ! ! Find optimal size for temp arrays ! allocate(work(1)) ! lwork = -1 @@ -1836,7 +1836,7 @@ end ! endif ! ! deallocate(A_tmp,work) -! +! ! !do j=1, m ! ! do i=1, LDU ! ! if (dabs(U(i,j)) < 1.d-14) U(i,j) = 0.d0 @@ -1847,7 +1847,7 @@ end ! ! if (dabs(Vt(i,j)) < 1.d-14) Vt(i,j) = 0.d0 ! ! enddo ! !enddo -! +! !end ! @@ -1877,8 +1877,8 @@ subroutine diag_nonsym_right(n, A, A_ldim, V, V_ldim, energy, E_ldim) enddo enddo - JOBVL = "N" ! computes the left eigenvectors - JOBVR = "V" ! computes the right eigenvectors + JOBVL = "N" ! computes the left eigenvectors + JOBVR = "V" ! computes the right eigenvectors BALANC = "B" ! Diagonal scaling and Permutation for optimization SENSE = "V" ! Determines which reciprocal condition numbers are computed lda = n @@ -1888,10 +1888,10 @@ subroutine diag_nonsym_right(n, A, A_ldim, V, V_ldim, energy, E_ldim) allocate( WORK(1), SCALE_array(n), RCONDE(n), RCONDV(n), IWORK(2*n-2) ) LWORK = -1 ! to ask for the optimal size of WORK - call dgeevx( BALANC, JOBVL, JOBVR, SENSE & ! CHARACTERS + call dgeevx( BALANC, JOBVL, JOBVR, SENSE & ! CHARACTERS , n, Atmp, lda & ! MATRIX TO DIAGONALIZE - , WR, WI & ! REAL AND IMAGINARY PART OF EIGENVALUES - , VL, ldvl, VR, ldvr & ! LEFT AND RIGHT EIGENVECTORS + , WR, WI & ! REAL AND IMAGINARY PART OF EIGENVALUES + , VL, ldvl, VR, ldvr & ! LEFT AND RIGHT EIGENVECTORS , ILO, IHI, SCALE_array, ABNRM, RCONDE, RCONDV & ! OUTPUTS OF OPTIMIZATION , WORK, LWORK, IWORK, INFO ) @@ -1900,7 +1900,7 @@ subroutine diag_nonsym_right(n, A, A_ldim, V, V_ldim, energy, E_ldim) stop endif - LWORK = max(int(work(1)), 1) ! this is the optimal size of WORK + LWORK = max(int(work(1)), 1) ! this is the optimal size of WORK deallocate(WORK) allocate(WORK(LWORK)) call dgeevx( BALANC, JOBVL, JOBVR, SENSE & @@ -1982,6 +1982,8 @@ end subroutine diag_nonsym_right ! --- +! Taken from GammCor thanks to Michal Hapka :-) + subroutine pivoted_cholesky( A, rank, tol, ndim, U) ! diff --git a/src/utils/qsort.c b/src/utils/qsort.c new file mode 100644 index 00000000..c011b35a --- /dev/null +++ b/src/utils/qsort.c @@ -0,0 +1,373 @@ +/* [[file:~/qp2/src/utils/qsort.org::*Generated%20C%20file][Generated C file:1]] */ +#include +#include + +struct int16_t_comp { + int16_t x; + int32_t i; +}; + +int compare_int16_t( const void * l, const void * r ) +{ + const int16_t * restrict _l= l; + const int16_t * restrict _r= r; + if( *_l > *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int16_t(int16_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int16_t_comp* A = malloc(isize * sizeof(struct int16_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int16_t_big(int16_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int16_t_comp_big* A = malloc(isize * sizeof(struct int16_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int32_t(int32_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int32_t_comp* A = malloc(isize * sizeof(struct int32_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int32_t_big(int32_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int32_t_comp_big* A = malloc(isize * sizeof(struct int32_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int64_t(int64_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int64_t_comp* A = malloc(isize * sizeof(struct int64_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int64_t_big(int64_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int64_t_comp_big* A = malloc(isize * sizeof(struct int64_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_double(double* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct double_comp* A = malloc(isize * sizeof(struct double_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_double_big(double* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct double_comp_big* A = malloc(isize * sizeof(struct double_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_float(float* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct float_comp* A = malloc(isize * sizeof(struct float_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_float_big(float* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct float_comp_big* A = malloc(isize * sizeof(struct float_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_TYPE_big(TYPE* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct TYPE_comp_big* A = malloc(isize * sizeof(struct TYPE_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i> +""" +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("TYPE", typ).replace("_big", "") ) + print( data.replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +#+NAME: replaced_f +#+begin_src python :results output :noweb yes +data = """ +<> +""" +c1 = { + "int16_t": "i2", + "int32_t": "i", + "int64_t": "i8", + "double": "d", + "float": "" +} +c2 = { + "int16_t": "integer", + "int32_t": "integer", + "int64_t": "integer", + "double": "real", + "float": "real" +} + +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +#+NAME: replaced_f2 +#+begin_src python :results output :noweb yes +data = """ +<> +""" +c1 = { + "int16_t": "i2", + "int32_t": "i", + "int64_t": "i8", + "double": "d", + "float": "" +} +c2 = { + "int16_t": "integer", + "int32_t": "integer", + "int64_t": "integer", + "double": "real", + "float": "real" +} + +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +* Generated C file + +#+BEGIN_SRC c :comments link :tangle qsort.c :noweb yes +#include +#include +<> +#+END_SRC + +* Generated Fortran file + +#+BEGIN_SRC f90 :tangle qsort_module.f90 :noweb yes +module qsort_module + use iso_c_binding + + interface + <> + end interface + +end module qsort_module + +<> + +#+END_SRC + diff --git a/src/utils/qsort_module.f90 b/src/utils/qsort_module.f90 new file mode 100644 index 00000000..a72a4f9e --- /dev/null +++ b/src/utils/qsort_module.f90 @@ -0,0 +1,347 @@ +module qsort_module + use iso_c_binding + + interface + + subroutine i2sort_c(A, iorder, isize) bind(C, name="qsort_int16_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + end subroutine i2sort_c + + subroutine i2sort_noidx_c(A, isize) bind(C, name="qsort_int16_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int16_t) :: A(isize) + end subroutine i2sort_noidx_c + + + + subroutine i2sort_big_c(A, iorder, isize) bind(C, name="qsort_int16_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + end subroutine i2sort_big_c + + subroutine i2sort_noidx_big_c(A, isize) bind(C, name="qsort_int16_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int16_t) :: A(isize) + end subroutine i2sort_noidx_big_c + + + + subroutine isort_c(A, iorder, isize) bind(C, name="qsort_int32_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + end subroutine isort_c + + subroutine isort_noidx_c(A, isize) bind(C, name="qsort_int32_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int32_t) :: A(isize) + end subroutine isort_noidx_c + + + + subroutine isort_big_c(A, iorder, isize) bind(C, name="qsort_int32_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + end subroutine isort_big_c + + subroutine isort_noidx_big_c(A, isize) bind(C, name="qsort_int32_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int32_t) :: A(isize) + end subroutine isort_noidx_big_c + + + + subroutine i8sort_c(A, iorder, isize) bind(C, name="qsort_int64_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + end subroutine i8sort_c + + subroutine i8sort_noidx_c(A, isize) bind(C, name="qsort_int64_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int64_t) :: A(isize) + end subroutine i8sort_noidx_c + + + + subroutine i8sort_big_c(A, iorder, isize) bind(C, name="qsort_int64_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + end subroutine i8sort_big_c + + subroutine i8sort_noidx_big_c(A, isize) bind(C, name="qsort_int64_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int64_t) :: A(isize) + end subroutine i8sort_noidx_big_c + + + + subroutine dsort_c(A, iorder, isize) bind(C, name="qsort_double") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + real (c_double) :: A(isize) + end subroutine dsort_c + + subroutine dsort_noidx_c(A, isize) bind(C, name="qsort_double_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + real (c_double) :: A(isize) + end subroutine dsort_noidx_c + + + + subroutine dsort_big_c(A, iorder, isize) bind(C, name="qsort_double_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + real (c_double) :: A(isize) + end subroutine dsort_big_c + + subroutine dsort_noidx_big_c(A, isize) bind(C, name="qsort_double_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + real (c_double) :: A(isize) + end subroutine dsort_noidx_big_c + + + + subroutine sort_c(A, iorder, isize) bind(C, name="qsort_float") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + real (c_float) :: A(isize) + end subroutine sort_c + + subroutine sort_noidx_c(A, isize) bind(C, name="qsort_float_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + real (c_float) :: A(isize) + end subroutine sort_noidx_c + + + + subroutine sort_big_c(A, iorder, isize) bind(C, name="qsort_float_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + real (c_float) :: A(isize) + end subroutine sort_big_c + + subroutine sort_noidx_big_c(A, isize) bind(C, name="qsort_float_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + real (c_float) :: A(isize) + end subroutine sort_noidx_big_c + + + + end interface + +end module qsort_module + + +subroutine i2sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + call i2sort_c(A, iorder, isize) +end subroutine i2sort + +subroutine i2sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int16_t) :: A(isize) + call i2sort_noidx_c(A, isize) +end subroutine i2sort_noidx + + + +subroutine i2sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + call i2sort_big_c(A, iorder, isize) +end subroutine i2sort_big + +subroutine i2sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int16_t) :: A(isize) + call i2sort_noidx_big_c(A, isize) +end subroutine i2sort_noidx_big + + + +subroutine isort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + call isort_c(A, iorder, isize) +end subroutine isort + +subroutine isort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int32_t) :: A(isize) + call isort_noidx_c(A, isize) +end subroutine isort_noidx + + + +subroutine isort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + call isort_big_c(A, iorder, isize) +end subroutine isort_big + +subroutine isort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int32_t) :: A(isize) + call isort_noidx_big_c(A, isize) +end subroutine isort_noidx_big + + + +subroutine i8sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + call i8sort_c(A, iorder, isize) +end subroutine i8sort + +subroutine i8sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int64_t) :: A(isize) + call i8sort_noidx_c(A, isize) +end subroutine i8sort_noidx + + + +subroutine i8sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + call i8sort_big_c(A, iorder, isize) +end subroutine i8sort_big + +subroutine i8sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int64_t) :: A(isize) + call i8sort_noidx_big_c(A, isize) +end subroutine i8sort_noidx_big + + + +subroutine dsort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + real (c_double) :: A(isize) + call dsort_c(A, iorder, isize) +end subroutine dsort + +subroutine dsort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + real (c_double) :: A(isize) + call dsort_noidx_c(A, isize) +end subroutine dsort_noidx + + + +subroutine dsort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + real (c_double) :: A(isize) + call dsort_big_c(A, iorder, isize) +end subroutine dsort_big + +subroutine dsort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + real (c_double) :: A(isize) + call dsort_noidx_big_c(A, isize) +end subroutine dsort_noidx_big + + + +subroutine sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + real (c_float) :: A(isize) + call sort_c(A, iorder, isize) +end subroutine sort + +subroutine sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + real (c_float) :: A(isize) + call sort_noidx_c(A, isize) +end subroutine sort_noidx + + + +subroutine sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + real (c_float) :: A(isize) + call sort_big_c(A, iorder, isize) +end subroutine sort_big + +subroutine sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + real (c_float) :: A(isize) + call sort_noidx_big_c(A, isize) +end subroutine sort_noidx_big diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index ff40263c..089c3871 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -1,222 +1,4 @@ BEGIN_TEMPLATE - subroutine insertion_$Xsort (x,iorder,isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the insertion sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - $type :: xtmp - integer :: i, i0, j, jmax - - do i=2,isize - xtmp = x(i) - i0 = iorder(i) - j=i-1 - do while (j>0) - if ((x(j) <= xtmp)) exit - x(j+1) = x(j) - iorder(j+1) = iorder(j) - j=j-1 - enddo - x(j+1) = xtmp - iorder(j+1) = i0 - enddo - end subroutine insertion_$Xsort - - subroutine quick_$Xsort(x, iorder, isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the quicksort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - integer, external :: omp_get_num_threads - call rec_$X_quicksort(x,iorder,isize,1,isize,nproc) - end - - recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level) - implicit none - integer, intent(in) :: isize, first, last, level - integer,intent(inout) :: iorder(isize) - $type, intent(inout) :: x(isize) - $type :: c, tmp - integer :: itmp - integer :: i, j - - if(isize<2)return - - c = x( shiftr(first+last,1) ) - i = first - j = last - do - do while (x(i) < c) - i=i+1 - end do - do while (c < x(j)) - j=j-1 - end do - if (i >= j) exit - tmp = x(i) - x(i) = x(j) - x(j) = tmp - itmp = iorder(i) - iorder(i) = iorder(j) - iorder(j) = itmp - i=i+1 - j=j-1 - enddo - if ( ((i-first <= 10000).and.(last-j <= 10000)).or.(level<=0) ) then - if (first < i-1) then - call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) - endif - if (j+1 < last) then - call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) - endif - else - if (first < i-1) then - call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) - endif - if (j+1 < last) then - call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) - endif - endif - end - - subroutine heap_$Xsort(x,iorder,isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the heap sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - - integer :: i, k, j, l, i0 - $type :: xtemp - - l = isize/2+1 - k = isize - do while (.True.) - if (l>1) then - l=l-1 - xtemp = x(l) - i0 = iorder(l) - else - xtemp = x(k) - i0 = iorder(k) - x(k) = x(1) - iorder(k) = iorder(1) - k = k-1 - if (k == 1) then - x(1) = xtemp - iorder(1) = i0 - exit - endif - endif - i=l - j = shiftl(l,1) - do while (j1) then - l=l-1 - xtemp = x(l) - i0 = iorder(l) - else - xtemp = x(k) - i0 = iorder(k) - x(k) = x(1) - iorder(k) = iorder(1) - k = k-1 - if (k == 1) then - x(1) = xtemp - iorder(1) = i0 - exit - endif - endif - i=l - j = shiftl(l,1) - do while (j0_8) - if (x(j)<=xtmp) exit - x(j+1_8) = x(j) - iorder(j+1_8) = iorder(j) - j = j-1_8 - enddo - x(j+1_8) = xtmp - iorder(j+1_8) = i0 - enddo - - end subroutine insertion_$Xsort_big - subroutine $Xset_order_big(x,iorder,isize) implicit none BEGIN_DOC @@ -565,223 +90,3 @@ SUBST [ X, type ] END_TEMPLATE -BEGIN_TEMPLATE - -recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix) - implicit none - - BEGIN_DOC - ! Sort integer array x(isize) using the radix sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - ! iradix should be -1 in input. - END_DOC - integer*$int_type, intent(in) :: isize - integer*$int_type, intent(inout) :: iorder(isize) - integer*$type, intent(inout) :: x(isize) - integer, intent(in) :: iradix - integer :: iradix_new - integer*$type, allocatable :: x2(:), x1(:) - integer*$type :: i4 ! data type - integer*$int_type, allocatable :: iorder1(:),iorder2(:) - integer*$int_type :: i0, i1, i2, i3, i ! index type - integer*$type :: mask - integer :: err - !DIR$ ATTRIBUTES ALIGN : 128 :: iorder1,iorder2, x2, x1 - - if (isize < 2) then - return - endif - - if (iradix == -1) then ! Sort Positive and negative - - allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays' - stop - endif - - i1=1_$int_type - i2=1_$int_type - do i=1_$int_type,isize - if (x(i) < 0_$type) then - iorder1(i1) = iorder(i) - x1(i1) = -x(i) - i1 = i1+1_$int_type - else - iorder2(i2) = iorder(i) - x2(i2) = x(i) - i2 = i2+1_$int_type - endif - enddo - i1=i1-1_$int_type - i2=i2-1_$int_type - - do i=1_$int_type,i2 - iorder(i1+i) = iorder2(i) - x(i1+i) = x2(i) - enddo - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x2, iorder2' - stop - endif - - - if (i1 > 1_$int_type) then - call $Xradix_sort$big(x1,iorder1,i1,-2) - do i=1_$int_type,i1 - x(i) = -x1(1_$int_type+i1-i) - iorder(i) = iorder1(1_$int_type+i1-i) - enddo - endif - - if (i2>1_$int_type) then - call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2) - endif - - deallocate(x1,iorder1,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x1, iorder1' - stop - endif - return - - else if (iradix == -2) then ! Positive - - ! Find most significant bit - - i0 = 0_$int_type - i4 = maxval(x) - - iradix_new = max($integer_size-1-leadz(i4),1) - mask = ibset(0_$type,iradix_new) - - allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays' - stop - endif - - i1=1_$int_type - i2=1_$int_type - - do i=1_$int_type,isize - if (iand(mask,x(i)) == 0_$type) then - iorder1(i1) = iorder(i) - x1(i1) = x(i) - i1 = i1+1_$int_type - else - iorder2(i2) = iorder(i) - x2(i2) = x(i) - i2 = i2+1_$int_type - endif - enddo - i1=i1-1_$int_type - i2=i2-1_$int_type - - do i=1_$int_type,i1 - iorder(i0+i) = iorder1(i) - x(i0+i) = x1(i) - enddo - i0 = i0+i1 - i3 = i0 - deallocate(x1,iorder1,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x1, iorder1' - stop - endif - - - do i=1_$int_type,i2 - iorder(i0+i) = iorder2(i) - x(i0+i) = x2(i) - enddo - i0 = i0+i2 - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x2, iorder2' - stop - endif - - - if (i3>1_$int_type) then - call $Xradix_sort$big(x,iorder,i3,iradix_new-1) - endif - - if (isize-i3>1_$int_type) then - call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1) - endif - - return - endif - - ASSERT (iradix >= 0) - - if (isize < 48) then - call insertion_$Xsort$big(x,iorder,isize) - return - endif - - - allocate(x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays x1, iorder1' - stop - endif - - - mask = ibset(0_$type,iradix) - i0=1_$int_type - i1=1_$int_type - - do i=1_$int_type,isize - if (iand(mask,x(i)) == 0_$type) then - iorder(i0) = iorder(i) - x(i0) = x(i) - i0 = i0+1_$int_type - else - iorder2(i1) = iorder(i) - x2(i1) = x(i) - i1 = i1+1_$int_type - endif - enddo - i0=i0-1_$int_type - i1=i1-1_$int_type - - do i=1_$int_type,i1 - iorder(i0+i) = iorder2(i) - x(i0+i) = x2(i) - enddo - - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays x2, iorder2' - stop - endif - - - if (iradix == 0) then - return - endif - - - if (i1>1_$int_type) then - call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1) - endif - if (i0>1) then - call $Xradix_sort$big(x,iorder,i0,iradix-1) - endif - - end - -SUBST [ X, type, integer_size, is_big, big, int_type ] - i ; 4 ; 32 ; .False. ; ; 4 ;; - i8 ; 8 ; 64 ; .False. ; ; 4 ;; - i2 ; 2 ; 16 ; .False. ; ; 4 ;; - i ; 4 ; 32 ; .True. ; _big ; 8 ;; - i8 ; 8 ; 64 ; .True. ; _big ; 8 ;; -END_TEMPLATE - - -