diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 474412c9..6f012981 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -547,7 +547,6 @@ let terminate program_state rep_socket = let error msg program_state rep_socket = - Printf.printf "%s\n%!" msg; Message.Error (Message.Error_msg.create msg) |> Message.to_string |> ZMQ.Socket.send rep_socket ; diff --git a/ocaml/qp_create_guess.ml b/ocaml/qp_create_guess.ml new file mode 100644 index 00000000..62af57de --- /dev/null +++ b/ocaml/qp_create_guess.ml @@ -0,0 +1,141 @@ +open Qputils +open Qptypes +open Core.Std + +let run ~multiplicity ezfio_file = + if (not (Sys.file_exists_exn ezfio_file)) then + failwith ("EZFIO directory "^ezfio_file^" not found"); + Ezfio.set_file ezfio_file; + let d = + Input.Determinants_by_hand.read () + in + let m = + Multiplicity.of_int multiplicity + in + let ne = + Ezfio.get_electrons_elec_alpha_num () + + Ezfio.get_electrons_elec_beta_num () + |> Elec_number.of_int + in + let alpha, beta = + let (a,b) = + Multiplicity.to_alpha_beta ne m + in + (Elec_alpha_number.to_int a, Elec_beta_number.to_int b) + in + let n_open_shells = + alpha - beta + in + let mo_tot_num = + Ezfio.get_mo_basis_mo_tot_num () + in + let build_list_of_dets ne n_closed n_open = + let init = + Array.create ~len:n_closed Bit.One + |> Array.to_list + in + let rec set_electron accu = function + | 1 -> [ Bit.One :: accu ] + | i -> + assert (i>1); + let rest = + set_electron (Bit.Zero :: accu) (i-1) + in + (Bit.One::accu) :: rest + in + let rec extend accu = function + | 0 -> List.rev accu + | i -> extend (Bit.Zero::accu) (i-1) + in + let rec set_n_electrons accu imax = function + | 0 -> [] + | 1 -> set_electron accu imax + | i -> + assert (i>1); + let l = + set_electron accu (imax-1) + in + List.map ~f:(fun x -> set_n_electrons x (imax-1) (i-1)) l + |> List.concat + in + set_n_electrons init n_open ne + |> List.filter ~f:(fun x -> List.length x <= n_closed+n_open) + |> List.map ~f:(fun x -> extend x (((mo_tot_num-1)/64+1)*64 - List.length x)) + in + + let alpha_new = + (Elec_number.to_int ne + 1)/2 + and beta_new = + Elec_number.to_int ne/2 + in + let l_alpha = + build_list_of_dets ((alpha-beta+1)/2) beta n_open_shells + in + let l_beta = + if alpha_new = beta_new then + l_alpha + else + build_list_of_dets ((alpha-beta)/2)beta n_open_shells + in + + let n_int = + Bitlist.n_int_of_mo_tot_num mo_tot_num + in + let determinants = + List.map l_alpha ~f:(fun x -> List.map l_beta ~f:(fun y -> (x,y) )) + |> List.concat + |> List.map ~f:(fun pair -> Determinant.of_bitlist_couple ~n_int + ~alpha:(Elec_alpha_number.of_int alpha_new) + ~beta:(Elec_beta_number.of_int beta_new) pair ) + in + let c = + Array.create ~len:(List.length determinants) (Det_coef.of_float 1.) + in + + determinants + |> List.map ~f:(fun x -> Determinant.to_string ~mo_tot_num:(MO_number.of_int mo_tot_num) x) + |> List.iter ~f:(fun x -> Printf.printf "%s\n\n%!" x); + + let l = + List.length determinants + in + if l > 0 then + begin + let d = + let s = (Float.of_int (alpha - beta)) *. 0.5 in + let open Input.Determinants_by_hand in + { d with n_int ; + n_det = Det_number.of_int ~min:1 ~max:l l; + expected_s2 = Positive_float.of_float (s *. (s +. 1.)) ; + psi_coef = c; + psi_det = Array.of_list determinants; + } + in + Input.Determinants_by_hand.write d; + Ezfio.set_determinants_read_wf true + end + else + Ezfio.set_determinants_read_wf false + + + +let spec = + let open Command.Spec in + empty + +> flag "m" (required int) + ~doc:"int Spin multiplicity" + +> anon ("ezfio_file" %: string) + +let () = + Command.basic + ~summary: "Quantum Package command" + ~readme:( fun () -> " +Creates an open-shell multiplet initial guess\n\n" ) + spec + (fun multiplicity ezfio_file () -> + run ~multiplicity ezfio_file + ) + |> Command.run ~version: Git.sha1 ~build_info: Git.message + + + diff --git a/plugins/CAS_SD/H_apply.irp.f b/plugins/CAS_SD/H_apply.irp.f index 35c45fb6..f1d0c66b 100644 --- a/plugins/CAS_SD/H_apply.irp.f +++ b/plugins/CAS_SD/H_apply.irp.f @@ -3,6 +3,7 @@ BEGIN_SHELL [ /usr/bin/env python ] from generate_h_apply import * s = H_apply("CAS_SD") +s.unset_skip() print s s = H_apply("CAS_SD_selected_no_skip") @@ -12,6 +13,7 @@ print s s = H_apply("CAS_SD_selected") s.set_selection_pt2("epstein_nesbet_2x2") +s.unset_skip() print s s = H_apply("CAS_SD_PT2") @@ -22,13 +24,9 @@ print s s = H_apply("CAS_S",do_double_exc=False) print s -s = H_apply("CAS_S_selected_no_skip",do_double_exc=False) -s.set_selection_pt2("epstein_nesbet_2x2") -s.unset_skip() -print s - s = H_apply("CAS_S_selected",do_double_exc=False) s.set_selection_pt2("epstein_nesbet_2x2") +s.unset_skip() print s s = H_apply("CAS_S_PT2",do_double_exc=False) diff --git a/plugins/CAS_SD/cas_s_selected.irp.f b/plugins/CAS_SD/cas_s_selected.irp.f index 802de171..7c77b529 100644 --- a/plugins/CAS_SD/cas_s_selected.irp.f +++ b/plugins/CAS_SD/cas_s_selected.irp.f @@ -12,6 +12,7 @@ program full_ci pt2 = 1.d0 diag_algorithm = "Lapack" + if (N_det > N_det_max) then call diagonalize_CI call save_wavefunction @@ -28,49 +29,84 @@ program full_ci print *, 'E+PT2 = ', CI_energy+pt2 print *, '-----' endif + double precision :: i_H_psi_array(N_states),diag_H_mat_elem,h,i_O1_psi_array(N_states) + double precision :: E_CI_before(N_states) + if(read_wf)then + call i_H_psi(psi_det(1,1,N_det),psi_det,psi_coef,N_int,N_det,psi_det_size,N_states,i_H_psi_array) + h = diag_H_mat_elem(psi_det(1,1,N_det),N_int) + selection_criterion = dabs(psi_coef(N_det,1) * (i_H_psi_array(1) - h * psi_coef(N_det,1))) * 0.1d0 + soft_touch selection_criterion + endif + + integer :: n_det_before + print*,'Beginning the selection ...' + E_CI_before(1:N_states) = CI_energy(1:N_states) do while (N_det < N_det_max.and.maxval(abs(pt2(1:N_st))) > pt2_max) - call H_apply_CAS_S_selected_no_skip(pt2, norm_pert, H_pert_diag, N_st) + n_det_before = N_det + call H_apply_CAS_SD_selected(pt2, norm_pert, H_pert_diag, N_st) PROVIDE psi_coef PROVIDE psi_det PROVIDE psi_det_sorted - if (N_det > N_det_max) then - psi_det = psi_det_sorted - psi_coef = psi_coef_sorted - N_det = N_det_max - soft_touch N_det psi_det psi_coef - endif call diagonalize_CI + + if (N_det > N_det_max) then + N_det = N_det_max + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + touch N_det psi_det psi_coef psi_det_sorted psi_coef_sorted psi_average_norm_contrib_sorted + endif + + call save_wavefunction - print *, 'N_det = ', N_det - print *, 'N_states = ', N_states - print *, 'PT2 = ', pt2 - print *, 'E = ', CI_energy - print *, 'E+PT2 = ', CI_energy+pt2 + if(n_det_before == N_det)then + selection_criterion = selection_criterion * 0.5d0 + endif + print *, 'N_det = ', N_det + print *, 'N_states = ', N_states + do k = 1, N_states + print*,'State ',k + print *, 'PT2 = ', pt2(k) + print *, 'E = ', CI_energy(k) + print *, 'E(before)+PT2 = ', E_CI_before(k)+pt2(k) + enddo print *, '-----' + if(N_states.gt.1)then + print*,'Variational Energy difference' + do i = 2, N_states + print*,'Delta E = ',CI_energy(i) - CI_energy(1) + enddo + endif + if(N_states.gt.1)then + print*,'Variational + perturbative Energy difference' + do i = 2, N_states + print*,'Delta E = ',E_CI_before(i)+ pt2(i) - (E_CI_before(1) + pt2(1)) + enddo + endif + E_CI_before(1:N_states) = CI_energy(1:N_states) call ezfio_set_cas_sd_energy(CI_energy(1)) enddo - call diagonalize_CI - + N_det = min(N_det_max,N_det) + touch N_det psi_det psi_coef + call diagonalize_CI if(do_pt2_end)then print*,'Last iteration only to compute the PT2' threshold_selectors = 1.d0 threshold_generators = 0.999d0 - call H_apply_CAS_S_PT2(pt2, norm_pert, H_pert_diag, N_st) + call H_apply_CAS_SD_PT2(pt2, norm_pert, H_pert_diag, N_st) print *, 'Final step' print *, 'N_det = ', N_det print *, 'N_states = ', N_states print *, 'PT2 = ', pt2 - print *, 'E = ', CI_energy - print *, 'E+PT2 = ', CI_energy+pt2 + print *, 'E = ', CI_energy(1:N_states) + print *, 'E+PT2 = ', CI_energy(1:N_states)+pt2(1:N_states) print *, '-----' call ezfio_set_cas_sd_energy_pt2(CI_energy(1)+pt2(1)) endif - integer :: exc_max, degree_min exc_max = 0 print *, 'CAS determinants : ', N_det_cas @@ -79,6 +115,7 @@ program full_ci call get_excitation_degree(psi_cas(1,1,k),psi_cas(1,1,i),degree,N_int) exc_max = max(exc_max,degree) enddo + print *, psi_coef_cas_diagonalized(i,:) call debug_det(psi_cas(1,1,i),N_int) print *, '' enddo diff --git a/plugins/CAS_SD/cas_sd.irp.f b/plugins/CAS_SD/cas_sd.irp.f index a5fc39b2..e2e8cb1f 100644 --- a/plugins/CAS_SD/cas_sd.irp.f +++ b/plugins/CAS_SD/cas_sd.irp.f @@ -1,7 +1,6 @@ program full_ci implicit none integer :: i,k - integer :: N_det_old double precision, allocatable :: pt2(:), norm_pert(:), H_pert_diag(:) @@ -11,9 +10,9 @@ program full_ci character*(64) :: perturbation PROVIDE N_det_cas - N_det_old = 0 pt2 = 1.d0 diag_algorithm = "Lapack" + if (N_det > N_det_max) then call diagonalize_CI call save_wavefunction @@ -30,36 +29,68 @@ program full_ci print *, 'E+PT2 = ', CI_energy+pt2 print *, '-----' endif + double precision :: i_H_psi_array(N_states),diag_H_mat_elem,h,i_O1_psi_array(N_states) + double precision :: E_CI_before(N_states) + if(read_wf)then + call i_H_psi(psi_det(1,1,N_det),psi_det,psi_coef,N_int,N_det,psi_det_size,N_states,i_H_psi_array) + h = diag_H_mat_elem(psi_det(1,1,N_det),N_int) + selection_criterion = dabs(psi_coef(N_det,1) * (i_H_psi_array(1) - h * psi_coef(N_det,1))) * 0.1d0 + soft_touch selection_criterion + endif + + integer :: n_det_before + print*,'Beginning the selection ...' + E_CI_before(1:N_states) = CI_energy(1:N_states) do while (N_det < N_det_max.and.maxval(abs(pt2(1:N_st))) > pt2_max) - N_det_old = N_det + n_det_before = N_det call H_apply_CAS_SD(pt2, norm_pert, H_pert_diag, N_st) PROVIDE psi_coef PROVIDE psi_det PROVIDE psi_det_sorted - if (N_det > N_det_max) then - psi_det = psi_det_sorted - psi_coef = psi_coef_sorted - N_det = N_det_max - soft_touch N_det psi_det psi_coef - endif call diagonalize_CI - call save_wavefunction - print *, 'N_det = ', N_det - print *, 'N_states = ', N_states - print *, 'PT2 = ', pt2 - print *, 'E = ', CI_energy - print *, 'E+PT2 = ', CI_energy+pt2 - print *, '-----' - call ezfio_set_cas_sd_energy(CI_energy(1)) - if (N_det == N_det_old) then - exit - endif - enddo - call diagonalize_CI + if (N_det > N_det_max) then + N_det = N_det_max + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + touch N_det psi_det psi_coef psi_det_sorted psi_coef_sorted psi_average_norm_contrib_sorted + endif + + + call save_wavefunction + if(n_det_before == N_det)then + selection_criterion = selection_criterion * 0.5d0 + endif + print *, 'N_det = ', N_det + print *, 'N_states = ', N_states + do k = 1, N_states + print*,'State ',k + print *, 'PT2 = ', pt2(k) + print *, 'E = ', CI_energy(k) + print *, 'E(before)+PT2 = ', E_CI_before(k)+pt2(k) + enddo + print *, '-----' + if(N_states.gt.1)then + print*,'Variational Energy difference' + do i = 2, N_states + print*,'Delta E = ',CI_energy(i) - CI_energy(1) + enddo + endif + if(N_states.gt.1)then + print*,'Variational + perturbative Energy difference' + do i = 2, N_states + print*,'Delta E = ',E_CI_before(i)+ pt2(i) - (E_CI_before(1) + pt2(1)) + enddo + endif + E_CI_before(1:N_states) = CI_energy(1:N_states) + call ezfio_set_cas_sd_energy(CI_energy(1)) + enddo + N_det = min(N_det_max,N_det) + touch N_det psi_det psi_coef + call diagonalize_CI if(do_pt2_end)then print*,'Last iteration only to compute the PT2' threshold_selectors = 1.d0 @@ -70,13 +101,12 @@ program full_ci print *, 'N_det = ', N_det print *, 'N_states = ', N_states print *, 'PT2 = ', pt2 - print *, 'E = ', CI_energy - print *, 'E+PT2 = ', CI_energy+pt2 + print *, 'E = ', CI_energy(1:N_states) + print *, 'E+PT2 = ', CI_energy(1:N_states)+pt2(1:N_states) print *, '-----' call ezfio_set_cas_sd_energy_pt2(CI_energy(1)+pt2(1)) endif - integer :: exc_max, degree_min exc_max = 0 print *, 'CAS determinants : ', N_det_cas @@ -85,6 +115,7 @@ program full_ci call get_excitation_degree(psi_cas(1,1,k),psi_cas(1,1,i),degree,N_int) exc_max = max(exc_max,degree) enddo + print *, psi_coef_cas_diagonalized(i,:) call debug_det(psi_cas(1,1,i),N_int) print *, '' enddo diff --git a/plugins/CAS_SD/cas_sd_selected.irp.f b/plugins/CAS_SD/cas_sd_selected.irp.f index be893da4..d12e8430 100644 --- a/plugins/CAS_SD/cas_sd_selected.irp.f +++ b/plugins/CAS_SD/cas_sd_selected.irp.f @@ -12,6 +12,7 @@ program full_ci pt2 = 1.d0 diag_algorithm = "Lapack" + if (N_det > N_det_max) then call diagonalize_CI call save_wavefunction @@ -28,32 +29,68 @@ program full_ci print *, 'E+PT2 = ', CI_energy+pt2 print *, '-----' endif + double precision :: i_H_psi_array(N_states),diag_H_mat_elem,h,i_O1_psi_array(N_states) + double precision :: E_CI_before(N_states) + if(read_wf)then + call i_H_psi(psi_det(1,1,N_det),psi_det,psi_coef,N_int,N_det,psi_det_size,N_states,i_H_psi_array) + h = diag_H_mat_elem(psi_det(1,1,N_det),N_int) + selection_criterion = dabs(psi_coef(N_det,1) * (i_H_psi_array(1) - h * psi_coef(N_det,1))) * 0.1d0 + soft_touch selection_criterion + endif + + integer :: n_det_before + print*,'Beginning the selection ...' + E_CI_before(1:N_states) = CI_energy(1:N_states) do while (N_det < N_det_max.and.maxval(abs(pt2(1:N_st))) > pt2_max) + n_det_before = N_det call H_apply_CAS_SD_selected(pt2, norm_pert, H_pert_diag, N_st) PROVIDE psi_coef PROVIDE psi_det PROVIDE psi_det_sorted - if (N_det > N_det_max) then - psi_det = psi_det_sorted - psi_coef = psi_coef_sorted - N_det = N_det_max - soft_touch N_det psi_det psi_coef - endif call diagonalize_CI + + if (N_det > N_det_max) then + N_det = N_det_max + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + touch N_det psi_det psi_coef psi_det_sorted psi_coef_sorted psi_average_norm_contrib_sorted + endif + + call save_wavefunction - print *, 'N_det = ', N_det - print *, 'N_states = ', N_states - print *, 'PT2 = ', pt2 - print *, 'E = ', CI_energy(1:N_states) - print *, 'E+PT2 = ', CI_energy(1:N_states)+pt2(1:N_states) + if(n_det_before == N_det)then + selection_criterion = selection_criterion * 0.5d0 + endif + print *, 'N_det = ', N_det + print *, 'N_states = ', N_states + do k = 1, N_states + print*,'State ',k + print *, 'PT2 = ', pt2(k) + print *, 'E = ', CI_energy(k) + print *, 'E(before)+PT2 = ', E_CI_before(k)+pt2(k) + enddo print *, '-----' + if(N_states.gt.1)then + print*,'Variational Energy difference' + do i = 2, N_states + print*,'Delta E = ',CI_energy(i) - CI_energy(1) + enddo + endif + if(N_states.gt.1)then + print*,'Variational + perturbative Energy difference' + do i = 2, N_states + print*,'Delta E = ',E_CI_before(i)+ pt2(i) - (E_CI_before(1) + pt2(1)) + enddo + endif + E_CI_before(1:N_states) = CI_energy(1:N_states) call ezfio_set_cas_sd_energy(CI_energy(1)) enddo - call diagonalize_CI - + N_det = min(N_det_max,N_det) + touch N_det psi_det psi_coef + call diagonalize_CI if(do_pt2_end)then print*,'Last iteration only to compute the PT2' threshold_selectors = 1.d0 @@ -70,7 +107,6 @@ program full_ci call ezfio_set_cas_sd_energy_pt2(CI_energy(1)+pt2(1)) endif - integer :: exc_max, degree_min exc_max = 0 print *, 'CAS determinants : ', N_det_cas @@ -79,6 +115,7 @@ program full_ci call get_excitation_degree(psi_cas(1,1,k),psi_cas(1,1,i),degree,N_int) exc_max = max(exc_max,degree) enddo + print *, psi_cas_coef(i,:) call debug_det(psi_cas(1,1,i),N_int) print *, '' enddo diff --git a/plugins/Full_CI/H_apply.irp.f b/plugins/Full_CI/H_apply.irp.f index 9e22b965..d870e4b0 100644 --- a/plugins/Full_CI/H_apply.irp.f +++ b/plugins/Full_CI/H_apply.irp.f @@ -23,6 +23,11 @@ s.unset_skip() #s.unset_openmp() print s +s = H_apply("FCI_no_selection") +s.set_selection_pt2("dummy") +s.unset_skip() +print s + s = H_apply("FCI_mono") s.set_selection_pt2("epstein_nesbet_2x2") s.unset_double_excitations() diff --git a/plugins/Full_CI/full_ci.irp.f b/plugins/Full_CI/full_ci.irp.f index 70a6ec43..42e773eb 100644 --- a/plugins/Full_CI/full_ci.irp.f +++ b/plugins/Full_CI/full_ci.irp.f @@ -11,7 +11,7 @@ program full_ci pt2 = 1.d0 diag_algorithm = "Lapack" - + if (N_det > N_det_max) then call diagonalize_CI call save_wavefunction diff --git a/plugins/Full_CI_ZMQ/selection_slave.irp.f b/plugins/Full_CI_ZMQ/selection_slave.irp.f index 80a6a64f..2aba32fe 100644 --- a/plugins/Full_CI_ZMQ/selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_slave.irp.f @@ -23,42 +23,55 @@ subroutine run_wf integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket double precision :: energy(N_states_diag) - character*(64) :: state + character*(64) :: states(2) + integer :: rc, i call provide_everything zmq_context = f77_zmq_ctx_new () - state = 'Waiting' + states(1) = 'selection' + states(2) = 'davidson' zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() do - call wait_for_next_state(state) - print *, "STATE ", trim(state) - if(trim(state) == "Stopped") exit - if(trim(state) == 'selection') then - zmq_state = 'selection' - print *, 'Getting wave function' + + call wait_for_states(states,zmq_state,2) + + if(trim(zmq_state) == 'Stopped') then + + exit + + else if (trim(zmq_state) == 'selection') then + + ! Selection + ! --------- + + print *, 'Selection' call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states_diag) - - integer :: rc, i - - print *, 'Selection slave running' - !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() call selection_dressing_slave_tcp(i, energy) !$OMP END PARALLEL - else if(trim(state) == "davidson") then - zmq_state = 'davidson' + print *, 'Selection done' + + else if (trim(zmq_state) == 'davidson') then + + ! Davidson + ! -------- + + print *, 'Davidson' call davidson_miniserver_get() - print *, "Davidson slave running" + !$OMP PARALLEL PRIVATE(i) - i = omp_get_thread_num() - call davidson_slave_tcp(i) + i = omp_get_thread_num() + call davidson_slave_tcp(i) !$OMP END PARALLEL - end if + print *, 'Davidson done' + + endif + end do end diff --git a/plugins/MRCC_CASSD/mrcc_cassd.irp.f b/plugins/MRCC_CASSD/mrcc_cassd.irp.f index 0d49be89..4fef815d 100644 --- a/plugins/MRCC_CASSD/mrcc_cassd.irp.f +++ b/plugins/MRCC_CASSD/mrcc_cassd.irp.f @@ -35,10 +35,10 @@ subroutine run(N_st,energy) print *, 'MRCC Iteration', iteration print *, '===========================' print *, '' - E_old = sum(ci_energy_dressed) + E_old = sum(ci_energy_dressed(1:N_st)) call write_double(6,ci_energy_dressed(1),"MRCC energy") call diagonalize_ci_dressed(lambda) - E_new = sum(ci_energy_dressed) + E_new = sum(ci_energy_dressed(1:N_st)) delta_E = dabs(E_new - E_old) call save_wavefunction call ezfio_set_mrcc_cassd_energy(ci_energy_dressed(1)) @@ -47,7 +47,7 @@ subroutine run(N_st,energy) endif enddo call write_double(6,ci_energy_dressed(1),"Final MRCC energy") - energy(:) = ci_energy_dressed(:) + energy(1:N_st) = ci_energy_dressed(1:N_st) end diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index 085799f6..a67ca676 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -548,3 +548,621 @@ subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8) end + +subroutine davidson_diag_mrcc_hs2(dets_in,u_in,dim_in,energies,sze,N_st,N_st_diag,Nint,iunit,istate) + use bitmasks + implicit none + BEGIN_DOC + ! Davidson diagonalization. + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten + ! on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! iunit : Unit number for the I/O + ! + ! Initial guess vectors are not necessarily orthonormal + END_DOC + integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint, iunit, istate + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(inout) :: u_in(dim_in,N_st_diag) + double precision, intent(out) :: energies(N_st) + double precision, allocatable :: H_jj(:), S2_jj(:) + + double precision :: diag_h_mat_elem + integer :: i + ASSERT (N_st > 0) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + PROVIDE mo_bielec_integrals_in_map + allocate(H_jj(sze), S2_jj(sze)) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(sze,H_jj,S2_jj, dets_in,Nint,N_det_ref,delta_ii, & + !$OMP idx_ref, istate) & + !$OMP PRIVATE(i) + !$OMP DO SCHEDULE(guided) + do i=1,sze + H_jj(i) = diag_h_mat_elem(dets_in(1,1,i),Nint) + call get_s2(dets_in(1,1,i),dets_in(1,1,i),Nint,S2_jj(i)) + enddo + !$OMP END DO + !$OMP DO SCHEDULE(guided) + do i=1,N_det_ref + H_jj(idx_ref(i)) += delta_ii(istate,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + call davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit,istate) + deallocate (H_jj,S2_jj) +end + + +subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit,istate ) + use bitmasks + implicit none + BEGIN_DOC + ! Davidson diagonalization with specific diagonal elements of the H matrix + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! S2_jj : specific diagonal S^2 matrix elements + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten + ! on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! N_st_diag : Number of states in which H is diagonalized. Assumed > sze + ! + ! iunit : Unit for the I/O + ! + ! Initial guess vectors are not necessarily orthonormal + END_DOC + integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint, istate + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(in) :: H_jj(sze), S2_jj(sze) + integer, intent(in) :: iunit + double precision, intent(inout) :: u_in(dim_in,N_st_diag) + double precision, intent(out) :: energies(N_st_diag) + + integer :: sze_8 + integer :: iter + integer :: i,j,k,l,m + logical :: converged + + double precision, allocatable :: overlap(:,:) + double precision :: u_dot_v, u_dot_u + + integer, allocatable :: kl_pairs(:,:) + integer :: k_pairs, kl + + integer :: iter2 + double precision, allocatable :: W(:,:), U(:,:), R(:,:), S(:,:) + double precision, allocatable :: y(:,:), h(:,:), lambda(:), s2(:) + double precision, allocatable :: c(:), s_(:,:), s_tmp(:,:) + double precision :: diag_h_mat_elem + double precision, allocatable :: residual_norm(:) + character*(16384) :: write_buffer + double precision :: to_print(3,N_st) + double precision :: cpu, wall + integer :: shift, shift2 + include 'constants.include.F' + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, R, S, y, h, lambda + if (N_st_diag > sze) then + stop 'error in Davidson : N_st_diag > sze' + endif + + PROVIDE nuclear_repulsion + + call write_time(iunit) + call wall_time(wall) + call cpu_time(cpu) + write(iunit,'(A)') '' + write(iunit,'(A)') 'Davidson Diagonalization' + write(iunit,'(A)') '------------------------' + write(iunit,'(A)') '' + call write_int(iunit,N_st,'Number of states') + call write_int(iunit,N_st_diag,'Number of states in diagonalization') + call write_int(iunit,sze,'Number of determinants') + call write_int(iunit,istate,'Using dressing for state ') + + write(iunit,'(A)') '' + write_buffer = '===== ' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(iunit,'(A)') trim(write_buffer) + write_buffer = ' Iter' + do i=1,N_st + write_buffer = trim(write_buffer)//' Energy S^2 Residual' + enddo + write(iunit,'(A)') trim(write_buffer) + write_buffer = '===== ' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(iunit,'(A)') trim(write_buffer) + + integer, external :: align_double + sze_8 = align_double(sze) + + double precision :: delta + + if (s2_eig) then + delta = 1.d0 + else + delta = 0.d0 + endif + + allocate( & + kl_pairs(2,N_st_diag*(N_st_diag+1)/2), & + W(sze_8,N_st_diag*davidson_sze_max), & + U(sze_8,N_st_diag*davidson_sze_max), & + R(sze_8,N_st_diag), & + S(sze_8,N_st_diag*davidson_sze_max), & + h(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & + y(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & + s_(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & + s_tmp(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & + residual_norm(N_st_diag), & + overlap(N_st_diag,N_st_diag), & + c(N_st_diag*davidson_sze_max), & + s2(N_st_diag*davidson_sze_max), & + lambda(N_st_diag*davidson_sze_max)) + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + + ! Davidson iterations + ! =================== + + converged = .False. + + do k=1,N_st + call normalize(u_in(1,k),sze) + enddo + + do k=N_st+1,N_st_diag + do i=1,sze + double precision :: r1, r2 + call random_number(r1) + call random_number(r2) + u_in(i,k) = dsqrt(-2.d0*dlog(r1))*dcos(dtwo_pi*r2) + enddo + + ! Gram-Schmidt + ! ------------ + call dgemv('T',sze,k-1,1.d0,u_in,size(u_in,1), & + u_in(1,k),1,0.d0,c,1) + call dgemv('N',sze,k-1,-1.d0,u_in,size(u_in,1), & + c,1,1.d0,u_in(1,k),1) + call normalize(u_in(1,k),sze) + enddo + + + do while (.not.converged) + + do k=1,N_st_diag + do i=1,sze + U(i,k) = u_in(i,k) + enddo + enddo + + do iter=1,davidson_sze_max-1 + + shift = N_st_diag*(iter-1) + shift2 = N_st_diag*iter + + + ! Compute |W_k> = \sum_i |i> + ! ----------------------------------------- + + + call H_S2_u_0_mrcc_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,& + istate,N_st_diag,sze_8) + + + ! Compute h_kl = = + ! ------------------------------------------- + + +! do l=1,N_st_diag +! do k=1,N_st_diag +! do iter2=1,iter-1 +! h(k,iter2,l,iter) = u_dot_v(U(1,k,iter2),W(1,l,iter),sze) +! h(k,iter,l,iter2) = h(k,iter2,l,iter) +! enddo +! enddo +! do k=1,l +! h(k,iter,l,iter) = u_dot_v(U(1,k,iter),W(1,l,iter),sze) +! h(l,iter,k,iter) = h(k,iter,l,iter) +! enddo +! enddo + + call dgemm('T','N', shift2, N_st_diag, sze, & + 1.d0, U, size(U,1), W(1,shift+1), size(W,1), & + 0.d0, h(1,shift+1), size(h,1)) + + call dgemm('T','N', shift2, N_st_diag, sze, & + 1.d0, U, size(U,1), S(1,shift+1), size(S,1), & + 0.d0, s_(1,shift+1), size(s_,1)) + + ! Diagonalize h + ! ------------- + call lapack_diag(lambda,y,h,size(h,1),shift2) + + ! Compute S2 for each eigenvector + ! ------------------------------- + + call dgemm('N','N',shift2,shift2,shift2, & + 1.d0, s_, size(s_,1), y, size(y,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('T','N',shift2,shift2,shift2, & + 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & + 0.d0, s_, size(s_,1)) + + do k=1,shift2 + s2(k) = s_(k,k) + S_z2_Sz + enddo + + if (s2_eig) then + logical :: state_ok(N_st_diag*davidson_sze_max) + do k=1,shift2 + state_ok(k) = (dabs(s2(k)-expected_s2) < 0.3d0) + enddo + do k=1,shift2 + if (.not. state_ok(k)) then + do l=k+1,shift2 + if (state_ok(l)) then + call dswap(shift2, y(1,k), 1, y(1,l), 1) + call dswap(1, s2(k), 1, s2(l), 1) + call dswap(1, lambda(k), 1, lambda(l), 1) + state_ok(k) = .True. + state_ok(l) = .False. + exit + endif + enddo + endif + enddo + endif + + + ! Express eigenvectors of h in the determinant basis + ! -------------------------------------------------- + +! do k=1,N_st_diag +! do i=1,sze +! U(i,shift2+k) = 0.d0 +! W(i,shift2+k) = 0.d0 +! S(i,shift2+k) = 0.d0 +! enddo +! do l=1,N_st_diag*iter +! do i=1,sze +! U(i,shift2+k) = U(i,shift2+k) + U(i,l)*y(l,k) +! W(i,shift2+k) = W(i,shift2+k) + W(i,l)*y(l,k) +! S(i,shift2+k) = S(i,shift2+k) + S(i,l)*y(l,k) +! enddo +! enddo +! enddo +! +! + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, S, size(S,1), y, size(y,1), 0.d0, S(1,shift2+1), size(S,1)) + + ! Compute residual vector + ! ----------------------- + +! do k=1,N_st_diag +! print *, s2(k) +! s2(k) = u_dot_v(U(1,shift2+k), S(1,shift2+k), sze) + S_z2_Sz +! print *, s2(k) +! print *, '' +! pause +! enddo + do k=1,N_st_diag + do i=1,sze + R(i,k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & + * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz) + enddo + if (k <= N_st) then + residual_norm(k) = u_dot_u(R(1,k),sze) + to_print(1,k) = lambda(k) + nuclear_repulsion + to_print(2,k) = s2(k) + to_print(3,k) = residual_norm(k) + if (residual_norm(k) > 1.e9) then + stop 'Davidson failed' + endif + endif + enddo + + write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3))') iter, to_print(:,1:N_st) + call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged) + if (converged) then + exit + endif + + ! Davidson step + ! ------------- + + do k=1,N_st_diag + do i=1,sze + U(i,shift2+k) = - R(i,k)/max(H_jj(i) - lambda(k),1.d-2) + enddo + enddo + + ! Gram-Schmidt + ! ------------ + + do k=1,N_st_diag + +! do l=1,N_st_diag*iter +! c(1) = u_dot_v(U(1,shift2+k),U(1,l),sze) +! do i=1,sze +! U(i,k,iter+1) = U(i,shift2+k) - c(1) * U(i,l) +! enddo +! enddo +! + call dgemv('T',sze,N_st_diag*iter,1.d0,U,size(U,1), & + U(1,shift2+k),1,0.d0,c,1) + call dgemv('N',sze,N_st_diag*iter,-1.d0,U,size(U,1), & + c,1,1.d0,U(1,shift2+k),1) +! +! do l=1,k-1 +! c(1) = u_dot_v(U(1,shift2+k),U(1,shift2+l),sze) +! do i=1,sze +! U(i,k,iter+1) = U(i,shift2+k) - c(1) * U(i,shift2+l) +! enddo +! enddo +! + call dgemv('T',sze,k-1,1.d0,U(1,shift2+1),size(U,1), & + U(1,shift2+k),1,0.d0,c,1) + call dgemv('N',sze,k-1,-1.d0,U(1,shift2+1),size(U,1), & + c,1,1.d0,U(1,shift2+k),1) + + call normalize( U(1,shift2+k), sze ) + enddo + + enddo + + if (.not.converged) then + iter = davidson_sze_max-1 + endif + + ! Re-contract to u_in + ! ----------- + + do k=1,N_st_diag + energies(k) = lambda(k) + enddo + +! do k=1,N_st_diag +! do i=1,sze +! do l=1,iter*N_st_diag +! u_in(i,k) += U(i,l)*y(l,k) +! enddo +! enddo +! enddo +! enddo + + call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, 1.d0, & + U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + + enddo + + write_buffer = '===== ' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(iunit,'(A)') trim(write_buffer) + write(iunit,'(A)') '' + call write_time(iunit) + + deallocate ( & + kl_pairs, & + W, residual_norm, & + U, overlap, & + R, c, S, & + h, & + y, s_, s_tmp, & + lambda & + ) +end + + +subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8) + use bitmasks + implicit none + BEGIN_DOC + ! Computes v_0 = H|u_0> and s_0 = S^2 |u_0> + ! + ! n : number of determinants + ! + ! H_jj : array of + ! + ! S2_jj : array of + END_DOC + integer, intent(in) :: N_st,n,Nint, sze_8, istate_in + double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st) + double precision, intent(in) :: u_0(sze_8,N_st) + double precision, intent(in) :: H_jj(n), S2_jj(n) + integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) + double precision :: hij,s2 + double precision, allocatable :: vt(:,:), ut(:,:), st(:,:) + integer :: i,j,k,l, jj,ii + integer :: i0, j0 + + integer, allocatable :: shortcut(:,:), sort_idx(:,:) + integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:) + integer(bit_kind) :: sorted_i(Nint) + + integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate + integer :: N_st_8 + + integer, external :: align_double + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut + + N_st_8 = align_double(N_st) + + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + ASSERT (n>0) + PROVIDE ref_bitmask_energy + + allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2)) + allocate(ut(N_st_8,n)) + + v_0 = 0.d0 + s_0 = 0.d0 + + do i=1,n + do istate=1,N_st + ut(istate,i) = u_0(i,istate) + enddo + enddo + + call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint) + call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& + !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8, & + !$OMP N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij,istate_in) + allocate(vt(N_st_8,n),st(N_st_8,n)) + Vt = 0.d0 + St = 0.d0 + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0,1) + do sh2=sh,shortcut(0,1) + exa = 0 + do ni=1,Nint + exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1))) + end do + if(exa > 2) then + cycle + end if + + do i=shortcut(sh,1),shortcut(sh+1,1)-1 + org_i = sort_idx(i,1) + if(sh==sh2) then + endi = i-1 + else + endi = shortcut(sh2+1,1)-1 + end if + do ni=1,Nint + sorted_i(ni) = sorted(ni,i,1) + enddo + + do j=shortcut(sh2,1),endi + org_j = sort_idx(j,1) + ext = exa + do ni=1,Nint + ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) + end do + if(ext <= 4) then + call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) + call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) + do istate=1,n_st + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) + vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) + st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) + enddo + endif + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0,2) + do i=shortcut(sh,2),shortcut(sh+1,2)-1 + org_i = sort_idx(i,2) + do j=shortcut(sh,2),i-1 + org_j = sort_idx(j,2) + ext = 0 + do ni=1,Nint + ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2))) + end do + if(ext == 4) then + call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) + call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) + do istate=1,n_st + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) + vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) + st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) + enddo + end if + end do + end do + enddo + !$OMP END DO NOWAIT + +! -------------------------- +! Begin Specific to dressing +! -------------------------- + + !$OMP DO + do ii=1,n_det_ref + i = idx_ref(ii) + do jj = 1, n_det_non_ref + j = idx_non_ref(jj) + do istate=1,N_st + vt (istate,i) = vt (istate,i) + delta_ij(istate_in,jj,ii)*ut(istate,j) + vt (istate,j) = vt (istate,j) + delta_ij(istate_in,jj,ii)*ut(istate,i) + enddo + enddo + enddo + !$OMP END DO + +! ------------------------ +! End Specific to dressing +! ------------------------ + + !$OMP CRITICAL + do istate=1,N_st + do i=n,1,-1 + v_0(i,istate) = v_0(i,istate) + vt(istate,i) + s_0(i,istate) = s_0(i,istate) + st(istate,i) + enddo + enddo + !$OMP END CRITICAL + + deallocate(vt,st) + !$OMP END PARALLEL + + do istate=1,N_st + do i=1,n + v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate) + s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate) + enddo + enddo + deallocate (shortcut, sort_idx, sorted, version, ut) +end + diff --git a/plugins/MRCC_Utils/mrcc_dress.irp.f b/plugins/MRCC_Utils/mrcc_dress.irp.f index 412c52e2..e6d0fb81 100644 --- a/plugins/MRCC_Utils/mrcc_dress.irp.f +++ b/plugins/MRCC_Utils/mrcc_dress.irp.f @@ -53,7 +53,6 @@ subroutine mrcc_dress(delta_ij_, delta_ii_, Nstates, Ndet_non_ref, Ndet_ref,i_ge integer :: mobiles(2), smallerlist logical, external :: is_generable - print *, i_generator leng = max(N_det_generators, N_det_non_ref) allocate(miniList(Nint, 2, leng), idx_minilist(leng), hij_cache(N_det_non_ref)) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 12ae089b..7687b451 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -148,8 +148,14 @@ END_PROVIDER if (diag_algorithm == "Davidson") then - call davidson_diag_mrcc(psi_det,CI_eigenvectors_dressed,CI_electronic_energy_dressed,& - size(CI_eigenvectors_dressed,1),N_det,N_states,N_states_diag,N_int,output_determinants,mrcc_state) +! call davidson_diag_mrcc(psi_det,CI_eigenvectors_dressed,CI_electronic_energy_dressed,& +! size(CI_eigenvectors_dressed,1),N_det,N_states,N_states_diag,N_int,output_determinants,mrcc_state) + + call davidson_diag_mrcc_HS2(psi_det,CI_eigenvectors_dressed,& + size(CI_eigenvectors_dressed,1), & + CI_electronic_energy_dressed,N_det,N_states,N_states_diag,N_int, & + output_determinants,mrcc_state) + call u_0_S2_u_0(CI_eigenvectors_s2_dressed,CI_eigenvectors_dressed,N_det,psi_det,N_int,& N_states_diag,size(CI_eigenvectors_dressed,1)) @@ -236,69 +242,6 @@ END_PROVIDER deallocate(eigenvectors,eigenvalues) endif - if( s2_eig.and.(n_states_diag > 1).and.(n_det >= n_states_diag) )then - ! Diagonalizing S^2 within the "n_states_diag" states found - allocate(s2_eigvalues(N_states_diag), e_array(N_states_diag)) - call diagonalize_s2_betweenstates(psi_det,CI_eigenvectors_dressed,n_det,size(psi_det,3),size(CI_eigenvectors_dressed,1),min(n_states_diag,n_det),s2_eigvalues) - - double precision, allocatable :: psi_coef_tmp(:,:) - allocate(psi_coef_tmp(psi_det_size,N_states_diag)) - do j = 1, N_states_diag - do i = 1, N_det - psi_coef_tmp(i,j) = CI_eigenvectors_dressed(i,j) - enddo - enddo - call u_0_H_u_0_mrcc_nstates(e_array,psi_coef_tmp,n_det,psi_det,N_int,mrcc_state,N_states,psi_det_size) - - ! Browsing the "n_states_diag" states and getting the lowest in energy "n_states" ones that have the S^2 value - ! closer to the "expected_s2" set as input - - allocate(index_good_state_array(N_det),good_state_array(N_det)) - good_state_array = .False. - i_state = 0 - do j = 1, N_states_diag - if(dabs(s2_eigvalues(j)-expected_s2).le.0.5d0)then - good_state_array(j) = .True. - i_state +=1 - index_good_state_array(i_state) = j - endif - enddo - ! Sorting the i_state good states by energy - allocate(iorder(i_state)) - do j = 1, i_state - do i = 1, N_det - CI_eigenvectors_dressed(i,j) = psi_coef_tmp(i,index_good_state_array(j)) - enddo - CI_eigenvectors_s2_dressed(j) = s2_eigvalues(index_good_state_array(j)) - CI_electronic_energy_dressed(j) = e_array(j) - iorder(j) = j - enddo - call dsort(e_array,iorder,i_state) - do j = 1, i_state - CI_electronic_energy_dressed(j) = e_array(j) - CI_eigenvectors_s2_dressed(j) = s2_eigvalues(index_good_state_array(iorder(j))) - do i = 1, N_det - CI_eigenvectors_dressed(i,j) = psi_coef_tmp(i,index_good_state_array(iorder(j))) - enddo - enddo - - ! Then setting the other states without any specific energy order - i_other_state = 0 - do j = 1, N_states_diag - if(good_state_array(j))cycle - i_other_state +=1 - do i = 1, N_det - CI_eigenvectors_dressed(i,i_state + i_other_state) = psi_coef_tmp(i,j) - enddo - CI_eigenvectors_s2_dressed(i_state + i_other_state) = s2_eigvalues(j) - CI_electronic_energy_dressed(i_state + i_other_state) = e_array(i_state + i_other_state) - enddo - deallocate(iorder,e_array,index_good_state_array,good_state_array,psi_coef_tmp) - - deallocate(s2_eigvalues) - - endif - END_PROVIDER BEGIN_PROVIDER [ double precision, CI_energy_dressed, (N_states_diag) ] @@ -644,6 +587,7 @@ end subroutine N_ex_exists = 0 n = 0 + !TODO Openmp do i=1, N_det_ref do l=1, N_det_non_ref call get_excitation(psi_ref(1,1,i), psi_non_ref(1,1,l), exc, degree, phase, N_int) diff --git a/plugins/Perturbation/pt2_equations.irp.f b/plugins/Perturbation/pt2_equations.irp.f index e406cd03..66083f6f 100644 --- a/plugins/Perturbation/pt2_equations.irp.f +++ b/plugins/Perturbation/pt2_equations.irp.f @@ -345,6 +345,37 @@ subroutine pt2_epstein_nesbet_sc2 ($arguments) end +subroutine pt2_dummy ($arguments) + use bitmasks + implicit none + $declarations + + BEGIN_DOC + ! Dummy perturbation to add all connected determinants. + END_DOC + + integer :: i,j + double precision :: diag_H_mat_elem_fock, h + double precision :: i_H_psi_array(N_st) + PROVIDE selection_criterion + + call i_H_psi_minilist(det_pert,minilist,idx_minilist,N_minilist,psi_selectors_coef,Nint,N_minilist,psi_selectors_size,N_st,i_H_psi_array) + + h = diag_H_mat_elem_fock(det_ref,det_pert,fock_diag_tmp,Nint) + do i =1,N_st + if (i_H_psi_array(i) /= 0.d0) then + c_pert(i) = i_H_psi_array(i) / (electronic_energy(i) - h) + H_pert_diag(i) = h*c_pert(i)*c_pert(i) + e_2_pert(i) = 1.d0 + else + c_pert(i) = 0.d0 + e_2_pert(i) = 0.d0 + H_pert_diag(i) = 0.d0 + endif + enddo + +end + SUBST [ arguments, declarations ] diff --git a/plugins/Selectors_full/selectors.irp.f b/plugins/Selectors_full/selectors.irp.f index 62f0aeaa..27036f33 100644 --- a/plugins/Selectors_full/selectors.irp.f +++ b/plugins/Selectors_full/selectors.irp.f @@ -14,7 +14,7 @@ BEGIN_PROVIDER [ integer, N_det_selectors] integer :: i double precision :: norm, norm_max call write_time(output_determinants) - N_det_selectors = N_det + N_det_selectors = N_det_generators if (threshold_generators < 1.d0) then norm = 0.d0 do i=1,N_det @@ -24,7 +24,7 @@ BEGIN_PROVIDER [ integer, N_det_selectors] exit endif enddo - N_det_selectors = max(N_det_selectors,1) + N_det_selectors = max(N_det_selectors,N_det_generators) endif call write_int(output_determinants,N_det_selectors,'Number of selectors') END_PROVIDER diff --git a/scripts/ezfio_interface/ei_handler.py b/scripts/ezfio_interface/ei_handler.py index d7cd9c95..ef15c9b8 100755 --- a/scripts/ezfio_interface/ei_handler.py +++ b/scripts/ezfio_interface/ei_handler.py @@ -45,7 +45,7 @@ Optional: (by default is one) Example : 1, =sum(ao_num); (ao_num,3) ATTENTION : The module and the value are separed by a "." not a "_". - For exemple (determinants.n_det) + For example (determinants.n_det) ezfio_name: The name for the EZFIO lib (by default is ) ezfio_dir: Will be the folder of EZFIO. diff --git a/scripts/module/qp_module.py b/scripts/module/qp_module.py index 06ad5dd2..adeb3a46 100755 --- a/scripts/module/qp_module.py +++ b/scripts/module/qp_module.py @@ -213,7 +213,7 @@ def main(arguments): print "[ OK ]" print "" print "You can now compile as usual" - print "`cd {0} ; ninja` for exemple".format(QP_ROOT) + print "`cd {0} ; ninja` for example".format(QP_ROOT) print " or --in developement mode-- you can cd in a directory and compile here" elif arguments["uninstall"]: diff --git a/src/AO_Basis/EZFIO.cfg b/src/AO_Basis/EZFIO.cfg index 34bf2879..9e548514 100644 --- a/src/AO_Basis/EZFIO.cfg +++ b/src/AO_Basis/EZFIO.cfg @@ -54,3 +54,25 @@ type: logical doc: If true, use AOs in Cartesian coordinates (6d,10f,...) interface: ezfio, provider default: false + +[integral_overlap] +type: double precision +doc: Overlap integrals in AO basis set +size: (ao_basis.ao_num,ao_basis.ao_num) +interface: ezfio +default: false + +[integral_nuclear] +type: double precision +doc: Nucleus-electron integrals in AO basis set +size: (ao_basis.ao_num,ao_basis.ao_num) +interface: ezfio +default: false + +[integral_kinetic] +type: double precision +doc: Kinetic energy integrals in AO basis set +size: (ao_basis.ao_num,ao_basis.ao_num) +interface: ezfio +default: false + diff --git a/src/AO_Basis/ao_overlap.irp.f b/src/AO_Basis/ao_overlap.irp.f index 4487ff77..edf48b25 100644 --- a/src/AO_Basis/ao_overlap.irp.f +++ b/src/AO_Basis/ao_overlap.irp.f @@ -14,51 +14,60 @@ double precision :: alpha, beta, c double precision :: A_center(3), B_center(3) integer :: power_A(3), power_B(3) - dim1=100 - !$OMP PARALLEL DO SCHEDULE(GUIDED) & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(A_center,B_center,power_A,power_B,& - !$OMP overlap_x,overlap_y, overlap_z, overlap, & - !$OMP alpha, beta,i,j,c) & - !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & - !$OMP ao_overlap_x,ao_overlap_y,ao_overlap_z,ao_overlap,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & - !$OMP ao_expo_ordered_transp,dim1) - do j=1,ao_num - A_center(1) = nucl_coord( ao_nucl(j), 1 ) - A_center(2) = nucl_coord( ao_nucl(j), 2 ) - A_center(3) = nucl_coord( ao_nucl(j), 3 ) - power_A(1) = ao_power( j, 1 ) - power_A(2) = ao_power( j, 2 ) - power_A(3) = ao_power( j, 3 ) - !DEC$ VECTOR ALIGNED - !DEC$ VECTOR ALWAYS - do i= 1,ao_num - ao_overlap(i,j)= 0.d0 - ao_overlap_x(i,j)= 0.d0 - ao_overlap_y(i,j)= 0.d0 - ao_overlap_z(i,j)= 0.d0 - B_center(1) = nucl_coord( ao_nucl(i), 1 ) - B_center(2) = nucl_coord( ao_nucl(i), 2 ) - B_center(3) = nucl_coord( ao_nucl(i), 3 ) - power_B(1) = ao_power( i, 1 ) - power_B(2) = ao_power( i, 2 ) - power_B(3) = ao_power( i, 3 ) - do n = 1,ao_prim_num(j) - alpha = ao_expo_ordered_transp(n,j) - !DEC$ VECTOR ALIGNED - do l = 1, ao_prim_num(i) - beta = ao_expo_ordered_transp(l,i) - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) - c = ao_coef_normalized_ordered_transp(n,j) * ao_coef_normalized_ordered_transp(l,i) - ao_overlap(i,j) += c * overlap - ao_overlap_x(i,j) += c * overlap_x - ao_overlap_y(i,j) += c * overlap_y - ao_overlap_z(i,j) += c * overlap_z - enddo +! if (read_ao_one_integrals) then +! call ezfio_get_ao_basis_integral_overlap(ao_overlap(1:ao_num, 1:ao_num)) +! print *, 'AO overlap integrals read from disk' +! else + dim1=100 + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(A_center,B_center,power_A,power_B,& + !$OMP overlap_x,overlap_y, overlap_z, overlap, & + !$OMP alpha, beta,i,j,c) & + !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & + !$OMP ao_overlap_x,ao_overlap_y,ao_overlap_z,ao_overlap,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & + !$OMP ao_expo_ordered_transp,dim1) + do j=1,ao_num + A_center(1) = nucl_coord( ao_nucl(j), 1 ) + A_center(2) = nucl_coord( ao_nucl(j), 2 ) + A_center(3) = nucl_coord( ao_nucl(j), 3 ) + power_A(1) = ao_power( j, 1 ) + power_A(2) = ao_power( j, 2 ) + power_A(3) = ao_power( j, 3 ) + !DEC$ VECTOR ALIGNED + !DEC$ VECTOR ALWAYS + do i= 1,ao_num + ao_overlap(i,j)= 0.d0 + ao_overlap_x(i,j)= 0.d0 + ao_overlap_y(i,j)= 0.d0 + ao_overlap_z(i,j)= 0.d0 + B_center(1) = nucl_coord( ao_nucl(i), 1 ) + B_center(2) = nucl_coord( ao_nucl(i), 2 ) + B_center(3) = nucl_coord( ao_nucl(i), 3 ) + power_B(1) = ao_power( i, 1 ) + power_B(2) = ao_power( i, 2 ) + power_B(3) = ao_power( i, 3 ) + do n = 1,ao_prim_num(j) + alpha = ao_expo_ordered_transp(n,j) + !DEC$ VECTOR ALIGNED + do l = 1, ao_prim_num(i) + beta = ao_expo_ordered_transp(l,i) + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) + c = ao_coef_normalized_ordered_transp(n,j) * ao_coef_normalized_ordered_transp(l,i) + ao_overlap(i,j) += c * overlap + ao_overlap_x(i,j) += c * overlap_x + ao_overlap_y(i,j) += c * overlap_y + ao_overlap_z(i,j) += c * overlap_z + enddo + enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + enddo + !$OMP END PARALLEL DO +! endif +! if (write_ao_one_integrals) then +! call ezfio_set_ao_basis_integral_overlap(ao_overlap(1:ao_num, 1:ao_num)) +! print *, 'AO overlap integrals written to disk' +! endif END_PROVIDER diff --git a/src/Davidson/EZFIO.cfg b/src/Davidson/EZFIO.cfg index de2a6954..415e359e 100644 --- a/src/Davidson/EZFIO.cfg +++ b/src/Davidson/EZFIO.cfg @@ -7,6 +7,6 @@ default: 1.e-12 [n_states_diag] type: States_number doc: n_states_diag -default: 1 +default: 10 interface: ezfio,provider,ocaml diff --git a/src/Davidson/davidson.irp.f b/src/Davidson/davidson.irp.f deleted file mode 100644 index abe3b504..00000000 --- a/src/Davidson/davidson.irp.f +++ /dev/null @@ -1,3 +0,0 @@ -program davidson - stop 1 -end diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 6ab90d44..6935256e 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -12,8 +12,8 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st) integer , intent(in) :: blockb, blocke integer , intent(inout) :: N integer , intent(inout) :: idx(dav_size) - double precision , intent(inout) :: vt(N_states, dav_size) - double precision , intent(inout) :: st(N_states, dav_size) + double precision , intent(inout) :: vt(N_states_diag, dav_size) + double precision , intent(inout) :: st(N_states_diag, dav_size) integer :: i, j, sh, sh2, exa, ext, org_i, org_j, istate, ni, endi integer(bit_kind) :: sorted_i(N_int) @@ -63,7 +63,7 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st) vt (:,org_j) = 0d0 st (:,org_j) = 0d0 end if - do istate=1,N_states + do istate=1,N_states_diag vt (istate,org_i) += hij*dav_ut(istate,org_j) st (istate,org_i) += s2*dav_ut(istate,org_j) vt (istate,org_j) += hij*dav_ut(istate,org_i) @@ -79,7 +79,7 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st) do i=1, dav_size if(wrotten(i)) then N = N+1 - do istate=1,N_states + do istate=1,N_states_diag vt (istate,N) = vt (istate,i) st (istate,N) = st (istate,i) idx(N) = i @@ -91,23 +91,28 @@ end subroutine -subroutine davidson_collect(blockb, blocke, N, idx, vt, st , v0, s0) +subroutine davidson_collect(blockb, blocke, N, idx, vt, st , v0t, s0t) implicit none integer , intent(in) :: blockb, blocke integer , intent(in) :: N integer , intent(in) :: idx(N) - double precision , intent(in) :: vt(N_states, N) - double precision , intent(in) :: st(N_states, N) - double precision , intent(inout) :: v0(dav_size, N_states) - double precision , intent(inout) :: s0(dav_size, N_states) + double precision , intent(in) :: vt(N_states_diag, N) + double precision , intent(in) :: st(N_states_diag, N) + double precision , intent(inout) :: v0t(N_states_diag,dav_size) + double precision , intent(inout) :: s0t(N_states_diag,dav_size) - integer :: i + integer :: i, j, k + !DIR$ IVDEP do i=1,N - v0(idx(i), :) += vt(:, i) - s0(idx(i), :) += st(:, i) + k = idx(i) + !DIR$ IVDEP + do j=1,N_states_diag + v0t(j,k) = v0t(j,k) + vt(j,i) + s0t(j,k) = s0t(j,k) + st(j,i) + enddo end do end subroutine @@ -210,8 +215,8 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) allocate(idx(dav_size)) - allocate(vt(N_states, dav_size)) - allocate(st(N_states, dav_size)) + allocate(vt(N_states_diag, dav_size)) + allocate(st(N_states_diag, dav_size)) do @@ -221,7 +226,6 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) call davidson_process(blockb, blocke, N, idx, vt, st) - ! reverse ? call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st, task_id) end do @@ -239,8 +243,8 @@ subroutine davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st integer ,intent(in) :: blockb, blocke integer ,intent(in) :: N integer ,intent(in) :: idx(N) - double precision ,intent(in) :: vt(N_states, N) - double precision ,intent(in) :: st(N_states, N) + double precision ,intent(in) :: vt(N_states_diag, N) + double precision ,intent(in) :: st(N_states_diag, N) integer :: rc rc = f77_zmq_send( zmq_socket_push, blockb, 4, ZMQ_SNDMORE) @@ -255,11 +259,11 @@ subroutine davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st rc = f77_zmq_send( zmq_socket_push, idx, 4*N, ZMQ_SNDMORE) if(rc /= 4*N) stop "davidson_push_results failed to push idx" - rc = f77_zmq_send( zmq_socket_push, vt, 8*N_states* N, ZMQ_SNDMORE) - if(rc /= 8*N_states* N) stop "davidson_push_results failed to push vt" + rc = f77_zmq_send( zmq_socket_push, vt, 8*N_states_diag* N, ZMQ_SNDMORE) + if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to push vt" - rc = f77_zmq_send( zmq_socket_push, st, 8*N_states* N, ZMQ_SNDMORE) - if(rc /= 8*N_states* N) stop "davidson_push_results failed to push st" + rc = f77_zmq_send( zmq_socket_push, st, 8*N_states_diag* N, ZMQ_SNDMORE) + if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to push st" rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) if(rc /= 4) stop "davidson_push_results failed to push task_id" @@ -276,8 +280,8 @@ subroutine davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st integer ,intent(out) :: blockb, blocke integer ,intent(out) :: N integer ,intent(out) :: idx(dav_size) - double precision ,intent(out) :: vt(N_states, dav_size) - double precision ,intent(out) :: st(N_states, dav_size) + double precision ,intent(out) :: vt(N_states_diag, dav_size) + double precision ,intent(out) :: st(N_states_diag, dav_size) integer :: rc @@ -293,11 +297,11 @@ subroutine davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st rc = f77_zmq_recv( zmq_socket_pull, idx, 4*N, 0) if(rc /= 4*N) stop "davidson_push_results failed to pull idx" - rc = f77_zmq_recv( zmq_socket_pull, vt, 8*N_states* N, 0) - if(rc /= 8*N_states* N) stop "davidson_push_results failed to pull vt" + rc = f77_zmq_recv( zmq_socket_pull, vt, 8*N_states_diag* N, 0) + if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to pull vt" - rc = f77_zmq_recv( zmq_socket_pull, st, 8*N_states* N, 0) - if(rc /= 8*N_states* N) stop "davidson_push_results failed to pull st" + rc = f77_zmq_recv( zmq_socket_pull, st, 8*N_states_diag* N, 0) + if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to pull st" rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) if(rc /= 4) stop "davidson_pull_results failed to pull task_id" @@ -312,8 +316,8 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0) integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - double precision ,intent(inout) :: v0(dav_size, N_states) - double precision ,intent(inout) :: s0(dav_size, N_states) + double precision ,intent(inout) :: v0(dav_size, N_states_diag) + double precision ,intent(inout) :: s0(dav_size, N_states_diag) integer :: more, task_id @@ -321,29 +325,31 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0) integer :: blockb, blocke integer :: N integer , allocatable :: idx(:) - double precision , allocatable :: vt(:,:) + double precision , allocatable :: vt(:,:), v0t(:,:), s0t(:,:) double precision , allocatable :: st(:,:) - integer :: deleted - logical, allocatable :: done(:) - allocate(done(shortcut_(0,1))) - deleted = 0 - done = .false. allocate(idx(dav_size)) - allocate(vt(N_states, dav_size)) - allocate(st(N_states, dav_size)) + allocate(vt(N_states_diag, dav_size)) + allocate(st(N_states_diag, dav_size)) + allocate(v0t(N_states_diag, dav_size)) + allocate(s0t(N_states_diag, dav_size)) + v0t = 00.d0 + s0t = 00.d0 + more = 1 do while (more == 1) call davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, task_id) - call davidson_collect(blockb, blocke, N, idx, vt, st , v0, s0) -! done(block) = .true. + !DIR$ FORCEINLINE + call davidson_collect(blockb, blocke, N, idx, vt, st , v0t, s0t) call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) - deleted += 1 -! print *, "DELETED", deleted, done end do -! print *, "done collector" + deallocate(idx,vt,st) + + call dtranspose(v0t,size(v0t,1), v0, size(v0,1), N_states_diag, dav_size) + call dtranspose(s0t,size(s0t,1), s0, size(s0,1), N_states_diag, dav_size) + deallocate(v0t,s0t) end subroutine @@ -360,8 +366,8 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0) integer :: i integer, external :: omp_get_thread_num - double precision , intent(inout) :: v0(dav_size, N_states) - double precision , intent(inout) :: s0(dav_size, N_states) + double precision , intent(inout) :: v0(dav_size, N_states_diag) + double precision , intent(inout) :: s0(dav_size, N_states_diag) call zmq_set_running(zmq_to_qp_run_socket) @@ -393,7 +399,6 @@ end subroutine subroutine davidson_miniserver_run() use f77_zmq implicit none - integer(ZMQ_PTR) context integer(ZMQ_PTR) responder character*(64) address character(len=:), allocatable :: buffer @@ -402,8 +407,7 @@ subroutine davidson_miniserver_run() allocate (character(len=20) :: buffer) address = 'tcp://*:11223' - context = f77_zmq_ctx_new() - responder = f77_zmq_socket(context, ZMQ_REP) + responder = f77_zmq_socket(zmq_context, ZMQ_REP) rc = f77_zmq_bind(responder,address) do @@ -411,7 +415,7 @@ subroutine davidson_miniserver_run() if (buffer(1:rc) /= 'end') then rc = f77_zmq_send (responder, dav_size, 4, ZMQ_SNDMORE) rc = f77_zmq_send (responder, dav_det, 16*N_int*dav_size, ZMQ_SNDMORE) - rc = f77_zmq_send (responder, dav_ut, 8*dav_size*N_states, 0) + rc = f77_zmq_send (responder, dav_ut, 8*dav_size*N_states_diag, 0) else rc = f77_zmq_send (responder, "end", 3, 0) exit @@ -419,7 +423,6 @@ subroutine davidson_miniserver_run() enddo rc = f77_zmq_close(responder) - rc = f77_zmq_ctx_destroy(context) end subroutine @@ -427,21 +430,18 @@ subroutine davidson_miniserver_end() implicit none use f77_zmq - integer(ZMQ_PTR) context integer(ZMQ_PTR) requester character*(64) address integer rc character*(64) buf address = trim(qp_run_address)//':11223' - context = f77_zmq_ctx_new() - requester = f77_zmq_socket(context, ZMQ_REQ) + requester = f77_zmq_socket(zmq_context, ZMQ_REQ) rc = f77_zmq_connect(requester,address) rc = f77_zmq_send(requester, "end", 3, 0) rc = f77_zmq_recv(requester, buf, 3, 0) rc = f77_zmq_close(requester) - rc = f77_zmq_ctx_destroy(context) end subroutine @@ -449,7 +449,6 @@ subroutine davidson_miniserver_get() implicit none use f77_zmq - integer(ZMQ_PTR) context integer(ZMQ_PTR) requester character*(64) address character*(20) buffer @@ -457,21 +456,17 @@ subroutine davidson_miniserver_get() address = trim(qp_run_address)//':11223' - context = f77_zmq_ctx_new() - requester = f77_zmq_socket(context, ZMQ_REQ) + requester = f77_zmq_socket(zmq_context, ZMQ_REQ) rc = f77_zmq_connect(requester,address) rc = f77_zmq_send(requester, "Hello", 5, 0) rc = f77_zmq_recv(requester, dav_size, 4, 0) TOUCH dav_size rc = f77_zmq_recv(requester, dav_det, 16*N_int*dav_size, 0) - rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states, 0) + rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states_diag, 0) TOUCH dav_det dav_ut - rc = f77_zmq_close(requester) - rc = f77_zmq_ctx_destroy(context) - touch dav_det dav_ut end subroutine @@ -480,7 +475,7 @@ BEGIN_PROVIDER [ integer(bit_kind), dav_det, (N_int, 2, dav_size) ] END_PROVIDER -BEGIN_PROVIDER [ double precision, dav_ut, (N_states, dav_size) ] +BEGIN_PROVIDER [ double precision, dav_ut, (N_states_diag, dav_size) ] END_PROVIDER diff --git a/src/Davidson/davidson_slave.irp.f b/src/Davidson/davidson_slave.irp.f index 15830b1d..b5ec0592 100644 --- a/src/Davidson/davidson_slave.irp.f +++ b/src/Davidson/davidson_slave.irp.f @@ -8,7 +8,7 @@ program davidson_slave double precision :: energy(N_states_diag) character*(64) :: state -! call provide_everything + call provide_everything call switch_qp_run_to_master zmq_context = f77_zmq_ctx_new () @@ -35,6 +35,6 @@ program davidson_slave end do end -! subroutine provide_everything -! PROVIDE mo_bielec_integrals_in_map psi_det_sorted_bit N_states zmq_context -! end subroutine +subroutine provide_everything + PROVIDE mo_bielec_integrals_in_map psi_det_sorted_bit N_states_diag zmq_context +end subroutine diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index 7ebf222b..c44a27d2 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -71,7 +71,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ! ! N_st : Number of eigenstates ! - ! N_st_diag : Number of states in which H is diagonalized + ! N_st_diag : Number of states in which H is diagonalized. Assumed > sze ! ! iunit : Unit for the I/O ! @@ -89,24 +89,27 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s integer :: i,j,k,l,m logical :: converged - double precision, allocatable :: overlap(:,:) double precision :: u_dot_v, u_dot_u integer, allocatable :: kl_pairs(:,:) integer :: k_pairs, kl integer :: iter2 - double precision, allocatable :: W(:,:,:), U(:,:,:), R(:,:), S(:,:,:) - double precision, allocatable :: y(:,:,:,:), h(:,:,:,:), lambda(:), s2(:) - double precision, allocatable :: c(:), H_small(:,:) + double precision, allocatable :: W(:,:), U(:,:), R(:,:), S(:,:) + double precision, allocatable :: y(:,:), h(:,:), lambda(:), s2(:) + double precision, allocatable :: c(:), s_(:,:), s_tmp(:,:) double precision :: diag_h_mat_elem double precision, allocatable :: residual_norm(:) character*(16384) :: write_buffer double precision :: to_print(3,N_st) double precision :: cpu, wall + integer :: shift, shift2 include 'constants.include.F' !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, R, S, y, h, lambda + if (N_st_diag > sze) then + stop 'error in Davidson : N_st_diag > sze' + endif PROVIDE nuclear_repulsion @@ -140,29 +143,31 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s integer, external :: align_double sze_8 = align_double(sze) - double precision :: delta - - if (s2_eig) then - delta = 1.d0 - else - delta = 0.d0 - endif - allocate( & kl_pairs(2,N_st_diag*(N_st_diag+1)/2), & - W(sze_8,N_st_diag,davidson_sze_max), & - U(sze_8,N_st_diag,davidson_sze_max), & + W(sze_8,N_st_diag*davidson_sze_max), & + U(sze_8,N_st_diag*davidson_sze_max), & R(sze_8,N_st_diag), & - S(sze_8,N_st_diag,davidson_sze_max), & - h(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), & - y(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), & + S(sze_8,N_st_diag*davidson_sze_max), & + h(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & + y(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & + s_(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & + s_tmp(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & residual_norm(N_st_diag), & - overlap(N_st_diag,N_st_diag), & c(N_st_diag*davidson_sze_max), & - H_small(N_st_diag,N_st_diag), & - s2(N_st_diag), & + s2(N_st_diag*davidson_sze_max), & lambda(N_st_diag*davidson_sze_max)) + h = 0.d0 + s_ = 0.d0 + s_tmp = 0.d0 + c = 0.d0 + U = 0.d0 + S = 0.d0 + R = 0.d0 + y = 0.d0 + + ASSERT (N_st > 0) ASSERT (N_st_diag >= N_st) ASSERT (sze > 0) @@ -174,16 +179,17 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s converged = .False. - do k=1,N_st_diag + do k=1,N_st + call normalize(u_in(1,k),sze) + enddo - if (k > N_st) then + do k=N_st+1,N_st_diag do i=1,sze double precision :: r1, r2 call random_number(r1) call random_number(r2) u_in(i,k) = dsqrt(-2.d0*dlog(r1))*dcos(dtwo_pi*r2) enddo - endif ! Gram-Schmidt ! ------------ @@ -194,22 +200,26 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s call normalize(u_in(1,k),sze) enddo - do while (.not.converged) do k=1,N_st_diag do i=1,sze - U(i,k,1) = u_in(i,k) + U(i,k) = u_in(i,k) enddo enddo do iter=1,davidson_sze_max-1 + shift = N_st_diag*(iter-1) + shift2 = N_st_diag*iter + + ! Compute |W_k> = \sum_i |i> ! ----------------------------------------- - call H_S2_u_0_nstates(W(1,1,iter),S(1,1,iter),U(1,1,iter),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8) + + call H_S2_u_0_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8) ! Compute h_kl = = @@ -229,60 +239,104 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ! enddo ! enddo - call dgemm('T','N', N_st_diag*iter, N_st_diag, sze, & - 1.d0, U, size(U,1), W(1,1,iter), size(W,1), & - 0.d0, h(1,1,1,iter), size(h,1)*size(h,2)) + call dgemm('T','N', shift2, N_st_diag, sze, & + 1.d0, U, size(U,1), W(1,shift+1), size(W,1), & + 0.d0, h(1,shift+1), size(h,1)) + + call dgemm('T','N', shift2, N_st_diag, sze, & + 1.d0, U, size(U,1), S(1,shift+1), size(S,1), & + 0.d0, s_(1,shift+1), size(s_,1)) ! Diagonalize h ! ------------- - call lapack_diag(lambda,y,h,N_st_diag*davidson_sze_max,N_st_diag*iter) + call lapack_diag(lambda,y,h,size(h,1),shift2) + ! Compute S2 for each eigenvector + ! ------------------------------- + + call dgemm('N','N',shift2,shift2,shift2, & + 1.d0, s_, size(s_,1), y, size(y,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('T','N',shift2,shift2,shift2, & + 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & + 0.d0, s_, size(s_,1)) + + do k=1,shift2 + s2(k) = s_(k,k) + S_z2_Sz + enddo + + if (s2_eig) then + logical :: state_ok(N_st_diag*davidson_sze_max) + do k=1,shift2 + state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0) + enddo + do k=1,shift2 + if (.not. state_ok(k)) then + do l=k+1,shift2 + if (state_ok(l)) then + call dswap(shift2, y(1,k), 1, y(1,l), 1) + call dswap(1, s2(k), 1, s2(l), 1) + call dswap(1, lambda(k), 1, lambda(l), 1) + state_ok(k) = .True. + state_ok(l) = .False. + exit + endif + enddo + endif + enddo + endif + + ! Express eigenvectors of h in the determinant basis ! -------------------------------------------------- - do k=1,N_st_diag - do i=1,sze - U(i,k,iter+1) = 0.d0 - W(i,k,iter+1) = 0.d0 - S(i,k,iter+1) = 0.d0 - enddo - enddo ! do k=1,N_st_diag -! do iter2=1,iter -! do l=1,N_st_diag -! do i=1,sze -! U(i,k,iter+1) = U(i,k,iter+1) + U(i,l,iter2)*y(l,iter2,k,1) -! W(i,k,iter+1) = W(i,k,iter+1) + W(i,l,iter2)*y(l,iter2,k,1) -! enddo +! do i=1,sze +! U(i,shift2+k) = 0.d0 +! W(i,shift2+k) = 0.d0 +! S(i,shift2+k) = 0.d0 +! enddo +! do l=1,N_st_diag*iter +! do i=1,sze +! U(i,shift2+k) = U(i,shift2+k) + U(i,l)*y(l,k) +! W(i,shift2+k) = W(i,shift2+k) + W(i,l)*y(l,k) +! S(i,shift2+k) = S(i,shift2+k) + S(i,l)*y(l,k) ! enddo ! enddo ! enddo ! ! - call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, & - 1.d0, U, size(U,1), y, size(y,1)*size(y,2), 0.d0, U(1,1,iter+1), size(U,1)) - call dgemm('N','N',sze,N_st_diag,N_st_diag*iter, & - 1.d0, W, size(W,1), y, size(y,1)*size(y,2), 0.d0, W(1,1,iter+1), size(W,1)) - call dgemm('N','N',sze,N_st_diag,1, & - 1.d0, S, size(S,1), y, size(y,1)*size(y,2), 0.d0, S(1,1,iter+1), size(S,1)) + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, S, size(S,1), y, size(y,1), 0.d0, S(1,shift2+1), size(S,1)) ! Compute residual vector ! ----------------------- - do k=1,N_st_diag - s2(k) = u_dot_v(U(1,k,iter+1), S(1,k,iter+1), sze) - enddo - +! do k=1,N_st_diag +! print *, s2(k) +! s2(k) = u_dot_v(U(1,shift2+k), S(1,shift2+k), sze) + S_z2_Sz +! print *, s2(k) +! print *, '' +! pause +! enddo do k=1,N_st_diag do i=1,sze - R(i,k) = (lambda(k) * U(i,k,iter+1) - W(i,k,iter+1) ) & - * (1.d0 + s2(k) * U(i,k,iter+1) - S(i,k,iter+1) ) + R(i,k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & + * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz) enddo if (k <= N_st) then residual_norm(k) = u_dot_u(R(1,k),sze) to_print(1,k) = lambda(k) + nuclear_repulsion to_print(2,k) = s2(k) to_print(3,k) = residual_norm(k) + if (residual_norm(k) > 1.e9) then + stop 'Davidson failed' + endif endif enddo @@ -297,7 +351,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s do k=1,N_st_diag do i=1,sze - U(i,k,iter+1) = - R(i,k)/max(H_jj(i) - lambda(k),1.d-2) + U(i,shift2+k) = - R(i,k)/max(H_jj(i) - lambda(k),1.d-2) enddo enddo @@ -306,33 +360,31 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s do k=1,N_st_diag -! do iter2=1,iter -! do l=1,N_st_diag -! c(1) = u_dot_v(U(1,k,iter+1),U(1,l,iter2),sze) +! do l=1,N_st_diag*iter +! c(1) = u_dot_v(U(1,shift2+k),U(1,l),sze) ! do i=1,sze -! U(i,k,iter+1) = U(i,k,iter+1) - c(1) * U(i,l,iter2) +! U(i,k,iter+1) = U(i,shift2+k) - c(1) * U(i,l) ! enddo -! enddo ! enddo ! call dgemv('T',sze,N_st_diag*iter,1.d0,U,size(U,1), & - U(1,k,iter+1),1,0.d0,c,1) + U(1,shift2+k),1,0.d0,c,1) call dgemv('N',sze,N_st_diag*iter,-1.d0,U,size(U,1), & - c,1,1.d0,U(1,k,iter+1),1) + c,1,1.d0,U(1,shift2+k),1) ! ! do l=1,k-1 -! c(1) = u_dot_v(U(1,k,iter+1),U(1,l,iter+1),sze) +! c(1) = u_dot_v(U(1,shift2+k),U(1,shift2+l),sze) ! do i=1,sze -! U(i,k,iter+1) = U(i,k,iter+1) - c(1) * U(i,l,iter+1) +! U(i,k,iter+1) = U(i,shift2+k) - c(1) * U(i,shift2+l) ! enddo ! enddo ! - call dgemv('T',sze,k-1,1.d0,U(1,1,iter+1),size(U,1), & - U(1,k,iter+1),1,0.d0,c,1) - call dgemv('N',sze,k-1,-1.d0,U(1,1,iter+1),size(U,1), & - c,1,1.d0,U(1,k,iter+1),1) + call dgemv('T',sze,k-1,1.d0,U(1,shift2+1),size(U,1), & + U(1,shift2+k),1,0.d0,c,1) + call dgemv('N',sze,k-1,-1.d0,U(1,shift2+1),size(U,1), & + c,1,1.d0,U(1,shift2+k),1) - call normalize( U(1,k,iter+1), sze ) + call normalize( U(1,shift2+k), sze ) enddo enddo @@ -346,23 +398,19 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s do k=1,N_st_diag energies(k) = lambda(k) - do i=1,sze - u_in(i,k) = 0.d0 - enddo enddo + ! do k=1,N_st_diag ! do i=1,sze -! do iter2=1,iter -! do l=1,N_st_diag -! u_in(i,k) += U(i,l,iter2)*y(l,iter2,k,1) +! do l=1,iter*N_st_diag +! u_in(i,k) += U(i,l)*y(l,k) ! enddo ! enddo ! enddo ! enddo call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, 1.d0, & - U, size(U,1), y, N_st_diag*davidson_sze_max, & - 0.d0, u_in, size(u_in,1)) + U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) enddo @@ -377,10 +425,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s deallocate ( & kl_pairs, & W, residual_norm, & - U, overlap, & - R, c, & + U, & + R, c, S, & h, & - y, & + y, s_, s_tmp, & lambda & ) end diff --git a/src/Davidson/diagonalize_CI.irp.f b/src/Davidson/diagonalize_CI.irp.f index 8e9a0543..ecd2d6b2 100644 --- a/src/Davidson/diagonalize_CI.irp.f +++ b/src/Davidson/diagonalize_CI.irp.f @@ -55,12 +55,16 @@ END_PROVIDER if (diag_algorithm == "Davidson") then +! call davidson_diag(psi_det,CI_eigenvectors,CI_electronic_energy, & +! size(CI_eigenvectors,1), & +! N_det,min(N_det,N_states),min(N_det,N_states_diag),N_int,output_determinants) +! call davidson_diag_HS2(psi_det,CI_eigenvectors, & size(CI_eigenvectors,1),CI_electronic_energy, & - N_det,N_states,N_states_diag,N_int,output_determinants) + N_det,min(N_det,N_states),min(N_det,N_states_diag),N_int,output_determinants) call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int,& - N_states_diag,size(CI_eigenvectors,1)) + min(N_det,N_states_diag),size(CI_eigenvectors,1)) else if (diag_algorithm == "Lapack") then @@ -145,71 +149,6 @@ END_PROVIDER deallocate(eigenvectors,eigenvalues) endif - - if( s2_eig.and.(N_states_diag > 1).and.(N_det >= N_states_diag) )then - ! Diagonalizing S^2 within the "n_states_diag" states found - allocate(s2_eigvalues(N_states_diag), e_array(N_states_diag)) - call diagonalize_s2_betweenstates(psi_det,CI_eigenvectors,N_det,size(psi_det,3), & - size(CI_eigenvectors,1),min(n_states_diag,n_det),s2_eigvalues) - - double precision, allocatable :: psi_coef_tmp(:,:) - allocate(psi_coef_tmp(psi_det_size,N_states_diag)) - do j = 1, N_states_diag - do i = 1, N_det - psi_coef_tmp(i,j) = CI_eigenvectors(i,j) - enddo - enddo - call u_0_H_u_0(e_array,psi_coef_tmp,n_det,psi_det,N_int,N_states_diag,psi_det_size) - - ! Browsing the "n_states_diag" states and getting the lowest in energy "n_states" ones that have the S^2 value - ! closer to the "expected_s2" set as input - - allocate(index_good_state_array(N_det),good_state_array(N_det)) - good_state_array = .False. - i_state = 0 - do j = 1, N_states_diag - if(dabs(s2_eigvalues(j)-expected_s2).le.0.5d0)then - good_state_array(j) = .True. - i_state +=1 - index_good_state_array(i_state) = j - endif - enddo - ! Sorting the i_state good states by energy - allocate(iorder(i_state)) - do j = 1, i_state - do i = 1, N_det - CI_eigenvectors(i,j) = psi_coef_tmp(i,index_good_state_array(j)) - enddo - CI_eigenvectors_s2(j) = s2_eigvalues(index_good_state_array(j)) - CI_electronic_energy(j) = e_array(j) - iorder(j) = j - enddo - call dsort(e_array,iorder,i_state) - do j = 1, i_state - CI_electronic_energy(j) = e_array(j) - CI_eigenvectors_s2(j) = s2_eigvalues(index_good_state_array(iorder(j))) - do i = 1, N_det - CI_eigenvectors(i,j) = psi_coef_tmp(i,index_good_state_array(iorder(j))) - enddo - enddo - - ! Then setting the other states without any specific energy order - i_other_state = 0 - do j = 1, N_states_diag - if(good_state_array(j))cycle - i_other_state +=1 - do i = 1, N_det - CI_eigenvectors(i,i_state + i_other_state) = psi_coef_tmp(i,j) - enddo - CI_eigenvectors_s2(i_state + i_other_state) = s2_eigvalues(j) - CI_electronic_energy(i_state + i_other_state) = e_array(i_state + i_other_state) - enddo - deallocate(iorder,e_array,index_good_state_array,good_state_array,psi_coef_tmp) - - deallocate(s2_eigvalues) - - endif - END_PROVIDER subroutine diagonalize_CI diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index fe8efcf2..6b3f2782 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -58,8 +58,8 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) integer, external :: align_double !!!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut - if(N_st /= N_states) stop "H_u_0_nstates N_st /= N_states" - N_st_8 = N_states ! align_double(N_st) + if(N_st /= N_states_diag) stop "H_u_0_nstates N_st /= N_states_diag" + N_st_8 = N_states_diag ! align_double(N_st) ASSERT (Nint > 0) ASSERT (Nint == N_int) @@ -214,7 +214,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) integer(ZMQ_PTR) :: handler - if(N_st /= N_states .or. sze_8 < N_det) stop "assert fail in H_S2_u_0_nstates" + if(N_st /= N_states_diag .or. sze_8 < N_det) stop "assert fail in H_S2_u_0_nstates" N_st_8 = N_st !! align_double(N_st) ASSERT (Nint > 0) @@ -249,7 +249,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) call davidson_init(handler) do sh=shortcut(0,1),1,-1 workload += (shortcut(sh+1,1) - shortcut(sh,1))**2 - if(workload > 10000) then + if(workload > 100000) then blocke = sh call davidson_add_task(handler, blocke, blockb) blockb = sh-1 @@ -257,13 +257,13 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) end if enddo - if(blockb) call davidson_add_task(handler, 1, blockb) - + if(blockb > 0) call davidson_add_task(handler, 1, blockb) call davidson_run(handler, v_0, s_0) - + !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) + allocate(vt(N_st_8,n),st(N_st_8,n)) Vt = 0.d0 St = 0.d0 @@ -311,6 +311,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate) enddo enddo + deallocate (shortcut, sort_idx, sorted, version) end diff --git a/src/Determinants/psi_cas.irp.f b/src/Determinants/psi_cas.irp.f index 304a2370..968ced57 100644 --- a/src/Determinants/psi_cas.irp.f +++ b/src/Determinants/psi_cas.irp.f @@ -21,9 +21,9 @@ use bitmasks do k=1,N_int good = good .and. ( & iand(not(cas_bitmask(k,1,l)), psi_det(k,1,i)) == & - iand(not(cas_bitmask(k,1,l)), psi_det(k,1,1)) ) .and. ( & + iand(not(cas_bitmask(k,1,l)), hf_bitmask(k,1)) ) .and. ( & iand(not(cas_bitmask(k,2,l)), psi_det(k,2,i)) == & - iand(not(cas_bitmask(k,2,l)), psi_det(k,2,1)) ) + iand(not(cas_bitmask(k,2,l)), hf_bitmask(k,2)) ) enddo if (good) then exit diff --git a/src/Integrals_Bielec/map_integrals.irp.f b/src/Integrals_Bielec/map_integrals.irp.f index fdcf4038..28b7d2e2 100644 --- a/src/Integrals_Bielec/map_integrals.irp.f +++ b/src/Integrals_Bielec/map_integrals.irp.f @@ -109,6 +109,42 @@ subroutine bielec_integrals_index_reverse(i,j,k,l,i1) end + BEGIN_PROVIDER [ integer, ao_integrals_cache_min ] +&BEGIN_PROVIDER [ integer, ao_integrals_cache_max ] + implicit none + BEGIN_DOC + ! Min and max values of the AOs for which the integrals are in the cache + END_DOC + ao_integrals_cache_min = max(1,ao_num - 63) + ao_integrals_cache_max = ao_num + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, ao_integrals_cache, (ao_integrals_cache_min:ao_integrals_cache_max,ao_integrals_cache_min:ao_integrals_cache_max,ao_integrals_cache_min:ao_integrals_cache_max,ao_integrals_cache_min:ao_integrals_cache_max) ] + implicit none + BEGIN_DOC + ! Cache of AO integrals for fast access + END_DOC + PROVIDE ao_bielec_integrals_in_map + integer :: i,j,k,l + integer(key_kind) :: idx + !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx) + do l=ao_integrals_cache_min,ao_integrals_cache_max + do k=ao_integrals_cache_min,ao_integrals_cache_max + do j=ao_integrals_cache_min,ao_integrals_cache_max + do i=ao_integrals_cache_min,ao_integrals_cache_max + !DIR$ FORCEINLINE + call bielec_integrals_index(i,j,k,l,idx) + !DIR$ FORCEINLINE + call map_get(ao_integrals_map,idx,ao_integrals_cache(i,j,k,l)) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + double precision function get_ao_bielec_integral(i,j,k,l,map) use map_module @@ -127,8 +163,20 @@ double precision function get_ao_bielec_integral(i,j,k,l,map) else if (ao_bielec_integral_schwartz(i,k)*ao_bielec_integral_schwartz(j,l) < ao_integrals_threshold) then tmp = 0.d0 else - call bielec_integrals_index(i,j,k,l,idx) - call map_get(map,idx,tmp) + if ( (i >= ao_integrals_cache_min) .and. & + (j >= ao_integrals_cache_min) .and. & + (k >= ao_integrals_cache_min) .and. & + (l >= ao_integrals_cache_min) .and. & + (i <= ao_integrals_cache_max) .and. & + (j <= ao_integrals_cache_max) .and. & + (k <= ao_integrals_cache_max) .and. & + (l <= ao_integrals_cache_max) ) then + tmp = ao_integrals_cache(i,j,k,l) + else + !DIR$ FORCEINLINE + call bielec_integrals_index(i,j,k,l,idx) + call map_get(map,idx,tmp) + endif endif get_ao_bielec_integral = tmp end @@ -155,16 +203,9 @@ subroutine get_ao_bielec_integrals(j,k,l,sze,out_val) return endif + double precision :: get_ao_bielec_integral do i=1,sze - if (ao_overlap_abs(i,k)*ao_overlap_abs(j,l) < thresh ) then - out_val(i) = 0.d0 - else if (ao_bielec_integral_schwartz(i,k)*ao_bielec_integral_schwartz(j,l) < thresh) then - out_val(i)=0.d0 - else - !DIR$ FORCEINLINE - call bielec_integrals_index(i,j,k,l,hash) - call map_get(ao_integrals_map, hash, out_val(i)) - endif + out_val(i) = get_ao_bielec_integral(i,j,k,l,ao_integrals_map) enddo end @@ -276,6 +317,43 @@ subroutine insert_into_mo_integrals_map(n_integrals, & call map_update(mo_integrals_map, buffer_i, buffer_values, n_integrals, thr) end + BEGIN_PROVIDER [ integer, mo_integrals_cache_min ] +&BEGIN_PROVIDER [ integer, mo_integrals_cache_max ] + implicit none + BEGIN_DOC + ! Min and max values of the MOs for which the integrals are in the cache + END_DOC + mo_integrals_cache_min = max(1,elec_alpha_num - 31) + mo_integrals_cache_max = min(mo_tot_num,elec_alpha_num + 32) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mo_integrals_cache, (mo_integrals_cache_min:mo_integrals_cache_max,mo_integrals_cache_min:mo_integrals_cache_max,mo_integrals_cache_min:mo_integrals_cache_max,mo_integrals_cache_min:mo_integrals_cache_max) ] + implicit none + BEGIN_DOC + ! Cache of MO integrals for fast access + END_DOC + PROVIDE mo_bielec_integrals_in_map + integer :: i,j,k,l + integer(key_kind) :: idx + FREE ao_integrals_cache + !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx) + do l=mo_integrals_cache_min,mo_integrals_cache_max + do k=mo_integrals_cache_min,mo_integrals_cache_max + do j=mo_integrals_cache_min,mo_integrals_cache_max + do i=mo_integrals_cache_min,mo_integrals_cache_max + !DIR$ FORCEINLINE + call bielec_integrals_index(i,j,k,l,idx) + !DIR$ FORCEINLINE + call map_get(mo_integrals_map,idx,mo_integrals_cache(i,j,k,l)) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + double precision function get_mo_bielec_integral(i,j,k,l,map) use map_module implicit none @@ -287,11 +365,22 @@ double precision function get_mo_bielec_integral(i,j,k,l,map) type(map_type), intent(inout) :: map real(integral_kind) :: tmp PROVIDE mo_bielec_integrals_in_map - !DIR$ FORCEINLINE - call bielec_integrals_index(i,j,k,l,idx) - !DIR$ FORCEINLINE - call map_get(map,idx,tmp) - get_mo_bielec_integral = dble(tmp) + if ( (i >= mo_integrals_cache_min) .and. & + (j >= mo_integrals_cache_min) .and. & + (k >= mo_integrals_cache_min) .and. & + (l >= mo_integrals_cache_min) .and. & + (i <= mo_integrals_cache_max) .and. & + (j <= mo_integrals_cache_max) .and. & + (k <= mo_integrals_cache_max) .and. & + (l <= mo_integrals_cache_max) ) then + get_mo_bielec_integral = mo_integrals_cache(i,j,k,l) + else + !DIR$ FORCEINLINE + call bielec_integrals_index(i,j,k,l,idx) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + get_mo_bielec_integral = dble(tmp) + endif end double precision function get_mo_bielec_integral_schwartz(i,j,k,l,map) @@ -306,14 +395,12 @@ double precision function get_mo_bielec_integral_schwartz(i,j,k,l,map) real(integral_kind) :: tmp PROVIDE mo_bielec_integrals_in_map if (mo_bielec_integral_schwartz(i,k)*mo_bielec_integral_schwartz(j,l) > mo_integrals_threshold) then + double precision, external :: get_mo_bielec_integral !DIR$ FORCEINLINE - call bielec_integrals_index(i,j,k,l,idx) - !DIR$ FORCEINLINE - call map_get(map,idx,tmp) + get_mo_bielec_integral_schwartz = get_mo_bielec_integral(i,j,k,l,map) else tmp = 0.d0 endif - get_mo_bielec_integral_schwartz = dble(tmp) end @@ -325,6 +412,7 @@ double precision function mo_bielec_integral(i,j,k,l) integer, intent(in) :: i,j,k,l double precision :: get_mo_bielec_integral PROVIDE mo_bielec_integrals_in_map + !DIR$ FORCEINLINE mo_bielec_integral = get_mo_bielec_integral(i,j,k,l,mo_integrals_map) return end diff --git a/src/Integrals_Monoelec/kin_ao_ints.irp.f b/src/Integrals_Monoelec/kin_ao_ints.irp.f index e3e92348..6cb2aa49 100644 --- a/src/Integrals_Monoelec/kin_ao_ints.irp.f +++ b/src/Integrals_Monoelec/kin_ao_ints.irp.f @@ -131,8 +131,8 @@ BEGIN_PROVIDER [double precision, ao_kinetic_integral, (ao_num_align,ao_num)] integer :: i,j,k,l if (read_ao_one_integrals) then - call read_one_e_integrals('ao_kinetic_integral', ao_kinetic_integral,& - size(ao_kinetic_integral,1), size(ao_kinetic_integral,2)) + call ezfio_get_ao_basis_integral_kinetic(ao_kinetic_integral(1:ao_num, 1:ao_num)) + call ezfio_set_ao_basis_integral_kinetic(ao_kinetic_integral(1:ao_num, 1:ao_num)) print *, 'AO kinetic integrals read from disk' else !$OMP PARALLEL DO DEFAULT(NONE) & @@ -151,8 +151,7 @@ BEGIN_PROVIDER [double precision, ao_kinetic_integral, (ao_num_align,ao_num)] !$OMP END PARALLEL DO endif if (write_ao_one_integrals) then - call write_one_e_integrals('ao_kinetic_integral', ao_kinetic_integral,& - size(ao_kinetic_integral,1), size(ao_kinetic_integral,2)) + call ezfio_set_ao_basis_integral_kinetic(ao_kinetic_integral(1:ao_num, 1:ao_num)) print *, 'AO kinetic integrals written to disk' endif END_PROVIDER diff --git a/src/Integrals_Monoelec/pot_ao_ints.irp.f b/src/Integrals_Monoelec/pot_ao_ints.irp.f index 45b9067a..7116d2c7 100644 --- a/src/Integrals_Monoelec/pot_ao_ints.irp.f +++ b/src/Integrals_Monoelec/pot_ao_ints.irp.f @@ -11,8 +11,7 @@ BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num_align,ao_num)] double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult if (read_ao_one_integrals) then - call read_one_e_integrals('ao_ne_integral', ao_nucl_elec_integral, & - size(ao_nucl_elec_integral,1), size(ao_nucl_elec_integral,2)) + call ezfio_get_ao_basis_integral_nuclear(ao_nucl_elec_integral(1:ao_num, 1:ao_num)) print *, 'AO N-e integrals read from disk' else @@ -74,8 +73,7 @@ BEGIN_PROVIDER [ double precision, ao_nucl_elec_integral, (ao_num_align,ao_num)] !$OMP END PARALLEL endif if (write_ao_one_integrals) then - call write_one_e_integrals('ao_ne_integral', ao_nucl_elec_integral, & - size(ao_nucl_elec_integral,1), size(ao_nucl_elec_integral,2)) + call ezfio_set_ao_basis_integral_nuclear(ao_nucl_elec_integral(1:ao_num, 1:ao_num)) print *, 'AO N-e integrals written to disk' endif diff --git a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f index 28437b27..b34b201e 100644 --- a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f +++ b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f @@ -53,6 +53,13 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu call wall_time(wall_1) call cpu_time(cpu_1) +!write(33,*) 'xxxLOCxxx' +!write(33,*) 'pseudo_klocmax', pseudo_klocmax +!write(33,*) 'pseudo_v_k_transp ', pseudo_v_k_transp +!write(33,*) 'pseudo_n_k_transp ', pseudo_n_k_transp +!write(33,*) 'pseudo_dz_k_transp', pseudo_dz_k_transp +!write(33,*) 'xxxLOCxxx' + thread_num = 0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -102,7 +109,15 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu pseudo_n_k_transp (1,k), & pseudo_dz_k_transp(1,k), & A_center,power_A,alpha,B_center,power_B,beta,C_center) - +! write(33,*) i,j,k +! write(33,*) A_center,power_A,alpha,B_center,power_B,beta,C_center, & +! Vloc(pseudo_klocmax, & +! pseudo_v_k_transp (1,k), & +! pseudo_n_k_transp (1,k), & +! pseudo_dz_k_transp(1,k), & +! A_center,power_A,alpha,B_center,power_B,beta,C_center) +! write(33,*) + enddo ao_pseudo_integral_local(i,j) = ao_pseudo_integral_local(i,j) +& ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i)*c @@ -123,7 +138,6 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu !$OMP END DO !$OMP END PARALLEL - END_PROVIDER @@ -151,6 +165,13 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu call wall_time(wall_1) call cpu_time(cpu_1) thread_num = 0 +!write(34,*) 'xxxNONLOCxxx' +!write(34,*) ' pseudo_lmax,pseudo_kmax', pseudo_lmax,pseudo_kmax +!write(34,*) ' pseudo_v_kl_transp(1,0,k)', pseudo_v_kl_transp +!write(34,*) ' pseudo_n_kl_transp(1,0,k)', pseudo_n_kl_transp +!write(34,*) ' pseudo_dz_kl_transp(1,0,k)', pseudo_dz_kl_transp +!write(34,*) 'xxxNONLOCxxx' + !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& @@ -201,6 +222,15 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu pseudo_n_kl_transp(1,0,k), & pseudo_dz_kl_transp(1,0,k), & A_center,power_A,alpha,B_center,power_B,beta,C_center) +! write(34,*) i,j,k +! write(34,*) & +! A_center,power_A,alpha,B_center,power_B,beta,C_center, & +! Vpseudo(pseudo_lmax,pseudo_kmax, & +! pseudo_v_kl_transp(1,0,k), & +! pseudo_n_kl_transp(1,0,k), & +! pseudo_dz_kl_transp(1,0,k), & +! A_center,power_A,alpha,B_center,power_B,beta,C_center) +! write(34,*) '' enddo ao_pseudo_integral_non_local(i,j) = ao_pseudo_integral_non_local(i,j) +& ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i)*c @@ -223,7 +253,6 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu !$OMP END PARALLEL - END_PROVIDER BEGIN_PROVIDER [ double precision, pseudo_v_k_transp, (pseudo_klocmax,nucl_num) ] diff --git a/src/Integrals_Monoelec/pseudopot.f90 b/src/Integrals_Monoelec/pseudopot.f90 index c89bb019..d77b3ca0 100644 --- a/src/Integrals_Monoelec/pseudopot.f90 +++ b/src/Integrals_Monoelec/pseudopot.f90 @@ -136,7 +136,7 @@ end if(l.eq.2.and.m.eq.2)ylm_real=dsqrt(15.d0/16.d0/pi)*(xchap*xchap-ychap*ychap) if(l.eq.2.and.m.eq.1)ylm_real=dsqrt(15.d0/fourpi)*xchap*zchap - if(l.eq.2.and.m.eq.0)ylm_real=dsqrt(5.d0/16.d0/pi)*(-xchap*xchap-ychap*ychap+2.d0*zchap*zchap) + if(l.eq.2.and.m.eq.0)ylm_real=dsqrt(5.d0/16.d0/pi)*(2.d0*zchap*zchap-xchap*xchap-ychap*ychap) if(l.eq.2.and.m.eq.-1)ylm_real=dsqrt(15.d0/fourpi)*ychap*zchap if(l.eq.2.and.m.eq.-2)ylm_real=dsqrt(15.d0/fourpi)*xchap*ychap @@ -276,30 +276,16 @@ if(ac.eq.0.d0.and.bc.eq.0.d0)then do k=1,kmax do l=0,lmax ktot=ntot+n_kl(k,l) + if (v_kl(k,l) == 0.d0) cycle do m=-l,l prod=bigI(0,0,l,m,n_a(1),n_a(2),n_a(3)) + if (prod == 0.d0) cycle prodp=bigI(0,0,l,m,n_b(1),n_b(2),n_b(3)) - - accu=accu+prod*prodp*v_kl(k,l)*int_prod_bessel(ktot+2,g_a+g_b+dz_kl(k,l),0,0,areal,breal,arg) - + if (prodp == 0.d0) cycle + accu=accu+prod*prodp*v_kl(k,l)*int_prod_bessel(ktot+2,g_a+g_b+dz_kl(k,l),0,0,areal,breal,arg) enddo enddo enddo -! do k=1,kmax -! do l=0,lmax -! ktot=ntot+n_kl(k,l) -! do m=-l,l -! prod =bigI(0,0,l,m,n_a(1),n_a(2),n_a(3))*v_kl(k,l) -! prodp=bigI(0,0,l,m,n_b(1),n_b(2),n_b(3))*prod -! if (dabs (prodp) < 1.d-15) then -! cycle -! endif -! -! accu=accu+prodp*int_prod_bessel(ktot+2,g_a+g_b+dz_kl(k,l),0,0,areal,breal,arg) -! -! enddo -! enddo -! enddo !=!=!=!=! ! E n d ! @@ -386,14 +372,17 @@ else if(ac.ne.0.d0.and.bc.ne.0.d0)then enddo do k3=0,n_a(3) + if (array_coefs_A(k3,3) == 0.d0) cycle do k2=0,n_a(2) + if (array_coefs_A(k2,2) == 0.d0) cycle do k1=0,n_a(1) - + if (array_coefs_A(k1,1) == 0.d0) cycle + do lambda=0,l+ntotA do mu=-lambda,lambda prod=ylm(lambda,mu,theta_AC0,phi_AC0)*array_coefs_A(k1,1)*array_coefs_A(k2,2)*array_coefs_A(k3,3)*array_I_A(mu,lambda,k1,k2,k3) - + if (prod == 0.d0) cycle do k3p=0,n_b(3) do k2p=0,n_b(2) @@ -405,6 +394,7 @@ else if(ac.ne.0.d0.and.bc.ne.0.d0)then array_coefs_B(k1p,1)*array_coefs_B(k2p,2)*array_coefs_B(k3p,3)* & array_I_B(mup,lambdap,k1p,k2p,k3p) + if (prodp == 0.d0) cycle do k=1,kmax ktot=k1+k2+k3+k1p+k2p+k3p+n_kl(k,l) accu=accu+prodp*v_kl(k,l)*array_R(k,ktot,l,lambda,lambdap) @@ -490,13 +480,18 @@ else if(ac.eq.0.d0.and.bc.ne.0.d0)then prod=bigI(0,0,l,m,n_a(1),n_a(2),n_a(3)) do k3p=0,n_b(3) + if (array_coefs_B(k3p,3) == 0.d0) cycle do k2p=0,n_b(2) + if (array_coefs_B(k2p,2) == 0.d0) cycle do k1p=0,n_b(1) + if (array_coefs_B(k1p,1) == 0.d0) cycle do lambdap=0,l+ntotB do mup=-lambdap,lambdap prodp=prod*array_coefs_B(k1p,1)*array_coefs_B(k2p,2)*array_coefs_B(k3p,3)*ylm(lambdap,mup,theta_BC0,phi_BC0)*array_I_B(mup,lambdap,k1p,k2p,k3p) + if (prodp == 0.d0) cycle + do k=1,kmax ktot=ntotA+k1p+k2p+k3p+n_kl(k,l) @@ -573,13 +568,19 @@ else if(ac.ne.0.d0.and.bc.eq.0.d0)then enddo do k3=0,n_a(3) + if (array_coefs_A(k3,3) == 0.d0) cycle do k2=0,n_a(2) + if (array_coefs_A(k2,2) == 0.d0) cycle do k1=0,n_a(1) + if (array_coefs_A(k1,1) == 0.d0) cycle do lambda=0,l+ntotA do mu=-lambda,lambda prod=array_coefs_A(k1,1)*array_coefs_A(k2,2)*array_coefs_A(k3,3)*ylm(lambda,mu,theta_AC0,phi_AC0)*array_I_A(mu,lambda,k1,k2,k3) + if (prod == 0.d0) cycle prodp=prod*bigI(0,0,l,m,n_b(1),n_b(2),n_b(3)) + + if (prodp == 0.d0) cycle do k=1,kmax ktot=k1+k2+k3+ntotB+n_kl(k,l) @@ -812,18 +813,22 @@ double precision int_prod_bessel_loc,binom_func,accu,prod,ylm,bigI,arg phi_DC0=datan2(d(2)/d2,d(1)/d2) do k=1,klocmax + if (v_k(k) == 0.d0) cycle do k1=0,n_a(1) do k2=0,n_a(2) do k3=0,n_a(3) do k1p=0,n_b(1) do k2p=0,n_b(2) do k3p=0,n_b(3) + if (array_coefs(k1,k2,k3,k1p,k2p,k3p) == 0.d0) cycle do l=0,ntot do m=-l,l coef=ylm(l,m,theta_DC0,phi_DC0) + if (coef == 0.d0) cycle + ktot=k1+k2+k3+k1p+k2p+k3p+n_k(k) + if (array_R_loc(ktot,k,l) == 0.d0) cycle prod=coef*array_coefs(k1,k2,k3,k1p,k2p,k3p) & *bigI(l,m,0,0,k1+k1p,k2+k2p,k3+k3p) - ktot=k1+k2+k3+k1p+k2p+k3p+n_k(k) accu=accu+prod*v_k(k)*array_R_loc(ktot,k,l) enddo enddo @@ -864,18 +869,24 @@ double precision pi,sum,factor1,factor2,cylm,cylmp,bigA,binom_func,fact,coef_pm double precision sgn, sgnp pi=dacos(-1.d0) +bigI=0.d0 if(mu.gt.0.and.m.gt.0)then sum=0.d0 factor1=dsqrt((2*lambda+1)*fact(lambda-iabs(mu))/(2.d0*pi*fact(lambda+iabs(mu)))) +if (factor1== 0.d0) return factor2=dsqrt((2*l+1)*fact(l-iabs(m))/(2.d0*pi*fact(l+iabs(m)))) +if (factor2== 0.d0) return sgn = 1.d0 do k=0,mu/2 do i=0,lambda-mu + if (coef_pm(lambda,i+mu) == 0.d0) cycle sgnp = 1.d0 do kp=0,m/2 do ip=0,l-m cylm=sgn*factor1*binom_func(mu,2*k)*fact(mu+i)/fact(i)*coef_pm(lambda,i+mu) + if (cylm == 0.d0) cycle cylmp=sgnp*factor2*binom_func(m,2*kp)*fact(m+ip)/fact(ip)*coef_pm(l,ip+m) + if (cylmp == 0.d0) cycle sum=sum+cylm*cylmp*bigA(mu-2*k+m-2*kp+k1,2*k+2*kp+k2,i+ip+k3) enddo sgnp = -sgnp @@ -889,12 +900,16 @@ endif if(mu.eq.0.and.m.eq.0)then factor1=dsqrt((2*lambda+1)/(4.d0*pi)) +if (factor1== 0.d0) return factor2=dsqrt((2*l+1)/(4.d0*pi)) +if (factor2== 0.d0) return sum=0.d0 do i=0,lambda do ip=0,l cylm=factor1*coef_pm(lambda,i) + if (cylm == 0.d0) cycle cylmp=factor2*coef_pm(l,ip) + if (cylmp == 0.d0) cycle sum=sum+cylm*cylmp*bigA(k1,k2,i+ip+k3) enddo enddo @@ -904,14 +919,18 @@ endif if(mu.eq.0.and.m.gt.0)then factor1=dsqrt((2*lambda+1)/(4.d0*pi)) +if (factor1== 0.d0) return factor2=dsqrt((2*l+1)*fact(l-iabs(m))/(2.d0*pi*fact(l+iabs(m)))) +if (factor2== 0.d0) return sum=0.d0 do i=0,lambda sgnp = 1.d0 do kp=0,m/2 do ip=0,l-m cylm=factor1*coef_pm(lambda,i) + if (cylm == 0.d0) cycle cylmp=sgnp*factor2*binom_func(m,2*kp)*fact(m+ip)/fact(ip)*coef_pm(l,ip+m) + if (cylmp == 0.d0) cycle sum=sum+cylm*cylmp*bigA(m-2*kp+k1,2*kp+k2,i+ip+k3) enddo sgnp = -sgnp @@ -924,13 +943,18 @@ endif if(mu.gt.0.and.m.eq.0)then sum=0.d0 factor1=dsqrt((2*lambda+1)*fact(lambda-iabs(mu))/(2.d0*pi*fact(lambda+iabs(mu)))) +if (factor1== 0.d0) return factor2=dsqrt((2*l+1)/(4.d0*pi)) +if (factor2== 0.d0) return sgn = 1.d0 do k=0,mu/2 do i=0,lambda-mu + if (coef_pm(lambda,i+mu) == 0.d0) cycle do ip=0,l cylm=sgn*factor1*binom_func(mu,2*k)*fact(mu+i)/fact(i)*coef_pm(lambda,i+mu) + if (cylm == 0.d0) cycle cylmp=factor2*coef_pm(l,ip) + if (cylmp == 0.d0) cycle sum=sum+cylm*cylmp*bigA(mu-2*k +k1,2*k +k2,i+ip +k3) enddo enddo @@ -944,16 +968,22 @@ if(mu.lt.0.and.m.lt.0)then mu=-mu m=-m factor1=dsqrt((2*lambda+1)*fact(lambda-iabs(mu))/(2.d0*pi*fact(lambda+iabs(mu)))) +if (factor1== 0.d0) return factor2=dsqrt((2*l+1)*fact(l-iabs(m))/(2.d0*pi*fact(l+iabs(m)))) +if (factor2== 0.d0) return sum=0.d0 sgn = 1.d0 do k=0,(mu-1)/2 do i=0,lambda-mu + if (coef_pm(lambda,i+mu) == 0.d0) cycle sgnp = 1.d0 do kp=0,(m-1)/2 do ip=0,l-m + if (coef_pm(l,ip+m) == 0.d0) cycle cylm=sgn*factor1*binom_func(mu,2*k+1)*fact(mu+i)/fact(i)*coef_pm(lambda,i+mu) + if (cylm == 0.d0) cycle cylmp=sgnp*factor2*binom_func(m,2*kp+1)*fact(m+ip)/fact(ip)*coef_pm(l,ip+m) + if (cylmp == 0.d0) cycle sum=sum+cylm*cylmp*bigA(mu-(2*k+1)+m-(2*kp+1)+k1,(2*k+1)+(2*kp+1)+k2,i+ip+k3) enddo sgnp = -sgnp @@ -970,14 +1000,18 @@ endif if(mu.eq.0.and.m.lt.0)then m=-m factor1=dsqrt((2*lambda+1)/(4.d0*pi)) +if (factor1 == 0.d0) return factor2=dsqrt((2*l+1)*fact(l-iabs(m))/(2.d0*pi*fact(l+iabs(m)))) +if (factor2 == 0.d0) return sum=0.d0 do i=0,lambda sgnp = 1.d0 do kp=0,(m-1)/2 do ip=0,l-m cylm=factor1*coef_pm(lambda,i) + if (cylm == 0.d0) cycle cylmp=sgnp*factor2*binom_func(m,2*kp+1)*fact(m+ip)/fact(ip)*coef_pm(l,ip+m) + if (cylmp == 0.d0) cycle sum=sum+cylm*cylmp*bigA(m-(2*kp+1)+k1,2*kp+1+k2,i+ip+k3) enddo sgnp = -sgnp @@ -992,13 +1026,17 @@ if(mu.lt.0.and.m.eq.0)then sum=0.d0 mu=-mu factor1=dsqrt((2*lambda+1)*fact(lambda-iabs(mu))/(2.d0*pi*fact(lambda+iabs(mu)))) +if (factor1== 0.d0) return factor2=dsqrt((2*l+1)/(4.d0*pi)) +if (factor2== 0.d0) return sgn = 1.d0 do k=0,(mu-1)/2 do i=0,lambda-mu do ip=0,l cylm=sgn*factor1*binom_func(mu,2*k+1)*fact(mu+i)/fact(i)*coef_pm(lambda,i+mu) + if (cylm == 0.d0) cycle cylmp=factor2*coef_pm(l,ip) + if (cylmp == 0.d0) cycle sum=sum+cylm*cylmp*bigA(mu-(2*k+1)+k1,2*k+1+k2,i+ip+k3) enddo enddo @@ -1012,16 +1050,22 @@ endif if(mu.gt.0.and.m.lt.0)then sum=0.d0 factor1=dsqrt((2*lambda+1)*fact(lambda-iabs(mu))/(2.d0*pi*fact(lambda+iabs(mu)))) +if (factor1== 0.d0) return factor2=dsqrt((2*l+1)*fact(l-iabs(m))/(2.d0*pi*fact(l+iabs(m)))) +if (factor2== 0.d0) return m=-m sgn=1.d0 do k=0,mu/2 do i=0,lambda-mu + if (coef_pm(lambda,i+mu) == 0.d0) cycle sgnp=1.d0 do kp=0,(m-1)/2 do ip=0,l-m + if (coef_pm(l,ip+m) == 0.d0) cycle cylm =sgn *factor1*binom_func(mu,2*k)*fact(mu+i)/fact(i)*coef_pm(lambda,i+mu) + if (cylm == 0.d0) cycle cylmp=sgnp*factor2*binom_func(m,2*kp+1)*fact(m+ip)/fact(ip)*coef_pm(l,ip+m) + if (cylmp == 0.d0) cycle sum=sum+cylm*cylmp*bigA(mu-2*k+m-(2*kp+1)+k1,2*k+2*kp+1+k2,i+ip+k3) enddo sgnp = -sgnp @@ -1037,16 +1081,22 @@ endif if(mu.lt.0.and.m.gt.0)then mu=-mu factor1=dsqrt((2*lambda+1)*fact(lambda-iabs(mu))/(2.d0*pi*fact(lambda+iabs(mu)))) +if (factor1== 0.d0) return factor2=dsqrt((2*l+1)*fact(l-iabs(m))/(2.d0*pi*fact(l+iabs(m)))) +if (factor2== 0.d0) return sum=0.d0 sgn = 1.d0 do k=0,(mu-1)/2 do i=0,lambda-mu + if (coef_pm(lambda,i+mu) == 0.d0) cycle sgnp = 1.d0 do kp=0,m/2 do ip=0,l-m + if (coef_pm(l,ip+m) == 0.d0) cycle cylm=sgn*factor1 *binom_func(mu,2*k+1)*fact(mu+i)/fact(i)*coef_pm(lambda,i+mu) + if (cylm == 0.d0) cycle cylmp=sgnp*factor2*binom_func(m,2*kp)*fact(m+ip)/fact(ip)*coef_pm(l,ip+m) + if (cylmp == 0.d0) cycle sum=sum+cylm*cylmp*bigA(mu-(2*k+1)+m-2*kp+k1,2*k+1+2*kp+k2,i+ip+k3) enddo sgnp = -sgnp @@ -1544,7 +1594,7 @@ end r=(i-1)*dr x1=delta1*r x2=delta2*r - sum=sum+dr*r**(n+2)*dexp(-cc*r**2)*bessel_mod(x1,lambda)*bessel_mod(x2,lambdap) + sum=sum+dr*r**(n+2)*dexp(-cc*r*r)*bessel_mod(x1,lambda)*bessel_mod(x2,lambdap) enddo bigR=sum*factor end @@ -1569,8 +1619,8 @@ end return endif if(n.eq.0)a=dsinh(x)/x - if(n.eq.1)a=(x*dcosh(x)-dsinh(x))/x**2 - if(n.ge.2)a=bessel_mod_recur(n-2,x)-(2*n-1)/x*bessel_mod_recur(n-1,x) + if(n.eq.1)a=(x*dcosh(x)-dsinh(x))/(x*x) + if(n.ge.2)a=bessel_mod_recur(n-2,x)-(n+n-1)/x*bessel_mod_recur(n-1,x) end double precision function bessel_mod_exp(n,x) @@ -1579,8 +1629,8 @@ end double precision x,coef,accu,fact,dble_fact accu=0.d0 do k=0,10 - coef=1.d0/fact(k)/dble_fact(2*(n+k)+1) - accu=accu+(x**2/2.d0)**k*coef + coef=1.d0/(fact(k)*dble_fact(2*(n+k)+1)) + accu=accu+(0.5d0*x*x)**k*coef enddo bessel_mod_exp=x**n*accu end @@ -1778,15 +1828,15 @@ end double precision function coef_nk(n,k) implicit none - integer n,k, ISHFT + integer n,k double precision gam,dble_fact,fact + if (k<0) stop 'pseudopot.f90 : coef_nk' + if (k>63) stop 'pseudopot.f90 : coef_nk' gam=dble_fact(n+n+k+k+1) - -! coef_nk=1.d0/(dble(ISHFT(1,k))*fact(k)*gam) - - coef_nk=1.d0/(2.d0**k*fact(k)*gam) +! coef_nk=1.d0/(2.d0**k*fact(k)*gam) + coef_nk=1.d0/(dble(ibset(0_8,k))*fact(k)*gam) return @@ -1811,11 +1861,11 @@ double precision function int_prod_bessel(l,gam,n,m,a,b,arg) double precision :: s_q_0, s_q_k, s_0_0, a_over_b_square double precision :: int_prod_bessel_loc double precision :: inverses(0:300) - double precision :: two_qkmp1, qk + double precision :: two_qkmp1, qk, mk, nk logical done - u=(a+b)/(2.d0*dsqrt(gam)) + u=(a+b)*0.5d0/dsqrt(gam) freal=dexp(-arg) if(a.eq.0.d0.and.b.eq.0.d0)then @@ -1859,6 +1909,7 @@ double precision function int_prod_bessel(l,gam,n,m,a,b,arg) s_q_0 = s_0_0 + mk = dble(m) ! Loop over q for the convergence of the sequence do while (.not.done) @@ -1870,10 +1921,10 @@ double precision function int_prod_bessel(l,gam,n,m,a,b,arg) stop 'pseudopot.f90 : q > 300' endif - two_qkmp1 = dble(2*(q+m)+1) qk = dble(q) + two_qkmp1 = 2.d0*(qk+mk)+1.d0 do k=0,q-1 - s_q_k = ( two_qkmp1*qk*inverses(k) ) * s_q_k + s_q_k = two_qkmp1*qk*inverses(k)*s_q_k sum=sum+s_q_k two_qkmp1 = two_qkmp1-2.d0 qk = qk-1.d0 diff --git a/src/Utils/transpose.irp.f b/src/Utils/transpose.irp.f new file mode 100644 index 00000000..32e502e9 --- /dev/null +++ b/src/Utils/transpose.irp.f @@ -0,0 +1,78 @@ +!DIR$ attributes forceinline :: transpose +recursive subroutine transpose(A,LDA,B,LDB,d1,d2) + implicit none + BEGIN_DOC +! Transpose input matrix A into output matrix B + END_DOC + integer, intent(in) :: d1, d2, LDA, LDB + real, intent(in) :: A(LDA,d2) + real, intent(out) :: B(LDB,d1) + + integer :: i,j,k, mod_align + if ( d2 < 32 ) then + do j=1,d1 + !DIR$ LOOP COUNT (16) + do i=1,d2 + B(i,j ) = A(j ,i) + enddo + enddo + return + else if (d1 > d2) then + !DIR$ forceinline + k=d1/2 + !DIR$ forceinline recursive + call transpose(A(1,1),LDA,B(1,1),LDB,k,d2) + !DIR$ forceinline recursive + call transpose(A(k+1,1),LDA,B(1,k+1),LDB,d1-k,d2) + return + else + !DIR$ forceinline + k=d2/2 + !DIR$ forceinline recursive + call transpose(A(1,k+1),LDA,B(k+1,1),LDB,d1,d2-k) + !DIR$ forceinline recursive + call transpose(A(1,1),LDA,B(1,1),LDB,d1,k) + return + endif + +end + +!DIR$ attributes forceinline :: dtranspose +recursive subroutine dtranspose(A,LDA,B,LDB,d1,d2) + implicit none + BEGIN_DOC +! Transpose input matrix A into output matrix B + END_DOC + integer, intent(in) :: d1, d2, LDA, LDB + double precision, intent(in) :: A(LDA,d2) + double precision, intent(out) :: B(LDB,d1) + + integer :: i,j,k, mod_align + if ( d2 < 32 ) then + do j=1,d1 + !DIR$ LOOP COUNT (16) + do i=1,d2 + B(i,j ) = A(j ,i) + enddo + enddo + return + else if (d1 > d2) then + !DIR$ forceinline + k=d1/2 + !DIR$ forceinline recursive + call dtranspose(A(1,1),LDA,B(1,1),LDB,k,d2) + !DIR$ forceinline recursive + call dtranspose(A(k+1,1),LDA,B(1,k+1),LDB,d1-k,d2) + return + else + !DIR$ forceinline + k=d2/2 + !DIR$ forceinline recursive + call dtranspose(A(1,k+1),LDA,B(k+1,1),LDB,d1,d2-k) + !DIR$ forceinline recursive + call dtranspose(A(1,1),LDA,B(1,1),LDB,d1,k) + return + endif + +end + diff --git a/src/Utils/util.irp.f b/src/Utils/util.irp.f index 751610a5..4001e9df 100644 --- a/src/Utils/util.irp.f +++ b/src/Utils/util.irp.f @@ -84,10 +84,8 @@ double precision function fact(n) memo(i) = memo(i-1)*dble(i) enddo memomax = min(n,100) - fact = memo(memomax) - do i=101,n - fact = fact*dble(i) - enddo + double precision :: logfact + fact = dexp(logfact(n)) end function double precision function logfact(n) @@ -158,18 +156,41 @@ double precision function dble_fact_even(n) result(fact2) ! n!! END_DOC integer :: n,k - double precision, save :: memo(1:100) - integer, save :: memomax = 2 + double precision, save :: memo(0:100) + integer, save :: memomax = 0 double precision :: prod ASSERT (iand(n,1) /= 1) - prod=1.d0 - do k=2,n,2 - prod=prod*dfloat(k) +! prod=1.d0 +! do k=2,n,2 +! prod=prod*dfloat(k) +! enddo +! fact2=prod +! return +! + if (n <= memomax) then + if (n < 2) then + fact2 = 1.d0 + else + fact2 = memo(n) + endif + return + endif + + integer :: i + memo(0)=1.d0 + memo(1)=1.d0 + do i=memomax+2,min(n,100),2 + memo(i) = memo(i-2)* dble(i) enddo - fact2=prod - return + memomax = min(n,100) + fact2 = memo(memomax) + + if (n > 100) then + double precision :: dble_logfact + fact2 = dexp(dble_logfact(n)) + endif end function diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index d3ecd312..c3a55a05 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -473,6 +473,11 @@ subroutine end_zmq_push_socket(zmq_socket_push,thread) integer :: rc character*(8), external :: zmq_port + rc = f77_zmq_setsockopt(zmq_socket_push,ZMQ_LINGER,300000,4) + if (rc /= 0) then + stop 'Unable to set ZMQ_LINGER on push socket' + endif + rc = f77_zmq_close(zmq_socket_push) if (rc /= 0) then print *, 'f77_zmq_close(zmq_socket_push)' @@ -794,12 +799,6 @@ subroutine end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) character*(8), external :: zmq_port integer :: rc - rc = f77_zmq_disconnect(zmq_to_qp_run_socket, trim(qp_run_address)//':'//trim(zmq_port(0))) -! if (rc /= 0) then -! print *, 'f77_zmq_disconnect(zmq_to_qp_run_socket, trim(qp_run_address)//'':''//trim(zmq_port(0)))' -! stop 'error' -! endif - rc = f77_zmq_setsockopt(zmq_to_qp_run_socket,ZMQ_LINGER,1000,4) if (rc /= 0) then stop 'Unable to set ZMQ_LINGER on zmq_to_qp_run_socket' @@ -907,3 +906,37 @@ end +subroutine wait_for_states(state_wait,state,n) + use f77_zmq + implicit none + BEGIN_DOC +! Wait for the ZMQ state to be ready + END_DOC + integer, intent(in) :: n + character*(64), intent(in) :: state_wait(n) + character*(64), intent(out) :: state + integer(ZMQ_PTR) :: zmq_socket_sub + integer(ZMQ_PTR), external :: new_zmq_sub_socket + integer :: rc, i + logical :: condition + + zmq_socket_sub = new_zmq_sub_socket() + state = 'Waiting' + condition = .True. + do while (condition) + rc = f77_zmq_recv( zmq_socket_sub, state, 64, 0) + if (rc > 0) then + state = trim(state(1:rc)) + else + print *, 'Timeout reached. Stopping' + state = "Stopped" + endif + condition = trim(state) /= 'Stopped' + do i=1,n + condition = condition .and. (trim(state) /= trim(state_wait(i))) + enddo + end do + call end_zmq_sub_socket(zmq_socket_sub) +end + +