diff --git a/.travis.yml b/.travis.yml index 40c09bbc..57991ba3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,7 +25,7 @@ python: - "2.6" script: - - ./configure --production ./config/gfortran.cfg + - ./configure --production ./config/travis.cfg - source ./quantum_package.rc ; qp_module.py install Full_CI Full_CI_ZMQ Hartree_Fock CAS_SD_ZMQ mrcepa0 All_singles - source ./quantum_package.rc ; ninja - source ./quantum_package.rc ; cd ocaml ; make ; cd - diff --git a/config/travis.cfg b/config/travis.cfg new file mode 100644 index 00000000..024e330b --- /dev/null +++ b/config/travis.cfg @@ -0,0 +1,62 @@ +# Common flags +############## +# +# -ffree-line-length-none : Needed for IRPF90 which produces long lines +# -lblas -llapack : Link with libblas and liblapack libraries provided by the system +# -I . : Include the curent directory (Mandatory) +# +# --ninja : Allow the utilisation of ninja. (Mandatory) +# --align=32 : Align all provided arrays on a 32-byte boundary +# +# +[COMMON] +FC : gfortran -ffree-line-length-none -I . -g +LAPACK_LIB : -llapack -lblas +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 1 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -Ofast : Disregard strict standards compliance. Enables all -O3 optimizations. +# It also enables optimizations that are not valid +# for all standard-compliant programs. It turns on +# -ffast-math and the Fortran-specific +# -fno-protect-parens and -fstack-arrays. +[OPT] +FCFLAGS : -Ofast -march=native + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -Ofast + +# Debugging flags +################# +# +# -fcheck=all : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# +[DEBUG] +FCFLAGS : -fcheck=all -g + +# OpenMP flags +################# +# +[OPENMP] +FC : -fopenmp +IRPF90_FLAGS : --openmp + diff --git a/plugins/CAS_SD_ZMQ/EZFIO.cfg b/plugins/CAS_SD_ZMQ/EZFIO.cfg new file mode 100644 index 00000000..7425c8ba --- /dev/null +++ b/plugins/CAS_SD_ZMQ/EZFIO.cfg @@ -0,0 +1,10 @@ +[energy] +type: double precision +doc: "Calculated CAS-SD energy" +interface: ezfio + +[energy_pt2] +type: double precision +doc: "Calculated selected CAS-SD energy with PT2 correction" +interface: ezfio + diff --git a/plugins/CAS_SD_ZMQ/NEEDED_CHILDREN_MODULES b/plugins/CAS_SD_ZMQ/NEEDED_CHILDREN_MODULES new file mode 100644 index 00000000..ae599426 --- /dev/null +++ b/plugins/CAS_SD_ZMQ/NEEDED_CHILDREN_MODULES @@ -0,0 +1,2 @@ +Generators_CAS Perturbation Selectors_CASSD ZMQ + diff --git a/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f b/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f index 01e57649..6844ed90 100644 --- a/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f +++ b/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f @@ -99,9 +99,9 @@ program fci_zmq print *, 'N_states = ', N_states do k=1,N_states print *, 'State', k - print *, 'PT2 = ', pt2 - print *, 'E = ', E_CI_before - print *, 'E+PT2 = ', E_CI_before+pt2 + print *, 'PT2 = ', pt2(k) + print *, 'E = ', E_CI_before(k) + print *, 'E+PT2 = ', E_CI_before(k)+pt2(k) print *, '-----' enddo call ezfio_set_cas_sd_zmq_energy_pt2(E_CI_before+pt2) @@ -145,9 +145,9 @@ subroutine ZMQ_selection(N_in, pt2) step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) step = max(1,step) - do i= N_det_generators, 1, -step - i_generator_start = max(i-step+1,1) - i_generator_max = i + do i= 1, N_det_generators,step + i_generator_start = i + i_generator_max = min(i+step-1,N_det_generators) write(task,*) i_generator_start, i_generator_max, 1, N call add_task_to_taskserver(zmq_to_qp_run_socket,task) end do @@ -164,7 +164,6 @@ subroutine ZMQ_selection(N_in, pt2) if (N_in > 0) then call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN call copy_H_apply_buffer_to_wf() -call remove_duplicates_in_psi_det if (s2_eig) then call make_s2_eigenfunction endif diff --git a/plugins/CAS_SD_ZMQ/energy.irp.f b/plugins/CAS_SD_ZMQ/energy.irp.f index 4999c176..db1e7d1a 100644 --- a/plugins/CAS_SD_ZMQ/energy.irp.f +++ b/plugins/CAS_SD_ZMQ/energy.irp.f @@ -3,9 +3,9 @@ BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ] BEGIN_DOC ! E0 in the denominator of the PT2 END_DOC - pt2_E0_denominator(:) = CI_electronic_energy(:) -! pt2_E0_denominator(:) = HF_energy - nuclear_repulsion -! pt2_E0_denominator(:) = barycentric_electronic_energy(:) + pt2_E0_denominator(1:N_states) = CI_electronic_energy(1:N_states) +! pt2_E0_denominator(1:N_states) = HF_energy - nuclear_repulsion +! pt2_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states) call write_double(6,pt2_E0_denominator(1)+nuclear_repulsion, 'PT2 Energy denominator') END_PROVIDER diff --git a/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f b/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f index 36550116..dfaee629 100644 --- a/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f +++ b/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f @@ -4,7 +4,7 @@ subroutine run_selection_slave(thread,iproc,energy) use selection_types implicit none - double precision, intent(in) :: energy(N_states_diag) + double precision, intent(in) :: energy(N_states) integer, intent(in) :: thread, iproc integer :: rc, i diff --git a/plugins/CAS_SD_ZMQ/selection.irp.f b/plugins/CAS_SD_ZMQ/selection.irp.f index 39131520..f90ee488 100644 --- a/plugins/CAS_SD_ZMQ/selection.irp.f +++ b/plugins/CAS_SD_ZMQ/selection.irp.f @@ -202,11 +202,6 @@ subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, if(vect(1, p1) == 0d0) cycle call apply_particle(mask, sp, p1, det, ok, N_int) -logical, external :: is_in_wavefunction -if (is_in_wavefunction(det,N_int)) then - cycle -endif - Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) max_e_pert = 0d0 diff --git a/plugins/CAS_SD_ZMQ/selection_cassd_slave.irp.f b/plugins/CAS_SD_ZMQ/selection_cassd_slave.irp.f new file mode 100644 index 00000000..657ad63c --- /dev/null +++ b/plugins/CAS_SD_ZMQ/selection_cassd_slave.irp.f @@ -0,0 +1,93 @@ +program selection_slave + implicit none + BEGIN_DOC +! Helper program to compute the PT2 in distributed mode. + END_DOC + + read_wf = .False. + SOFT_TOUCH read_wf + call provide_everything + call switch_qp_run_to_master + call run_wf +end + +subroutine provide_everything + PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context + PROVIDE pt2_e0_denominator mo_tot_num N_int +end + +subroutine run_wf + use f77_zmq + implicit none + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + double precision :: energy(N_states) + character*(64) :: states(1) + integer :: rc, i + + call provide_everything + + zmq_context = f77_zmq_ctx_new () + states(1) = 'selection' + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + do + + call wait_for_states(states,zmq_state,1) + + if(trim(zmq_state) == 'Stopped') then + + exit + + else if (trim(zmq_state) == 'selection') then + + ! Selection + ! --------- + + print *, 'Selection' + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + + !$OMP PARALLEL PRIVATE(i) + i = omp_get_thread_num() + call selection_slave_tcp(i, energy) + !$OMP END PARALLEL + print *, 'Selection done' + + endif + + end do +end + +subroutine update_energy(energy) + implicit none + double precision, intent(in) :: energy(N_states) + BEGIN_DOC +! Update energy when it is received from ZMQ + END_DOC + integer :: j,k + do j=1,N_states + do k=1,N_det + CI_eigenvectors(k,j) = psi_coef(k,j) + enddo + enddo + call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int) + if (.True.) then + do k=1,N_states + ci_electronic_energy(k) = energy(k) + enddo + TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors + endif + + call write_double(6,ci_energy,'Energy') +end + +subroutine selection_slave_tcp(i,energy) + implicit none + double precision, intent(in) :: energy(N_states) + integer, intent(in) :: i + + call run_selection_slave(0,i,energy) +end + diff --git a/plugins/FOBOCI/SC2_1h1p.irp.f b/plugins/FOBOCI/SC2_1h1p.irp.f index b9378575..7733831c 100644 --- a/plugins/FOBOCI/SC2_1h1p.irp.f +++ b/plugins/FOBOCI/SC2_1h1p.irp.f @@ -210,7 +210,7 @@ subroutine dressing_1h1p_by_2h2p(dets_in,u_in,diag_H_elements,dim_in,sze,N_st,Ni integer, intent(in) :: dim_in, sze, N_st, Nint integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) double precision, intent(inout) :: u_in(dim_in,N_st) - double precision, intent(out) :: diag_H_elements(dim_in) + double precision, intent(out) :: diag_H_elements(0:dim_in) double precision, intent(in) :: convergence integer :: i,j,k,l diff --git a/plugins/Full_CI_ZMQ/EZFIO.cfg b/plugins/Full_CI_ZMQ/EZFIO.cfg new file mode 100644 index 00000000..26f1a8e5 --- /dev/null +++ b/plugins/Full_CI_ZMQ/EZFIO.cfg @@ -0,0 +1,11 @@ +[energy] +type: double precision +doc: Calculated Selected FCI energy +interface: ezfio + +[energy_pt2] +type: double precision +doc: Calculated FCI energy + PT2 +interface: ezfio + + diff --git a/plugins/Full_CI_ZMQ/energy.irp.f b/plugins/Full_CI_ZMQ/energy.irp.f index 4999c176..db1e7d1a 100644 --- a/plugins/Full_CI_ZMQ/energy.irp.f +++ b/plugins/Full_CI_ZMQ/energy.irp.f @@ -3,9 +3,9 @@ BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ] BEGIN_DOC ! E0 in the denominator of the PT2 END_DOC - pt2_E0_denominator(:) = CI_electronic_energy(:) -! pt2_E0_denominator(:) = HF_energy - nuclear_repulsion -! pt2_E0_denominator(:) = barycentric_electronic_energy(:) + pt2_E0_denominator(1:N_states) = CI_electronic_energy(1:N_states) +! pt2_E0_denominator(1:N_states) = HF_energy - nuclear_repulsion +! pt2_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states) call write_double(6,pt2_E0_denominator(1)+nuclear_repulsion, 'PT2 Energy denominator') END_PROVIDER diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 8b9488d2..382e8652 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -5,11 +5,16 @@ program fci_zmq double precision, allocatable :: pt2(:) integer :: degree + integer :: n_det_before, to_select + double precision :: threshold_davidson_in allocate (pt2(N_states)) pt2 = 1.d0 diag_algorithm = "Lapack" + threshold_davidson_in = threshold_davidson + SOFT_TOUCH threshold_davidson + threshold_davidson = 1.d-4 if (N_det > N_det_max) then call diagonalize_CI @@ -33,18 +38,25 @@ program fci_zmq double precision :: E_CI_before(N_states) - integer :: n_det_before print*,'Beginning the selection ...' E_CI_before(1:N_states) = CI_energy(1:N_states) + n_det_before = 0 do while ( (N_det < N_det_max) .and. (maxval(abs(pt2(1:N_states))) > pt2_max) ) n_det_before = N_det - call ZMQ_selection(max(1024-N_det, N_det), pt2) + to_select = 3*N_det + to_select = max(1024-to_select, to_select) + to_select = min(to_select, N_det_max-n_det_before) + call ZMQ_selection(to_select, pt2) PROVIDE psi_coef PROVIDE psi_det PROVIDE psi_det_sorted + if (N_det == N_det_max) then + threshold_davidson = threshold_davidson_in + SOFT_TOUCH threshold_davidson + endif call diagonalize_CI call save_wavefunction @@ -137,9 +149,9 @@ subroutine ZMQ_selection(N_in, pt2) step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) step = max(1,step) - do i= N_det_generators, 1, -step - i_generator_start = max(i-step+1,1) - i_generator_max = i + do i= 1, N_det_generators,step + i_generator_start = i + i_generator_max = min(i+step-1,N_det_generators) write(task,*) i_generator_start, i_generator_max, 1, N call add_task_to_taskserver(zmq_to_qp_run_socket,task) end do diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index 36550116..dfaee629 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -4,7 +4,7 @@ subroutine run_selection_slave(thread,iproc,energy) use selection_types implicit none - double precision, intent(in) :: energy(N_states_diag) + double precision, intent(in) :: energy(N_states) integer, intent(in) :: thread, iproc integer :: rc, i diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index 5041e731..d6204cc3 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -22,7 +22,7 @@ subroutine run_wf integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket - double precision :: energy(N_states_diag) + double precision :: energy(N_states) character*(64) :: states(2) integer :: rc, i @@ -48,7 +48,7 @@ subroutine run_wf ! --------- print *, 'Selection' - call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states_diag) + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() @@ -76,7 +76,7 @@ end subroutine update_energy(energy) implicit none - double precision, intent(in) :: energy(N_states_diag) + double precision, intent(in) :: energy(N_states) BEGIN_DOC ! Update energy when it is received from ZMQ END_DOC @@ -88,7 +88,7 @@ subroutine update_energy(energy) enddo call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int) if (.True.) then - do k=1,size(ci_electronic_energy) + do k=1,N_states ci_electronic_energy(k) = energy(k) enddo TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors @@ -99,7 +99,7 @@ end subroutine selection_slave_tcp(i,energy) implicit none - double precision, intent(in) :: energy(N_states_diag) + double precision, intent(in) :: energy(N_states) integer, intent(in) :: i call run_selection_slave(0,i,energy) diff --git a/plugins/Full_CI_ZMQ/selection_slave.irp.f b/plugins/Full_CI_ZMQ/selection_slave.irp.f index b9e530e0..657ad63c 100644 --- a/plugins/Full_CI_ZMQ/selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_slave.irp.f @@ -22,7 +22,7 @@ subroutine run_wf integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket - double precision :: energy(N_states_diag) + double precision :: energy(N_states) character*(64) :: states(1) integer :: rc, i @@ -47,7 +47,7 @@ subroutine run_wf ! --------- print *, 'Selection' - call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states_diag) + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() @@ -62,7 +62,7 @@ end subroutine update_energy(energy) implicit none - double precision, intent(in) :: energy(N_states_diag) + double precision, intent(in) :: energy(N_states) BEGIN_DOC ! Update energy when it is received from ZMQ END_DOC @@ -74,7 +74,7 @@ subroutine update_energy(energy) enddo call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int) if (.True.) then - do k=1,size(ci_electronic_energy) + do k=1,N_states ci_electronic_energy(k) = energy(k) enddo TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors @@ -85,7 +85,7 @@ end subroutine selection_slave_tcp(i,energy) implicit none - double precision, intent(in) :: energy(N_states_diag) + double precision, intent(in) :: energy(N_states) integer, intent(in) :: i call run_selection_slave(0,i,energy) diff --git a/plugins/MP2/mp2.irp.f b/plugins/MP2/mp2.irp.f index 3a049f7b..d4721c71 100644 --- a/plugins/MP2/mp2.irp.f +++ b/plugins/MP2/mp2.irp.f @@ -1,4 +1,10 @@ program mp2 + no_vvvv_integrals = .True. + SOFT_TOUCH no_vvvv_integrals + call run +end + +subroutine run implicit none double precision, allocatable :: pt2(:), norm_pert(:) double precision :: H_pert_diag, E_old diff --git a/plugins/MP2/mp2_wf.irp.f b/plugins/MP2/mp2_wf.irp.f index 5efbb9cd..e7419319 100644 --- a/plugins/MP2/mp2_wf.irp.f +++ b/plugins/MP2/mp2_wf.irp.f @@ -1,4 +1,10 @@ program mp2_wf + no_vvvv_integrals = .True. + SOFT_TOUCH no_vvvv_integrals + call run +end + +subroutine run implicit none BEGIN_DOC ! Save the MP2 wave function diff --git a/plugins/MRCC_Utils/amplitudes.irp.f b/plugins/MRCC_Utils/amplitudes.irp.f index 053527f7..72d3ea67 100644 --- a/plugins/MRCC_Utils/amplitudes.irp.f +++ b/plugins/MRCC_Utils/amplitudes.irp.f @@ -89,7 +89,7 @@ END_PROVIDER !$OMP shared(is_active_exc, active_hh_idx, active_pp_idx, n_exc_active)& !$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk, ppp, hh, s) allocate(lref(N_det_non_ref)) - !$OMP DO schedule(static,10) + !$OMP DO schedule(dynamic) do ppp=1,n_exc_active active_excitation_to_determinants_val(:,:,ppp) = 0d0 active_excitation_to_determinants_idx(:,ppp) = 0 @@ -191,6 +191,15 @@ END_PROVIDER end if end do + if (a_col == at_row) then + t(:) = t(:) + 1.d0 + endif + if (sum(dabs(t(:))) > 0.d0) then + wk = wk+1 + A_ind_mwen(wk) = a_col + A_val_mwen(:,wk) = t(:) + endif + end do if(wk /= 0) then diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index 9d5e8a67..e667d255 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -628,7 +628,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz integer :: k_pairs, kl integer :: iter2 - double precision, allocatable :: W(:,:), U(:,:), S(:,:) + double precision, allocatable :: W(:,:), U(:,:), S(:,:), overlap(:,:) double precision, allocatable :: y(:,:), h(:,:), lambda(:), s2(:) double precision, allocatable :: c(:), s_(:,:), s_tmp(:,:) double precision :: diag_h_mat_elem @@ -640,8 +640,10 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz include 'constants.include.F' !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda - if (N_st_diag > sze) then - stop 'error in Davidson : N_st_diag > sze' + if (N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_jacobi to ', N_st_diag*3 + stop -1 endif PROVIDE nuclear_repulsion @@ -666,7 +668,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz write(iunit,'(A)') trim(write_buffer) write_buffer = ' Iter' do i=1,N_st - write_buffer = trim(write_buffer)//' Energy S^2 Residual' + write_buffer = trim(write_buffer)//' Energy S^2 Residual ' enddo write(iunit,'(A)') trim(write_buffer) write_buffer = '===== ' @@ -678,26 +680,19 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz integer, external :: align_double sze_8 = align_double(sze) - double precision :: delta - - if (s2_eig) then - delta = 1.d0 - else - delta = 0.d0 - endif - itermax = min(davidson_sze_max, sze/N_st_diag) allocate( & - W(sze_8,N_st_diag*itermax), & - U(sze_8,N_st_diag*itermax), & - S(sze_8,N_st_diag*itermax), & - h(N_st_diag*itermax,N_st_diag*itermax), & - y(N_st_diag*itermax,N_st_diag*itermax), & - s_(N_st_diag*itermax,N_st_diag*itermax), & - s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + W(sze_8,N_st_diag*itermax), & + U(sze_8,N_st_diag*itermax), & + S(sze_8,N_st_diag*itermax), & + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & residual_norm(N_st_diag), & - c(N_st_diag*itermax), & - s2(N_st_diag*itermax), & + c(N_st_diag*itermax), & + s2(N_st_diag*itermax), & + overlap(N_st_diag*itermax,N_st_diag*itermax), & lambda(N_st_diag*itermax)) h = 0.d0 @@ -721,24 +716,18 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz converged = .False. double precision :: r1, r2 - do k=N_st+1,N_st_diag-2,2 + do k=N_st+1,N_st_diag + u_in(k,k) = 10.d0 do i=1,sze call random_number(r1) call random_number(r2) r1 = dsqrt(-2.d0*dlog(r1)) r2 = dtwo_pi*r2 u_in(i,k) = r1*dcos(r2) - u_in(i,k+1) = r1*dsin(r2) enddo enddo - do k=N_st_diag-1,N_st_diag - do i=1,sze - call random_number(r1) - call random_number(r2) - r1 = dsqrt(-2.d0*dlog(r1)) - r2 = dtwo_pi*r2 - u_in(i,k) = r1*dcos(r2) - enddo + do k=1,N_st_diag + call normalize(u_in(1,k),sze) enddo @@ -776,6 +765,45 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz 1.d0, U, size(U,1), S, size(S,1), & 0.d0, s_, size(s_,1)) +! ! Diagonalize S^2 +! ! --------------- +! +! call lapack_diag(s2,y,s_,size(s_,1),shift2) +! +! ! Rotate H in the basis of eigenfunctions of s2 +! ! --------------------------------------------- +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('T','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) +! +! ! Damp interaction between different spin states +! ! ------------------------------------------------ +! +! do k=1,shift2 +! do l=1,shift2 +! if (dabs(s2(k) - s2(l)) > 1.d0) then +! h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l)))) +! endif +! enddo +! enddo +! +! ! Rotate back H +! ! ------------- +! +! call dgemm('N','T',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) + + ! Diagonalize h ! ------------- call lapack_diag(lambda,y,h,size(h,1),shift2) @@ -796,24 +824,73 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz enddo if (s2_eig) then - logical :: state_ok(N_st_diag*davidson_sze_max) + logical :: state_ok(N_st_diag*davidson_sze_max) + do k=1,shift2 + state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0) + enddo + else + state_ok(k) = .True. + endif + + do k=1,shift2 + if (.not. state_ok(k)) then + do l=k+1,shift2 + if (state_ok(l)) then + call dswap(shift2, y(1,k), 1, y(1,l), 1) + call dswap(1, s2(k), 1, s2(l), 1) + call dswap(1, lambda(k), 1, lambda(l), 1) + state_ok(k) = .True. + state_ok(l) = .False. + exit + endif + enddo + endif + enddo + + if (state_following) then + + ! Compute overlap with U_in + ! ------------------------- + + integer :: order(N_st_diag) + double precision :: cmax + overlap = -1.d0 do k=1,shift2 - state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0) + do i=1,shift2 + overlap(k,i) = dabs(y(k,i)) + enddo enddo - do k=1,shift2 - if (.not. state_ok(k)) then - do l=k+1,shift2 - if (state_ok(l)) then - call dswap(shift2, y(1,k), 1, y(1,l), 1) - call dswap(1, s2(k), 1, s2(l), 1) - call dswap(1, lambda(k), 1, lambda(l), 1) - state_ok(k) = .True. - state_ok(l) = .False. - exit - endif - enddo + do k=1,N_st + cmax = -1.d0 + do i=1,N_st + if (overlap(i,k) > cmax) then + cmax = overlap(i,k) + order(k) = i + endif + enddo + do i=1,shift2 + overlap(order(k),i) = -1.d0 + enddo + enddo + overlap = y + do k=1,N_st + l = order(k) + if (k /= l) then + y(1:shift2,k) = overlap(1:shift2,l) endif enddo + do k=1,N_st + overlap(k,1) = lambda(k) + overlap(k,2) = s2(k) + enddo + do k=1,N_st + l = order(k) + if (k /= l) then + lambda(k) = overlap(l,1) + s2(k) = overlap(l,2) + endif + enddo + endif @@ -831,11 +908,31 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz ! ----------------------- do k=1,N_st_diag - do i=1,sze - U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & - * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz & - )/max(H_jj(i) - lambda (k),1.d-2) - enddo + if (state_ok(k)) then + do i=1,sze + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & + * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz & + )/max(H_jj(i) - lambda (k),1.d-2) + enddo + else + ! Randomize components with bad + do i=1,sze-2,2 + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + U(i,shift2+k) = r1*dcos(r2) + U(i+1,shift2+k) = r1*dsin(r2) + enddo + do i=sze-2+1,sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + U(i,shift2+k) = r1*dcos(r2) + enddo + endif + if (k <= N_st) then residual_norm(k) = u_dot_u(U(1,shift2+k),sze) to_print(1,k) = lambda(k) + nuclear_repulsion @@ -858,20 +955,16 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz enddo - if (.not.converged) then - iter = itermax-1 - endif - ! Re-contract to u_in ! ----------- - do k=1,N_st_diag - energies(k) = lambda(k) - enddo + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) - call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, 1.d0, & - U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + enddo + do k=1,N_st_diag + energies(k) = lambda(k) enddo write_buffer = '===== ' @@ -884,7 +977,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz deallocate ( & W, residual_norm, & - U, & + U, overlap, & c, S, & h, & y, s_, s_tmp, & @@ -950,7 +1043,7 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8, & - !$OMP N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij,istate_in) + !$OMP N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij, delta_ij_s2,istate_in) allocate(vt(N_st_8,n),st(N_st_8,n)) Vt = 0.d0 St = 0.d0 @@ -1035,6 +1128,8 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i do istate=1,N_st vt (istate,i) = vt (istate,i) + delta_ij(istate_in,jj,ii)*ut(istate,j) vt (istate,j) = vt (istate,j) + delta_ij(istate_in,jj,ii)*ut(istate,i) + st (istate,i) = st (istate,i) + delta_ij_s2(istate_in,jj,ii)*ut(istate,j) + st (istate,j) = st (istate,j) + delta_ij_s2(istate_in,jj,ii)*ut(istate,i) enddo enddo enddo diff --git a/plugins/MRCC_Utils/mrcc_dummy.irp.f b/plugins/MRCC_Utils/mrcc_dummy.irp.f deleted file mode 100644 index 8f1deda8..00000000 --- a/plugins/MRCC_Utils/mrcc_dummy.irp.f +++ /dev/null @@ -1,4 +0,0 @@ -program pouet - - -end diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 7005fa19..0540eed9 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -77,18 +77,18 @@ BEGIN_PROVIDER [ double precision, hij_mrcc, (N_det_non_ref,N_det_ref) ] END_PROVIDER - BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ] -&BEGIN_PROVIDER [ double precision, delta_ii, (N_states,N_det_ref) ] - implicit none - BEGIN_DOC - ! Dressing matrix in N_det basis - END_DOC - integer :: i,j,m - delta_ij = 0.d0 - delta_ii = 0.d0 - call H_apply_mrcc(delta_ij,delta_ii,N_states,N_det_non_ref,N_det_ref) - -END_PROVIDER +! BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ] +!&BEGIN_PROVIDER [ double precision, delta_ii, (N_states,N_det_ref) ] +! implicit none +! BEGIN_DOC +! ! Dressing matrix in N_det basis +! END_DOC +! integer :: i,j,m +! delta_ij = 0.d0 +! delta_ii = 0.d0 +! call H_apply_mrcc(delta_ij,delta_ii,N_states,N_det_non_ref,N_det_ref) +! +!END_PROVIDER BEGIN_PROVIDER [ double precision, h_matrix_dressed, (N_det,N_det,N_states) ] @@ -139,7 +139,6 @@ END_PROVIDER integer :: mrcc_state - mrcc_state = N_states do j=1,min(N_states,N_det) do i=1,N_det CI_eigenvectors_dressed(i,j) = psi_coef(i,j) @@ -148,16 +147,33 @@ END_PROVIDER if (diag_algorithm == "Davidson") then -! call davidson_diag_mrcc(psi_det,CI_eigenvectors_dressed,CI_electronic_energy_dressed,& -! size(CI_eigenvectors_dressed,1),N_det,N_states,N_states_diag,N_int,output_determinants,mrcc_state) - - call davidson_diag_mrcc_HS2(psi_det,CI_eigenvectors_dressed,& - size(CI_eigenvectors_dressed,1), & - CI_electronic_energy_dressed,N_det,N_states,N_states_diag,N_int, & - output_determinants,mrcc_state) - - call u_0_S2_u_0(CI_eigenvectors_s2_dressed,CI_eigenvectors_dressed,N_det,psi_det,N_int,& - N_states_diag,size(CI_eigenvectors_dressed,1)) + allocate (eigenvectors(size(CI_eigenvectors_dressed,1),size(CI_eigenvectors_dressed,2)), & + eigenvalues(size(CI_electronic_energy_dressed,1))) + do j=1,min(N_states,N_det) + do i=1,N_det + eigenvectors(i,j) = psi_coef(i,j) + enddo + enddo + do mrcc_state=1,N_states + do j=mrcc_state,min(N_states,N_det) + do i=1,N_det + eigenvectors(i,j) = psi_coef(i,j) + enddo + enddo + call davidson_diag_mrcc_HS2(psi_det,eigenvectors,& + size(eigenvectors,1), & + eigenvalues,N_det,N_states,N_states_diag,N_int, & + output_determinants,mrcc_state) + CI_eigenvectors_dressed(1:N_det,mrcc_state) = eigenvectors(1:N_det,mrcc_state) + CI_electronic_energy_dressed(mrcc_state) = eigenvalues(mrcc_state) + enddo + do k=N_states+1,N_states_diag + CI_eigenvectors_dressed(1:N_det,k) = eigenvectors(1:N_det,k) + CI_electronic_energy_dressed(k) = eigenvalues(k) + enddo + call u_0_S2_u_0(CI_eigenvectors_s2_dressed,CI_eigenvectors_dressed,N_det,psi_det,N_int,& + N_states_diag,size(CI_eigenvectors_dressed,1)) + deallocate (eigenvectors,eigenvalues) else if (diag_algorithm == "Lapack") then @@ -628,12 +644,12 @@ END_PROVIDER double precision :: phase - double precision, allocatable :: rho_mrcc_init(:,:) + double precision, allocatable :: rho_mrcc_init(:) integer :: a_coll, at_roww print *, "TI", hh_nex, N_det_non_ref - allocate(rho_mrcc_init(N_det_non_ref, N_states)) + allocate(rho_mrcc_init(N_det_non_ref)) allocate(x_new(hh_nex)) allocate(x(hh_nex), AtB(hh_nex)) x = 0d0 @@ -644,9 +660,8 @@ END_PROVIDER AtB(:) = 0.d0 !$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, active_excitation_to_determinants_idx,& !$OMP active_excitation_to_determinants_val, x, N_det_ref, hh_nex, N_det_non_ref) & - !$OMP private(at_row, a_col, t, i, j, r1, r2, wk, A_ind_mwen, A_val_mwen, a_coll, at_roww)& + !$OMP private(at_row, a_col, i, j, r1, r2, wk, A_ind_mwen, A_val_mwen, a_coll, at_roww)& !$OMP shared(N_states,mrcc_col_shortcut, mrcc_N_col, AtB, mrcc_AtA_val, mrcc_AtA_ind, s, n_exc_active, active_pp_idx) - allocate(A_val_mwen(N_states,hh_nex), A_ind_mwen(hh_nex), t(N_states)) !$OMP DO schedule(dynamic, 100) do at_roww = 1, n_exc_active ! hh_nex @@ -655,11 +670,11 @@ END_PROVIDER AtB(at_row) = AtB(at_row) + psi_non_ref_coef(active_excitation_to_determinants_idx(i, at_roww), s) * active_excitation_to_determinants_val(s,i, at_roww) end do end do - !$OMP END DO NOWAIT - deallocate (A_ind_mwen, A_val_mwen) + !$OMP END DO + !$OMP END PARALLEL - x = 0d0 + X(:) = 0d0 do a_coll = 1, n_exc_active @@ -669,14 +684,12 @@ END_PROVIDER rho_mrcc_init = 0d0 - !$OMP PARALLEL default(shared) & - !$OMP private(lref, hh, pp, II, myMask, myDet, ok, ind, phase) allocate(lref(N_det_ref)) - !$OMP DO schedule(static, 1) do hh = 1, hh_shortcut(0) do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 if(is_active_exc(pp)) cycle lref = 0 + AtB(pp) = 0.d0 do II=1,N_det_ref call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) if(.not. ok) cycle @@ -686,39 +699,36 @@ END_PROVIDER if(ind == -1) cycle ind = psi_non_ref_sorted_idx(ind) call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int) - X(pp) += psi_ref_coef(II,s)**2 AtB(pp) += psi_non_ref_coef(ind, s) * psi_ref_coef(II, s) * phase lref(II) = ind - if(phase < 0d0) lref(II) = -ind + if(phase < 0.d0) lref(II) = -ind end do - X(pp) = AtB(pp) / X(pp) + X(pp) = AtB(pp) do II=1,N_det_ref if(lref(II) > 0) then - rho_mrcc_init(lref(II),s) = psi_ref_coef(II,s) * X(pp) + rho_mrcc_init(lref(II)) = psi_ref_coef(II,s) * X(pp) else if(lref(II) < 0) then - rho_mrcc_init(-lref(II),s) = -psi_ref_coef(II,s) * X(pp) + rho_mrcc_init(-lref(II)) = -psi_ref_coef(II,s) * X(pp) end if end do end do end do - !$OMP END DO deallocate(lref) - !$OMP END PARALLEL - + x_new = x double precision :: factor, resold factor = 1.d0 resold = huge(1.d0) - do k=0,100000 + do k=0,hh_nex*hh_nex !$OMP PARALLEL default(shared) private(cx, i, a_col, a_coll) !$OMP DO do i=1,N_det_non_ref - rho_mrcc(i,s) = rho_mrcc_init(i,s) + rho_mrcc(i,s) = rho_mrcc_init(i) enddo - !$OMP END DO + !$OMP END DO NOWAIT !$OMP DO do a_coll = 1, n_exc_active @@ -746,15 +756,15 @@ END_PROVIDER X(a_col) = X_new(a_col) end do if (res > resold) then - factor = -factor * 0.5d0 + factor = factor * 0.5d0 endif resold = res - if(mod(k, 100) == 0) then + if(iand(k, 4095) == 0) then print *, "res ", k, res end if - if(res < 1d-9) exit + if(res < 1d-12) exit end do norm = 0.d0 @@ -917,6 +927,9 @@ END_PROVIDER norm = norm*f print *, 'norm of |T Psi_0> = ', dsqrt(norm) + if (dsqrt(norm) > 1.d0) then + stop 'Error : Norm of the SD larger than the norm of the reference.' + endif do i=1,N_det_ref norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s) @@ -928,7 +941,7 @@ END_PROVIDER ! rho_mrcc now contains the product of the scaling factors and the ! normalization constant - dIj_unique(:size(X), s) = X(:) + dIj_unique(1:size(X), s) = X(1:size(X)) end do END_PROVIDER @@ -940,17 +953,14 @@ BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ] integer :: s,i,j double precision, external :: get_dij_index print *, "computing amplitudes..." - !$OMP PARALLEL DEFAULT(shared) PRIVATE(s,i,j) do s=1, N_states - !$OMP DO do i=1, N_det_non_ref do j=1, N_det_ref + !DIR$ FORCEINLINE dij(j, i, s) = get_dij_index(j, i, s, N_int) end do end do - !$OMP END DO end do - !$OMP END PARALLEL print *, "done computing amplitudes" END_PROVIDER diff --git a/plugins/Psiref_Utils/psi_ref_utils.irp.f b/plugins/Psiref_Utils/psi_ref_utils.irp.f index 41db2f10..c4147ebc 100644 --- a/plugins/Psiref_Utils/psi_ref_utils.irp.f +++ b/plugins/Psiref_Utils/psi_ref_utils.irp.f @@ -97,6 +97,10 @@ END_PROVIDER endif enddo N_det_non_ref = i_non_ref + if (N_det_non_ref < 1) then + print *, 'Error : All determinants are in the reference' + stop -1 + endif END_PROVIDER BEGIN_PROVIDER [ integer(bit_kind), psi_non_ref_restart, (N_int,2,psi_det_size) ] diff --git a/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES b/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES @@ -0,0 +1 @@ + diff --git a/plugins/Selectors_CASSD/README.rst b/plugins/Selectors_CASSD/README.rst new file mode 100644 index 00000000..19b4ec2b --- /dev/null +++ b/plugins/Selectors_CASSD/README.rst @@ -0,0 +1,12 @@ +=============== +Selectors_CASSD +=============== + +Needed Modules +============== +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. +Documentation +============= +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. diff --git a/plugins/Selectors_CASSD/selectors.irp.f b/plugins/Selectors_CASSD/selectors.irp.f new file mode 100644 index 00000000..ab36527d --- /dev/null +++ b/plugins/Selectors_CASSD/selectors.irp.f @@ -0,0 +1,95 @@ +use bitmasks + +BEGIN_PROVIDER [ integer, psi_selectors_size ] + implicit none + psi_selectors_size = psi_det_size +END_PROVIDER + +BEGIN_PROVIDER [ integer, N_det_selectors] + implicit none + BEGIN_DOC + ! For Single reference wave functions, the number of selectors is 1 : the + ! Hartree-Fock determinant + END_DOC + N_det_selectors = N_det +END_PROVIDER + + BEGIN_PROVIDER [ integer(bit_kind), psi_selectors, (N_int,2,psi_selectors_size) ] +&BEGIN_PROVIDER [ double precision, psi_selectors_coef, (psi_selectors_size,N_states) ] + implicit none + BEGIN_DOC + ! Determinants on which we apply for perturbation. + END_DOC + integer :: i, k, l, m + logical :: good + + do i=1,N_det_generators + do k=1,N_int + psi_selectors(k,1,i) = psi_det_generators(k,1,i) + psi_selectors(k,2,i) = psi_det_generators(k,2,i) + enddo + enddo + do k=1,N_states + do i=1,N_det_selectors + psi_selectors_coef(i,k) = psi_coef_generators(i,k) + enddo + enddo + + m=N_det_generators + + do i=1,N_det + do l=1,n_cas_bitmask + good = .True. + do k=1,N_int + good = good .and. ( & + iand(not(cas_bitmask(k,1,l)), psi_det_sorted(k,1,i)) == & + iand(not(cas_bitmask(k,1,l)), HF_bitmask(k,1)) .and. ( & + iand(not(cas_bitmask(k,2,l)), psi_det_sorted(k,2,i)) == & + iand(not(cas_bitmask(k,2,l)), HF_bitmask(k,2) )) ) + enddo + if (good) then + exit + endif + enddo + if (.not.good) then + m = m+1 + do k=1,N_int + psi_selectors(k,1,m) = psi_det_sorted(k,1,i) + psi_selectors(k,2,m) = psi_det_sorted(k,2,i) + enddo + psi_selectors_coef(m,:) = psi_coef_sorted(i,:) + endif + enddo + if (N_det /= m) then + print *, N_det, m + stop 'N_det /= m' + endif +END_PROVIDER + +BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ] + implicit none + BEGIN_DOC + ! Transposed psi_selectors + END_DOC + integer :: i,k + + do i=1,N_det_selectors + do k=1,N_states + psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k) + enddo + enddo +END_PROVIDER + +BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ] + implicit none + BEGIN_DOC + ! Diagonal elements of the H matrix for each selectors + END_DOC + integer :: i + double precision :: diag_H_mat_elem + do i = 1, N_det_selectors + psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int) + enddo +END_PROVIDER + + diff --git a/plugins/Selectors_CASSD/zmq.irp.f b/plugins/Selectors_CASSD/zmq.irp.f new file mode 100644 index 00000000..4359a876 --- /dev/null +++ b/plugins/Selectors_CASSD/zmq.irp.f @@ -0,0 +1,122 @@ +subroutine zmq_put_psi(zmq_to_qp_run_socket,worker_id, energy, size_energy) + use f77_zmq + implicit none + BEGIN_DOC +! Put the wave function on the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + integer, intent(in) :: size_energy + double precision, intent(out) :: energy(size_energy) + integer :: rc + character*(256) :: msg + + write(msg,*) 'put_psi ', worker_id, N_states, N_det, psi_det_size, n_det_generators, n_det_selectors + + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) + if (rc /= len(trim(msg))) then + print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)' + stop 'error' + endif + + rc = f77_zmq_send(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE) + if (rc /= N_int*2*N_det*bit_kind) then + print *, 'f77_zmq_send(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)' + stop 'error' + endif + + rc = f77_zmq_send(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE) + if (rc /= psi_det_size*N_states*8) then + print *, 'f77_zmq_send(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE)' + stop 'error' + endif + + rc = f77_zmq_send(zmq_to_qp_run_socket,energy,size_energy*8,0) + if (rc /= size_energy*8) then + print *, 'f77_zmq_send(zmq_to_qp_run_socket,energy,size_energy*8,0)' + stop 'error' + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:rc) /= 'put_psi_reply 1') then + print *, rc, trim(msg) + print *, 'Error in put_psi_reply' + stop 'error' + endif + +end + + + +subroutine zmq_get_psi(zmq_to_qp_run_socket, worker_id, energy, size_energy) + use f77_zmq + implicit none + BEGIN_DOC +! Get the wave function from the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + integer, intent(in) :: size_energy + double precision, intent(out) :: energy(size_energy) + integer :: rc + character*(64) :: msg + + write(msg,*) 'get_psi ', worker_id + + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) + if (rc /= len(trim(msg))) then + print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)' + stop 'error' + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:13) /= 'get_psi_reply') then + print *, rc, trim(msg) + print *, 'Error in get_psi_reply' + stop 'error' + endif + + integer :: N_states_read, N_det_read, psi_det_size_read + integer :: N_det_selectors_read, N_det_generators_read + read(msg(14:rc),*) rc, N_states_read, N_det_read, psi_det_size_read, & + N_det_generators_read, N_det_selectors_read + if (rc /= worker_id) then + print *, 'Wrong worker ID' + stop 'error' + endif + + N_states = N_states_read + N_det = N_det_read + psi_det_size = psi_det_size_read + + rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE) + if (rc /= N_int*2*N_det*bit_kind) then + print *, 'f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)' + stop 'error' + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE) + if (rc /= psi_det_size*N_states*8) then + print *, '77_zmq_recv(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE)' + stop 'error' + endif + TOUCH psi_det_size N_det N_states psi_det psi_coef + + rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,size_energy*8,0) + if (rc /= size_energy*8) then + print *, 'f77_zmq_recv(zmq_to_qp_run_socket,energy,size_energy*8,0)' + stop 'error' + endif + + if (N_det_generators_read > 0) then + N_det_generators = N_det_generators_read + TOUCH N_det_generators + endif + if (N_det_selectors_read > 0) then + N_det_selectors = N_det_selectors_read + TOUCH N_det_selectors + endif + +end + + diff --git a/plugins/loc_cele/loc_exchange_int.irp.f b/plugins/loc_cele/loc_exchange_int.irp.f index d7cc5c65..8bb47d89 100644 --- a/plugins/loc_cele/loc_exchange_int.irp.f +++ b/plugins/loc_cele/loc_exchange_int.irp.f @@ -14,7 +14,7 @@ program loc_int exchange_int = 0.d0 iorder = 0 print*,'' - if(list_core_inact_check(iorb) == .False.)cycle + if(list_core_inact_check(iorb) .eqv. .False.)cycle do j = i+1, n_core_inact_orb jorb = list_core_inact(j) iorder(jorb) = jorb @@ -46,7 +46,7 @@ program loc_int exchange_int = 0.d0 iorder = 0 print*,'' - if(list_core_inact_check(iorb) == .False.)cycle + if(list_core_inact_check(iorb) .eqv. .False.)cycle do j = i+1, n_act_orb jorb = list_act(j) iorder(jorb) = jorb @@ -78,7 +78,7 @@ program loc_int exchange_int = 0.d0 iorder = 0 print*,'' - if(list_core_inact_check(iorb) == .False.)cycle + if(list_core_inact_check(iorb) .eqv. .False.)cycle do j = i+1, n_virt_orb jorb = list_virt(j) iorder(jorb) = jorb diff --git a/plugins/loc_cele/loc_exchange_int_act.irp.f b/plugins/loc_cele/loc_exchange_int_act.irp.f index b9bbeb82..f332dd5d 100644 --- a/plugins/loc_cele/loc_exchange_int_act.irp.f +++ b/plugins/loc_cele/loc_exchange_int_act.irp.f @@ -15,7 +15,7 @@ program loc_int exchange_int = 0.d0 iorder = 0 print*,'' - if(list_core_inact_check(iorb) == .False.)cycle + if(list_core_inact_check(iorb) .eqv. .False.)cycle do j = i+1, n_act_orb jorb = list_act(j) iorder(jorb) = jorb diff --git a/plugins/loc_cele/loc_exchange_int_inact.irp.f b/plugins/loc_cele/loc_exchange_int_inact.irp.f index 2ff3c85f..fcf20ced 100644 --- a/plugins/loc_cele/loc_exchange_int_inact.irp.f +++ b/plugins/loc_cele/loc_exchange_int_inact.irp.f @@ -14,7 +14,7 @@ program loc_int exchange_int = 0.d0 iorder = 0 print*,'' - if(list_core_inact_check(iorb) == .False.)cycle + if(list_core_inact_check(iorb) .eqv. .False.)cycle do j = i+1, n_core_inact_orb jorb = list_core_inact(j) iorder(jorb) = jorb diff --git a/plugins/loc_cele/loc_exchange_int_virt.irp.f b/plugins/loc_cele/loc_exchange_int_virt.irp.f index 333f189b..8302b5d2 100644 --- a/plugins/loc_cele/loc_exchange_int_virt.irp.f +++ b/plugins/loc_cele/loc_exchange_int_virt.irp.f @@ -15,7 +15,7 @@ program loc_int exchange_int = 0.d0 iorder = 0 print*,'' - if(list_core_inact_check(iorb) == .False.)cycle + if(list_core_inact_check(iorb) .eqv. .False.)cycle do j = i+1, n_virt_orb jorb = list_virt(j) iorder(jorb) = jorb diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index 3646b0b2..0c67ab99 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -4,6 +4,8 @@ use bitmasks BEGIN_PROVIDER [ double precision, delta_ij_mrcc, (N_states,N_det_non_ref,N_det_ref) ] &BEGIN_PROVIDER [ double precision, delta_ii_mrcc, (N_states, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2_mrcc, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2_mrcc, (N_states, N_det_ref) ] use bitmasks implicit none integer :: gen, h, p, n, t, i, h1, h2, p1, p2, s1, s2, iproc @@ -14,11 +16,13 @@ use bitmasks delta_ij_mrcc = 0d0 delta_ii_mrcc = 0d0 - print *, "Dij", dij(1,1,1) + delta_ij_s2_mrcc = 0d0 + delta_ii_s2_mrcc = 0d0 + PROVIDE dij provide hh_shortcut psi_det_size! lambda_mrcc !$OMP PARALLEL DO default(none) schedule(dynamic) & !$OMP shared(psi_det_generators, N_det_generators, hh_exists, pp_exists, N_int, hh_shortcut) & - !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc) & + !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc, delta_ii_s2_mrcc, delta_ij_s2_mrcc) & !$OMP private(h, n, mask, omask, buf, ok, iproc) do gen= 1, N_det_generators allocate(buf(N_int, 2, N_det_non_ref)) @@ -37,7 +41,9 @@ use bitmasks end do n = n - 1 - if(n /= 0) call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc,gen,n,buf,N_int,omask) + if(n /= 0) then + call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc, delta_ij_s2_mrcc, delta_ii_s2_mrcc, gen,n,buf,N_int,omask) + endif end do deallocate(buf) @@ -52,13 +58,15 @@ END_PROVIDER ! end subroutine -subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffer,Nint,key_mask) +subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask) use bitmasks implicit none integer, intent(in) :: i_generator,n_selected, Nint double precision, intent(inout) :: delta_ij_(N_states,N_det_non_ref,N_det_ref) double precision, intent(inout) :: delta_ii_(N_states,N_det_ref) + double precision, intent(inout) :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref) + double precision, intent(inout) :: delta_ii_s2_(N_states,N_det_ref) integer(bit_kind), intent(in) :: det_buffer(Nint,2,n_selected) integer :: i,j,k,l,m @@ -68,8 +76,8 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe integer(bit_kind),allocatable :: tq(:,:,:) integer :: N_tq, c_ref ,degree - double precision :: hIk, hla, hIl, dIk(N_states), dka(N_states), dIa(N_states) - double precision, allocatable :: dIa_hla(:,:) + double precision :: hIk, hla, hIl, sla, dIk(N_states), dka(N_states), dIa(N_states) + double precision, allocatable :: dIa_hla(:,:), dIa_sla(:,:) double precision :: haj, phase, phase2 double precision :: f(N_states), ci_inv(N_states) integer :: exc(0:2,2,2) @@ -82,7 +90,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe integer(bit_kind),intent(in) :: key_mask(Nint, 2) integer,allocatable :: idx_miniList(:) integer :: N_miniList, ni, leng - double precision, allocatable :: hij_cache(:) + double precision, allocatable :: hij_cache(:), sij_cache(:) integer(bit_kind), allocatable :: microlist(:,:,:), microlist_zero(:,:,:) integer, allocatable :: idx_microlist(:), N_microlist(:), ptr_microlist(:), idx_microlist_zero(:) @@ -92,7 +100,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe leng = max(N_det_generators, N_det_non_ref) - allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref)) + allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref), sij_cache(N_det_non_ref)) allocate(idx_alpha(0:psi_det_size), degree_alpha(psi_det_size)) !create_minilist_find_previous(key_mask, fullList, miniList, N_fullList, N_miniList, fullMatch, Nint) call create_minilist_find_previous(key_mask, psi_det_generators, miniList, i_generator-1, N_miniList, fullMatch, Nint) @@ -117,7 +125,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe deallocate(microlist, idx_microlist) - allocate (dIa_hla(N_states,N_det_non_ref)) + allocate (dIa_hla(N_states,N_det_non_ref), dIa_sla(N_states,N_det_non_ref)) ! |I> @@ -185,6 +193,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hij_cache(k_sd)) + call get_s2(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,sij_cache(k_sd)) enddo ! |I> do i_I=1,N_det_ref @@ -282,31 +291,36 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) hla = hij_cache(k_sd) + sla = sij_cache(k_sd) ! call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hla) do i_state=1,N_states dIa_hla(i_state,k_sd) = dIa(i_state) * hla + dIa_sla(i_state,k_sd) = dIa(i_state) * sla enddo enddo call omp_set_lock( psi_ref_lock(i_I) ) do i_state=1,N_states - if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5)then - do l_sd=1,idx_alpha(0) - k_sd = idx_alpha(l_sd) - delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd) - delta_ii_(i_state,i_I) = delta_ii_(i_state,i_I) - dIa_hla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) - enddo - else +! if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5)then +! do l_sd=1,idx_alpha(0) +! k_sd = idx_alpha(l_sd) +! delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd) +! delta_ii_(i_state,i_I) = delta_ii_(i_state,i_I) - dIa_hla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) +! delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + dIa_sla(i_state,k_sd) +! delta_ii_s2_(i_state,i_I) = delta_ii_s2_(i_state,i_I) - dIa_sla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) +! enddo +! else delta_ii_(i_state,i_I) = 0.d0 do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + 0.5d0*dIa_hla(i_state,k_sd) + delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + 0.5d0*dIa_sla(i_state,k_sd) enddo - endif +! endif enddo call omp_unset_lock( psi_ref_lock(i_I) ) enddo enddo - deallocate (dIa_hla,hij_cache) + deallocate (dIa_hla,dIa_sla,hij_cache,sij_cache) deallocate(miniList, idx_miniList) end @@ -315,45 +329,84 @@ end BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ] &BEGIN_PROVIDER [ double precision, delta_ii, (N_states, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2, (N_states, N_det_ref) ] use bitmasks implicit none - integer :: i, j, i_state + integer :: i, j, i_state !mrmode : 1=mrcepa0, 2=mrsc2 add, 3=mrcc - do i_state = 1, N_states - if(mrmode == 3) then + if(mrmode == 3) then do i = 1, N_det_ref - delta_ii(i_state,i)= delta_ii_mrcc(i_state,i) + do i_state = 1, N_states + delta_ii(i_state,i)= delta_ii_mrcc(i_state,i) + delta_ii_s2(i_state,i)= delta_ii_s2_mrcc(i_state,i) + enddo do j = 1, N_det_non_ref - delta_ij(i_state,j,i) = delta_ij_mrcc(i_state,j,i) + do i_state = 1, N_states + delta_ij(i_state,j,i) = delta_ij_mrcc(i_state,j,i) + delta_ij_s2(i_state,j,i) = delta_ij_s2_mrcc(i_state,j,i) + enddo end do end do -! -! do i = 1, N_det_ref -! delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state) - delta_sub_ii(i,i_state) -! do j = 1, N_det_non_ref -! delta_ij(i_state,j,i) = delta_mrcepa0_ij(i,j,i_state) - delta_sub_ij(i,j,i_state) -! end do -! end do - else if(mrmode == 2) then - do i = 1, N_det_ref + + ! =-=-= BEGIN STATE AVERAGE +! do i = 1, N_det_ref +! delta_ii(:,i)= delta_ii_mrcc(1,i) +! delta_ii_s2(:,i)= delta_ii_s2_mrcc(1,i) +! do i_state = 2, N_states +! delta_ii(:,i) += delta_ii_mrcc(i_state,i) +! delta_ii_s2(:,i) += delta_ii_s2_mrcc(i_state,i) +! enddo +! do j = 1, N_det_non_ref +! delta_ij(:,j,i) = delta_ij_mrcc(1,j,i) +! delta_ij_s2(:,j,i) = delta_ij_s2_mrcc(1,j,i) +! do i_state = 2, N_states +! delta_ij(:,j,i) += delta_ij_mrcc(i_state,j,i) +! delta_ij_s2(:,j,i) += delta_ij_s2_mrcc(i_state,j,i) +! enddo +! end do +! end do +! delta_ij = delta_ij * (1.d0/dble(N_states)) +! delta_ii = delta_ii * (1.d0/dble(N_states)) + ! =-=-= END STATE AVERAGE + ! + ! do i = 1, N_det_ref + ! delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state) - delta_sub_ii(i,i_state) + ! do j = 1, N_det_non_ref + ! delta_ij(i_state,j,i) = delta_mrcepa0_ij(i,j,i_state) - delta_sub_ij(i,j,i_state) + ! end do + ! end do + else if(mrmode == 2) then + do i = 1, N_det_ref + do i_state = 1, N_states delta_ii(i_state,i)= delta_ii_old(i_state,i) - do j = 1, N_det_non_ref + delta_ii_s2(i_state,i)= delta_ii_s2_old(i_state,i) + enddo + do j = 1, N_det_non_ref + do i_state = 1, N_states delta_ij(i_state,j,i) = delta_ij_old(i_state,j,i) - end do + delta_ij_s2(i_state,j,i) = delta_ij_s2_old(i_state,j,i) + enddo end do - else if(mrmode == 1) then - do i = 1, N_det_ref + end do + else if(mrmode == 1) then + do i = 1, N_det_ref + do i_state = 1, N_states delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state) - do j = 1, N_det_non_ref + delta_ii_s2(i_state,i)= delta_mrcepa0_ii_s2(i,i_state) + enddo + do j = 1, N_det_non_ref + do i_state = 1, N_states delta_ij(i_state,j,i) = delta_mrcepa0_ij(i,j,i_state) - end do + delta_ij_s2(i_state,j,i) = delta_mrcepa0_ij_s2(i,j,i_state) + enddo end do - else - stop "invalid mrmode" - end if - end do + end do + else + stop "invalid mrmode" + end if END_PROVIDER @@ -537,28 +590,32 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, delta_cas, (N_det_ref, N_det_ref, N_states) ] +&BEGIN_PROVIDER [ double precision, delta_cas_s2, (N_det_ref, N_det_ref, N_states) ] use bitmasks implicit none integer :: i,j,k - double precision :: Hjk, Hki, Hij + double precision :: Sjk,Hjk, Hki, Hij !double precision, external :: get_dij integer i_state, degree provide lambda_mrcc dIj do i_state = 1, N_states - !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,N_det_ref,dij) + !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Sjk,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,delta_cas_s2,N_det_ref,dij) do i=1,N_det_ref do j=1,i call get_excitation_degree(psi_ref(1,1,i), psi_ref(1,1,j), degree, N_int) delta_cas(i,j,i_state) = 0d0 + delta_cas_s2(i,j,i_state) = 0d0 do k=1,N_det_non_ref call i_h_j(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Hjk) + call get_s2(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Sjk) delta_cas(i,j,i_state) += Hjk * dij(i, k, i_state) ! * Hki * lambda_mrcc(i_state, k) - !print *, Hjk * get_dij(psi_ref(1,1,i), psi_non_ref(1,1,k), N_int), Hki * get_dij(psi_ref(1,1,j), psi_non_ref(1,1,k), N_int) + delta_cas_s2(i,j,i_state) += Sjk * dij(i, k, i_state) ! * Ski * lambda_mrcc(i_state, k) end do delta_cas(j,i,i_state) = delta_cas(i,j,i_state) + delta_cas_s2(j,i,i_state) = delta_cas_s2(i,j,i_state) end do end do !$OMP END PARALLEL DO @@ -639,6 +696,8 @@ end function BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij, (N_det_ref,N_det_non_ref,N_states) ] &BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii, (N_det_ref,N_states) ] +&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij_s2, (N_det_ref,N_det_non_ref,N_states) ] +&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii_s2, (N_det_ref,N_states) ] use bitmasks implicit none @@ -646,7 +705,7 @@ end function integer :: p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_, sortRefIdx(N_det_ref) logical :: ok double precision :: phase_iI, phase_Ik, phase_Jl, phase_IJ, phase_al, diI, hIi, hJi, delta_JI, dkI(1), HkI, ci_inv(1), dia_hla(1) - double precision :: contrib, contrib2, HIIi, HJk, wall + double precision :: contrib, contrib2, contrib_s2, contrib2_s2, HIIi, HJk, wall integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ integer(bit_kind) :: det_tmp(N_int, 2), made_hole(N_int,2), made_particle(N_int,2), myActive(N_int,2) integer(bit_kind),allocatable :: sortRef(:,:,:) @@ -671,14 +730,16 @@ end function ! To provide everything contrib = dij(1, 1, 1) - do i_state = 1, N_states - delta_mrcepa0_ii(:,:) = 0d0 - delta_mrcepa0_ij(:,:,:) = 0d0 + delta_mrcepa0_ii(:,:) = 0d0 + delta_mrcepa0_ij(:,:,:) = 0d0 + delta_mrcepa0_ii_s2(:,:) = 0d0 + delta_mrcepa0_ij_s2(:,:,:) = 0d0 - !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii) & - !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2) & + do i_state = 1, N_states + !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii, delta_mrcepa0_ij_s2, delta_mrcepa0_ii_s2) & + !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2,contrib_s2,contrib2_s2) & !$OMP shared(active_sorb, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef, cepa0_shortcut, det_cepa0_active) & - !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas) & + !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas, delta_cas_s2) & !$OMP shared(notf,i_state, sortRef, sortRefIdx, dij) do blok=1,cepa0_shortcut(0) do i=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1 @@ -721,16 +782,21 @@ end function ! call i_h_j(psi_non_ref(1,1,det_cepa0_idx(k)),psi_ref(1,1,J),N_int,HJk) contrib = delta_cas(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) + contrib_s2 = delta_cas_s2(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then contrib2 = contrib / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state) + contrib2_s2 = contrib_s2 / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state) !$OMP ATOMIC delta_mrcepa0_ii(J,i_state) -= contrib2 + delta_mrcepa0_ii_s2(J,i_state) -= contrib2_s2 else contrib = contrib * 0.5d0 + contrib_s2 = contrib_s2 * 0.5d0 end if !$OMP ATOMIC delta_mrcepa0_ij(J, det_cepa0_idx(i), i_state) += contrib + delta_mrcepa0_ij_s2(J, det_cepa0_idx(i), i_state) += contrib_s2 end do kloop end do @@ -741,7 +807,7 @@ end function deallocate(idx_sorted_bit) call wall_time(wall) print *, "cepa0", wall, notf - !stop + END_PROVIDER @@ -860,12 +926,14 @@ subroutine set_det_bit(det, p, s) end subroutine -BEGIN_PROVIDER [ double precision, h_, (N_det_ref,N_det_non_ref) ] + BEGIN_PROVIDER [ double precision, h_cache, (N_det_ref,N_det_non_ref) ] +&BEGIN_PROVIDER [ double precision, s2_cache, (N_det_ref,N_det_non_ref) ] implicit none integer :: i,j do i=1,N_det_ref do j=1,N_det_non_ref - call i_h_j(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, h_(i,j)) + call i_h_j(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, h_cache(i,j)) + call get_s2(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, s2_cache(i,j)) end do end do END_PROVIDER diff --git a/plugins/mrcepa0/dressing_slave.irp.f b/plugins/mrcepa0/dressing_slave.irp.f index f1d6f029..9e9fa65a 100644 --- a/plugins/mrcepa0/dressing_slave.irp.f +++ b/plugins/mrcepa0/dressing_slave.irp.f @@ -37,7 +37,7 @@ subroutine mrsc2_dressing_slave(thread,iproc) integer(ZMQ_PTR), external :: new_zmq_push_socket integer(ZMQ_PTR) :: zmq_socket_push - double precision, allocatable :: delta(:,:,:) + double precision, allocatable :: delta(:,:,:), delta_s2(:,:,:) @@ -47,8 +47,8 @@ subroutine mrsc2_dressing_slave(thread,iproc) logical :: ok double precision :: phase_iI, phase_Ik, phase_Jl, phase_Ji, phase_al double precision :: diI, hIi, hJi, delta_JI, dkI, HkI, ci_inv(N_states), cj_inv(N_states) - double precision :: contrib, wall, iwall - double precision, allocatable :: dleat(:,:,:) + double precision :: contrib, contrib_s2, wall, iwall + double precision, allocatable :: dleat(:,:,:), dleat_s2(:,:,:) integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ integer(bit_kind) :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt integer, external :: get_index_in_psi_det_sorted_bit, searchDet, detCmp @@ -63,6 +63,7 @@ subroutine mrsc2_dressing_slave(thread,iproc) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) allocate (dleat(N_states, N_det_non_ref, 2), delta(N_states,0:N_det_non_ref, 2)) + allocate (dleat_s2(N_states, N_det_non_ref, 2), delta_s2(N_states,0:N_det_non_ref, 2)) allocate(komon(0:N_det_non_ref)) do @@ -74,10 +75,14 @@ subroutine mrsc2_dressing_slave(thread,iproc) cj_inv(i_state) = 1.d0 / psi_ref_coef(J,i_state) end do !delta = 0.d0 + !delta_s2 = 0.d0 n = 0 delta(:,0,:) = 0d0 delta(:,:nlink(J),1) = 0d0 delta(:,:nlink(i_I),2) = 0d0 + delta_s2(:,0,:) = 0d0 + delta_s2(:,:nlink(J),1) = 0d0 + delta_s2(:,:nlink(i_I),2) = 0d0 komon(0) = 0 komoned = .false. @@ -121,8 +126,8 @@ subroutine mrsc2_dressing_slave(thread,iproc) end if i = det_cepa0_idx(linked(m, i_I)) - if(h_(J,i) == 0.d0) cycle - if(h_(i_I,i) == 0.d0) cycle + if(h_cache(J,i) == 0.d0) cycle + if(h_cache(i_I,i) == 0.d0) cycle !ok = .false. !do i_state=1, N_states @@ -144,10 +149,13 @@ subroutine mrsc2_dressing_slave(thread,iproc) ! if(I_i == J) phase_Ii = phase_Ji do i_state = 1,N_states - dkI = h_(J,i) * dij(i_I, i, i_state)!get_dij(psi_ref(1,1,i_I), psi_non_ref(1,1,i), N_int) - !dkI = h_(J,i) * h_(i_I,i) * lambda_mrcc(i_state, i) + dkI = h_cache(J,i) * dij(i_I, i, i_state) dleat(i_state, kn, 1) = dkI dleat(i_state, kn, 2) = dkI + + dkI = s2_cache(J,i) * dij(i_I, i, i_state) + dleat_s2(i_state, kn, 1) = dkI + dleat_s2(i_state, kn, 2) = dkI end do end do @@ -173,26 +181,32 @@ subroutine mrsc2_dressing_slave(thread,iproc) !if(lambda_mrcc(i_state, i) == 0d0) cycle - !contrib = h_(i_I,k) * lambda_mrcc(i_state, k) * dleat(i_state, m, 2)! * phase_al + !contrib = h_cache(i_I,k) * lambda_mrcc(i_state, k) * dleat(i_state, m, 2)! * phase_al contrib = dij(i_I, k, i_state) * dleat(i_state, m, 2) + contrib_s2 = dij(i_I, k, i_state) * dleat_s2(i_state, m, 2) delta(i_state,ll,1) += contrib + delta_s2(i_state,ll,1) += contrib_s2 if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5) then delta(i_state,0,1) -= contrib * ci_inv(i_state) * psi_non_ref_coef(l,i_state) + delta_s2(i_state,0,1) -= contrib_s2 * ci_inv(i_state) * psi_non_ref_coef(l,i_state) endif if(I_i == J) cycle - !contrib = h_(J,l) * lambda_mrcc(i_state, l) * dleat(i_state, m, 1)! * phase_al + !contrib = h_cache(J,l) * lambda_mrcc(i_state, l) * dleat(i_state, m, 1)! * phase_al contrib = dij(J, l, i_state) * dleat(i_state, m, 1) + contrib_s2 = dij(J, l, i_state) * dleat_s2(i_state, m, 1) delta(i_state,kk,2) += contrib + delta_s2(i_state,kk,2) += contrib_s2 if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then delta(i_state,0,2) -= contrib * cj_inv(i_state) * psi_non_ref_coef(k,i_state) + delta_s2(i_state,0,2) -= contrib_s2 * cj_inv(i_state) * psi_non_ref_coef(k,i_state) end if enddo !i_state end do ! while end do ! kk - call push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) + call push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) ! end if @@ -208,7 +222,7 @@ subroutine mrsc2_dressing_slave(thread,iproc) end -subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) +subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) use f77_zmq implicit none BEGIN_DOC @@ -218,6 +232,7 @@ subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) integer, intent(in) :: i_I, J integer(ZMQ_PTR), intent(in) :: zmq_socket_push double precision,intent(inout) :: delta(N_states, 0:N_det_non_ref, 2) + double precision,intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2) integer, intent(in) :: task_id integer :: rc , i_state, i, kk, li integer,allocatable :: idx(:,:) @@ -278,6 +293,12 @@ subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) print *, irp_here, 'f77_zmq_send( zmq_socket_push, delta, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)' stop 'error' endif + + rc = f77_zmq_send( zmq_socket_push, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE) ! delta_s2(1,0,1) = delta_I delta_s2(1,0,2) = delta_J + if (rc /= (n(kk)+1)*8*N_states) then + print *, irp_here, 'f77_zmq_send( zmq_socket_push, delta_s2, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)' + stop 'error' + endif rc = f77_zmq_send( zmq_socket_push, idx(1,kk), n(kk)*4, ZMQ_SNDMORE) if (rc /= n(kk)*4) then @@ -305,7 +326,7 @@ end -subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) +subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, task_id) use f77_zmq implicit none BEGIN_DOC @@ -315,6 +336,7 @@ subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer, intent(out) :: i_I, J, n(2) double precision, intent(inout) :: delta(N_states, 0:N_det_non_ref, 2) + double precision, intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2) integer, intent(out) :: task_id integer :: rc , i, kk integer,intent(inout) :: idx(N_det_non_ref,2) @@ -346,9 +368,15 @@ subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) stop 'error' endif + rc = f77_zmq_recv( zmq_socket_pull, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE) + if (rc /= (n(kk)+1)*8*N_states) then + print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, delta_s2, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)' + stop 'error' + endif + rc = f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, ZMQ_SNDMORE) if (rc /= n(kk)*4) then - print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, delta, n(kk)*4, ZMQ_SNDMORE)' + print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, ZMQ_SNDMORE)' stop 'error' endif end if @@ -372,7 +400,7 @@ end -subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) +subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_,delta_ii_s2_,delta_ij_s2_) use f77_zmq implicit none BEGIN_DOC @@ -381,11 +409,13 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) double precision,intent(inout) :: delta_ij_(N_states,N_det_non_ref,N_det_ref) double precision,intent(inout) :: delta_ii_(N_states,N_det_ref) + double precision,intent(inout) :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref) + double precision,intent(inout) :: delta_ii_s2_(N_states,N_det_ref) ! integer :: j,l integer :: rc - double precision, allocatable :: delta(:,:,:) + double precision, allocatable :: delta(:,:,:), delta_s2(:,:,:) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -401,49 +431,47 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) delta_ii_(:,:) = 0d0 delta_ij_(:,:,:) = 0d0 + delta_ii_s2_(:,:) = 0d0 + delta_ij_s2_(:,:,:) = 0d0 zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_pull = new_zmq_pull_socket() - allocate ( delta(N_states,0:N_det_non_ref,2) ) + allocate ( delta(N_states,0:N_det_non_ref,2), delta_s2(N_states,0:N_det_non_ref,2) ) allocate(idx(N_det_non_ref,2)) more = 1 do while (more == 1) - call pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) + call pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, task_id) do l=1, n(1) do i_state=1,N_states delta_ij_(i_state,idx(l,1),i_I) += delta(i_state,l,1) + delta_ij_s2_(i_state,idx(l,1),i_I) += delta_s2(i_state,l,1) end do end do do l=1, n(2) do i_state=1,N_states delta_ij_(i_state,idx(l,2),J) += delta(i_state,l,2) + delta_ij_s2_(i_state,idx(l,2),J) += delta_s2(i_state,l,2) end do end do -! -! do l=1,nlink(J) -! do i_state=1,N_states -! delta_ij_(i_state,det_cepa0_idx(linked(l,J)),i_I) += delta(i_state,l,1) -! delta_ij_(i_state,det_cepa0_idx(linked(l,i_I)),j) += delta(i_state,l,2) -! end do -! end do -! if(n(1) /= 0) then do i_state=1,N_states delta_ii_(i_state,i_I) += delta(i_state,0,1) + delta_ii_s2_(i_state,i_I) += delta_s2(i_state,0,1) end do end if if(n(2) /= 0) then do i_state=1,N_states delta_ii_(i_state,J) += delta(i_state,0,2) + delta_ii_s2_(i_state,J) += delta_s2(i_state,0,2) end do end if @@ -454,7 +482,7 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) enddo - deallocate( delta ) + deallocate( delta, delta_s2 ) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_pull_socket(zmq_socket_pull) @@ -466,6 +494,8 @@ end BEGIN_PROVIDER [ double precision, delta_ij_old, (N_states,N_det_non_ref,N_det_ref) ] &BEGIN_PROVIDER [ double precision, delta_ii_old, (N_states,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2_old, (N_states,N_det_non_ref,N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2_old, (N_states,N_det_ref) ] implicit none integer :: i_state, i, i_I, J, k, kk, degree, degree2, m, l, deg, ni, m2 @@ -574,10 +604,10 @@ end ! rc = pthread_create(collector_thread, mrsc2_dressing_collector) print *, nzer, ntot, float(nzer) / float(ntot) provide nproc - !$OMP PARALLEL DEFAULT(none) SHARED(delta_ii_old,delta_ij_old) PRIVATE(i) NUM_THREADS(nproc+1) + !$OMP PARALLEL DEFAULT(none) SHARED(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old) PRIVATE(i) NUM_THREADS(nproc+1) i = omp_get_thread_num() if (i==0) then - call mrsc2_dressing_collector(delta_ii_old,delta_ij_old) + call mrsc2_dressing_collector(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old) else call mrsc2_dressing_slave_inproc(i) endif diff --git a/plugins/mrcepa0/mrcc.irp.f b/plugins/mrcepa0/mrcc.irp.f index 91592e62..a5614942 100644 --- a/plugins/mrcepa0/mrcc.irp.f +++ b/plugins/mrcepa0/mrcc.irp.f @@ -8,8 +8,16 @@ program mrsc2sub read_wf = .True. SOFT_TOUCH read_wf - call print_cas_coefs call set_generators_bitmasks_as_holes_and_particles + if (.True.) then + integer :: i,j + do j=1,N_states + do i=1,N_det + psi_coef(i,j) = CI_eigenvectors(i,j) + enddo + enddo + SOFT_TOUCH psi_coef + endif call run(N_states,energy) if(do_pt2_end)then call run_pt2(N_states,energy) diff --git a/plugins/mrcepa0/mrcepa0.irp.f b/plugins/mrcepa0/mrcepa0.irp.f index 34d3dec5..aeacbb39 100644 --- a/plugins/mrcepa0/mrcepa0.irp.f +++ b/plugins/mrcepa0/mrcepa0.irp.f @@ -8,8 +8,18 @@ program mrcepa0 read_wf = .True. SOFT_TOUCH read_wf - call print_cas_coefs call set_generators_bitmasks_as_holes_and_particles + if (.True.) then + integer :: i,j + do j=1,N_states + do i=1,N_det + psi_coef(i,j) = CI_eigenvectors(i,j) + enddo + enddo + TOUCH psi_coef + endif + call print_cas_coefs + call run(N_states,energy) if(do_pt2_end)then call run_pt2(N_states,energy) diff --git a/plugins/mrcepa0/mrcepa0_general.irp.f b/plugins/mrcepa0/mrcepa0_general.irp.f index e3a2d1f5..1b2e2fcb 100644 --- a/plugins/mrcepa0/mrcepa0_general.irp.f +++ b/plugins/mrcepa0/mrcepa0_general.irp.f @@ -17,12 +17,11 @@ subroutine run(N_st,energy) double precision, allocatable :: lambda(:) allocate (lambda(N_states)) - thresh_mrcc = thresh_dressed_ci n_it_mrcc_max = n_it_max_dressed_ci if(n_it_mrcc_max == 1) then - do j=1,N_states_diag + do j=1,N_states do i=1,N_det psi_coef(i,j) = CI_eigenvectors_dressed(i,j) enddo @@ -31,7 +30,6 @@ subroutine run(N_st,energy) call write_double(6,ci_energy_dressed(1),"Final MRCC energy") call ezfio_set_mrcepa0_energy(ci_energy_dressed(1)) call save_wavefunction - energy(:) = ci_energy_dressed(:) else E_new = 0.d0 delta_E = 1.d0 @@ -39,15 +37,21 @@ subroutine run(N_st,energy) lambda = 1.d0 do while (delta_E > thresh_mrcc) iteration += 1 - print *, '===========================' - print *, 'MRCEPA0 Iteration', iteration - print *, '===========================' + print *, '===============================================' + print *, 'MRCEPA0 Iteration', iteration, '/', n_it_mrcc_max + print *, '===============================================' print *, '' - E_old = sum(ci_energy_dressed) - call write_double(6,ci_energy_dressed(1),"MRCEPA0 energy") + E_old = sum(ci_energy_dressed(1:N_states)) + do i=1,N_st + call write_double(6,ci_energy_dressed(i),"MRCEPA0 energy") + enddo call diagonalize_ci_dressed(lambda) - E_new = sum(ci_energy_dressed) - delta_E = dabs(E_new - E_old) + E_new = sum(ci_energy_dressed(1:N_states)) + delta_E = (E_new - E_old)/dble(N_states) + print *, '' + call write_double(6,thresh_mrcc,"thresh_mrcc") + call write_double(6,delta_E,"delta_E") + delta_E = dabs(delta_E) call save_wavefunction call ezfio_set_mrcepa0_energy(ci_energy_dressed(1)) if (iteration >= n_it_mrcc_max) then @@ -55,8 +59,8 @@ subroutine run(N_st,energy) endif enddo call write_double(6,ci_energy_dressed(1),"Final MRCEPA0 energy") - energy(:) = ci_energy_dressed(:) endif + energy(1:N_st) = ci_energy_dressed(1:N_st) end diff --git a/plugins/mrcepa0/mrsc2.irp.f b/plugins/mrcepa0/mrsc2.irp.f index d0f44a33..948b1b5c 100644 --- a/plugins/mrcepa0/mrsc2.irp.f +++ b/plugins/mrcepa0/mrsc2.irp.f @@ -7,8 +7,16 @@ program mrsc2 mrmode = 2 read_wf = .True. SOFT_TOUCH read_wf - call print_cas_coefs call set_generators_bitmasks_as_holes_and_particles + if (.True.) then + integer :: i,j + do j=1,N_states + do i=1,N_det + psi_coef(i,j) = CI_eigenvectors(i,j) + enddo + enddo + TOUCH psi_coef + endif call run(N_states,energy) if(do_pt2_end)then call run_pt2(N_states,energy) diff --git a/src/Davidson/EZFIO.cfg b/src/Davidson/EZFIO.cfg index 415e359e..7724400f 100644 --- a/src/Davidson/EZFIO.cfg +++ b/src/Davidson/EZFIO.cfg @@ -6,7 +6,25 @@ default: 1.e-12 [n_states_diag] type: States_number -doc: n_states_diag +doc: Number of states to consider during the Davdison diagonalization default: 10 interface: ezfio,provider,ocaml +[davidson_sze_max] +type: Strictly_positive_int +doc: Number of micro-iterations before re-contracting +default: 10 +interface: ezfio,provider,ocaml + +[state_following] +type: logical +doc: If true, the states are re-ordered to match the input states +default: False +interface: ezfio,provider,ocaml + +[disk_based_davidson] +type: logical +doc: If true, disk space is used to store the vectors +default: False +interface: ezfio,provider,ocaml + diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index fddac471..2402f973 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -45,7 +45,11 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d !$OMP END DO !$OMP END PARALLEL - call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) + if (disk_based_davidson) then + call davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) + else + call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) + endif do i=1,N_st_diag s2_out(i) = S2_jj(i) enddo @@ -83,8 +87,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) double precision, intent(in) :: H_jj(sze) - double precision, intent(inout) :: S2_jj(sze) - integer, intent(in) :: iunit + double precision, intent(inout) :: S2_jj(sze) + integer, intent(in) :: iunit double precision, intent(inout) :: u_in(dim_in,N_st_diag) double precision, intent(out) :: energies(N_st_diag) @@ -98,7 +102,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s integer :: k_pairs, kl integer :: iter2 - double precision, allocatable :: W(:,:), U(:,:), S(:,:) + double precision, allocatable :: W(:,:), U(:,:), S(:,:), overlap(:,:) double precision, allocatable :: y(:,:), h(:,:), lambda(:), s2(:) double precision, allocatable :: c(:), s_(:,:), s_tmp(:,:) double precision :: diag_h_mat_elem @@ -107,17 +111,19 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s double precision :: to_print(3,N_st) double precision :: cpu, wall integer :: shift, shift2, itermax + double precision :: r1, r2 + logical :: state_ok(N_st_diag*davidson_sze_max) include 'constants.include.F' !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda if (N_st_diag*3 > sze) then - print *, 'error in Davidson :' - print *, 'Increase n_det_max_jacobi to ', N_st_diag*3 - stop -1 + print *, 'error in Davidson :' + print *, 'Increase n_det_max_jacobi to ', N_st_diag*3 + stop -1 endif - + PROVIDE nuclear_repulsion expected_s2 - + call write_time(iunit) call wall_time(wall) call cpu_time(cpu) @@ -136,7 +142,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s write(iunit,'(A)') trim(write_buffer) write_buffer = ' Iter' do i=1,N_st - write_buffer = trim(write_buffer)//' Energy S^2 Residual' + write_buffer = trim(write_buffer)//' Energy S^2 Residual ' enddo write(iunit,'(A)') trim(write_buffer) write_buffer = '===== ' @@ -144,31 +150,32 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s write_buffer = trim(write_buffer)//' ================ =========== ===========' enddo write(iunit,'(A)') trim(write_buffer) - - integer, external :: align_double + + integer, external :: align_double sze_8 = align_double(sze) - - itermax = min(davidson_sze_max, sze/N_st_diag) + + itermax = max(3,min(davidson_sze_max, sze/N_st_diag)) allocate( & - W(sze_8,N_st_diag*itermax), & - U(sze_8,N_st_diag*itermax), & - S(sze_8,N_st_diag*itermax), & - h(N_st_diag*itermax,N_st_diag*itermax), & - y(N_st_diag*itermax,N_st_diag*itermax), & - s_(N_st_diag*itermax,N_st_diag*itermax), & - s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + W(sze_8,N_st_diag*itermax), & + U(sze_8,N_st_diag*itermax), & + S(sze_8,N_st_diag*itermax), & + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & residual_norm(N_st_diag), & - c(N_st_diag*itermax), & - s2(N_st_diag*itermax), & + c(N_st_diag*itermax), & + s2(N_st_diag*itermax), & + overlap(N_st_diag*itermax, N_st_diag*itermax), & lambda(N_st_diag*itermax)) - h = 0.d0 - s_ = 0.d0 - s_tmp = 0.d0 + h = 0.d0 U = 0.d0 W = 0.d0 S = 0.d0 y = 0.d0 + s_ = 0.d0 + s_tmp = 0.d0 ASSERT (N_st > 0) @@ -182,28 +189,21 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s converged = .False. - double precision :: r1, r2 - do k=N_st+1,N_st_diag-2,2 - do i=1,sze - call random_number(r1) - call random_number(r2) - r1 = dsqrt(-2.d0*dlog(r1)) - r2 = dtwo_pi*r2 - u_in(i,k) = r1*dcos(r2) - u_in(i,k+1) = r1*dsin(r2) - enddo + do k=N_st+1,N_st_diag + u_in(k,k) = 10.d0 + do i=1,sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) + enddo enddo - do k=N_st_diag-1,N_st_diag - do i=1,sze - call random_number(r1) - call random_number(r2) - r1 = dsqrt(-2.d0*dlog(r1)) - r2 = dtwo_pi*r2 - u_in(i,k) = r1*dcos(r2) - enddo + do k=1,N_st_diag + call normalize(u_in(1,k),sze) enddo - - + + do while (.not.converged) do k=1,N_st_diag @@ -211,12 +211,12 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s U(i,k) = u_in(i,k) enddo enddo - + do iter=1,itermax-1 shift = N_st_diag*(iter-1) shift2 = N_st_diag*iter - + call ortho_qr(U,size(U,1),sze,shift2) ! Compute |W_k> = \sum_i |i> @@ -239,8 +239,49 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s 0.d0, s_, size(s_,1)) +! ! Diagonalize S^2 +! ! --------------- +! +! call lapack_diag(s2,y,s_,size(s_,1),shift2) +! +! +! ! Rotate H in the basis of eigenfunctions of s2 +! ! --------------------------------------------- +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('T','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) +! +! ! Damp interaction between different spin states +! ! ------------------------------------------------ +! +! do k=1,shift2 +! do l=1,shift2 +! if (dabs(s2(k) - s2(l)) > 1.d0) then +! h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l)))) +! endif +! enddo +! enddo +! +! ! Rotate back H +! ! ------------- +! +! call dgemm('N','T',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) + + ! Diagonalize h ! ------------- + call lapack_diag(lambda,y,h,size(h,1),shift2) ! Compute S2 for each eigenvector @@ -261,24 +302,70 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s enddo if (s2_eig) then - logical :: state_ok(N_st_diag*davidson_sze_max) + do k=1,shift2 + state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0) + enddo + else + state_ok(k) = .True. + endif + + do k=1,shift2 + if (.not. state_ok(k)) then + do l=k+1,shift2 + if (state_ok(l)) then + call dswap(shift2, y(1,k), 1, y(1,l), 1) + call dswap(1, s2(k), 1, s2(l), 1) + call dswap(1, lambda(k), 1, lambda(l), 1) + state_ok(k) = .True. + state_ok(l) = .False. + exit + endif + enddo + endif + enddo + + if (state_following) then + + integer :: order(N_st_diag) + double precision :: cmax + + overlap = -1.d0 do k=1,shift2 - state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0) + do i=1,shift2 + overlap(k,i) = dabs(y(k,i)) + enddo enddo - do k=1,shift2 - if (.not. state_ok(k)) then - do l=k+1,shift2 - if (state_ok(l)) then - call dswap(shift2, y(1,k), 1, y(1,l), 1) - call dswap(1, s2(k), 1, s2(l), 1) - call dswap(1, lambda(k), 1, lambda(l), 1) - state_ok(k) = .True. - state_ok(l) = .False. - exit - endif - enddo + do k=1,N_st + cmax = -1.d0 + do i=1,N_st + if (overlap(i,k) > cmax) then + cmax = overlap(i,k) + order(k) = i + endif + enddo + do i=1,N_st_diag + overlap(order(k),i) = -1.d0 + enddo + enddo + overlap = y + do k=1,N_st + l = order(k) + if (k /= l) then + y(1:shift2,k) = overlap(1:shift2,l) endif enddo + do k=1,N_st + overlap(k,1) = lambda(k) + overlap(k,2) = s2(k) + enddo + do k=1,N_st + l = order(k) + if (k /= l) then + lambda(k) = overlap(l,1) + s2(k) = overlap(l,2) + endif + enddo + endif @@ -296,11 +383,31 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ! ----------------------------------------- do k=1,N_st_diag - do i=1,sze - U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & - * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz & - )/max(H_jj(i) - lambda (k),1.d-2) - enddo + if (state_ok(k)) then + do i=1,sze + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & + * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz & + )/max(H_jj(i) - lambda (k),1.d-2) + enddo + else + ! Randomize components with bad + do i=1,sze-2,2 + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + U(i,shift2+k) = r1*dcos(r2) + U(i+1,shift2+k) = r1*dsin(r2) + enddo + do i=sze-2+1,sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + U(i,shift2+k) = r1*dcos(r2) + enddo + endif + if (k <= N_st) then residual_norm(k) = u_dot_u(U(1,shift2+k),sze) to_print(1,k) = lambda(k) + nuclear_repulsion @@ -345,7 +452,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s deallocate ( & W, residual_norm, & - U, & + U, overlap, & c, S, & h, & y, s_, s_tmp, & @@ -353,3 +460,439 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ) end +subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) + use bitmasks + use mmap_module + implicit none + BEGIN_DOC + ! Davidson diagonalization with specific diagonal elements of the H matrix + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! S2_jj : specific diagonal S^2 matrix elements + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten + ! on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! N_st_diag : Number of states in which H is diagonalized. Assumed > sze + ! + ! iunit : Unit for the I/O + ! + ! Initial guess vectors are not necessarily orthonormal + END_DOC + integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(in) :: H_jj(sze) + double precision, intent(inout) :: S2_jj(sze) + integer, intent(in) :: iunit + double precision, intent(inout) :: u_in(dim_in,N_st_diag) + double precision, intent(out) :: energies(N_st_diag) + + integer :: sze_8 + integer :: iter + integer :: i,j,k,l,m + logical :: converged + + double precision :: u_dot_v, u_dot_u + + integer :: k_pairs, kl + + integer :: iter2 + double precision, pointer :: W(:,:), U(:,:), S(:,:), overlap(:,:) + double precision, allocatable :: y(:,:), h(:,:), lambda(:), s2(:) + double precision, allocatable :: c(:), s_(:,:), s_tmp(:,:) + double precision :: diag_h_mat_elem + double precision, allocatable :: residual_norm(:) + character*(16384) :: write_buffer + double precision :: to_print(3,N_st) + double precision :: cpu, wall + logical :: state_ok(N_st_diag*davidson_sze_max) + integer :: shift, shift2, itermax + include 'constants.include.F' + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda + if (N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_jacobi to ', N_st_diag*3 + stop -1 + endif + + PROVIDE nuclear_repulsion expected_s2 + + call write_time(iunit) + call wall_time(wall) + call cpu_time(cpu) + write(iunit,'(A)') '' + write(iunit,'(A)') 'Davidson Diagonalization' + write(iunit,'(A)') '------------------------' + write(iunit,'(A)') '' + call write_int(iunit,N_st,'Number of states') + call write_int(iunit,N_st_diag,'Number of states in diagonalization') + call write_int(iunit,sze,'Number of determinants') + write(iunit,'(A)') '' + write_buffer = '===== ' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(iunit,'(A)') trim(write_buffer) + write_buffer = ' Iter' + do i=1,N_st + write_buffer = trim(write_buffer)//' Energy S^2 Residual ' + enddo + write(iunit,'(A)') trim(write_buffer) + write_buffer = '===== ' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(iunit,'(A)') trim(write_buffer) + + integer, external :: align_double + integer :: fd(3) + type(c_ptr) :: c_pointer(3) + sze_8 = align_double(sze) + + itermax = min(davidson_sze_max, sze/N_st_diag) + + call mmap( & + trim(ezfio_work_dir)//'U', & + (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & + 8, fd(1), .False., c_pointer(1)) + call c_f_pointer(c_pointer(1), W, (/ sze_8,N_st_diag*itermax /) ) + + call mmap( & + trim(ezfio_work_dir)//'W', & + (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & + 8, fd(2), .False., c_pointer(2)) + call c_f_pointer(c_pointer(2), U, (/ sze_8,N_st_diag*itermax /) ) + + call mmap( & + trim(ezfio_work_dir)//'S', & + (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & + 8, fd(3), .False., c_pointer(3)) + call c_f_pointer(c_pointer(3), S, (/ sze_8,N_st_diag*itermax /) ) + + allocate( & + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + overlap(N_st_diag*itermax, N_st_diag*itermax), & + residual_norm(N_st_diag), & + c(N_st_diag*itermax), & + s2(N_st_diag*itermax), & + lambda(N_st_diag*itermax)) + + h = 0.d0 + U = 0.d0 + W = 0.d0 + S = 0.d0 + y = 0.d0 + s_ = 0.d0 + s_tmp = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + + ! Davidson iterations + ! =================== + + converged = .False. + + double precision :: r1, r2 + do k=N_st+1,N_st_diag + u_in(k,k) = 10.d0 + do i=1,sze + call random_number(r1) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) + enddo + enddo + do k=1,N_st_diag + call normalize(u_in(1,k),sze) + enddo + + + do while (.not.converged) + + do k=1,N_st_diag + do i=1,sze + U(i,k) = u_in(i,k) + enddo + enddo + + do iter=1,itermax-1 + + shift = N_st_diag*(iter-1) + shift2 = N_st_diag*iter + + call ortho_qr(U,size(U,1),sze,shift2) + + ! Compute |W_k> = \sum_i |i> + ! ----------------------------------------- + + +! call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8) + call H_S2_u_0_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8) + + + ! Compute h_kl = = + ! ------------------------------------------- + + do k=1,iter + shift = N_st_diag*(k-1) + call dgemm('T','N', N_st_diag, shift2, sze, & + 1.d0, U(1,shift+1), size(U,1), W, size(W,1), & + 0.d0, h(shift+1,1), size(h,1)) + + call dgemm('T','N', N_st_diag, shift2, sze, & + 1.d0, U(1,shift+1), size(U,1), S, size(S,1), & + 0.d0, s_(shift+1,1), size(s_,1)) + enddo + +! ! Diagonalize S^2 +! ! --------------- +! +! call lapack_diag(s2,y,s_,size(s_,1),shift2) +! +! +! ! Rotate H in the basis of eigenfunctions of s2 +! ! --------------------------------------------- +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('T','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) +! +! ! Damp interaction between different spin states +! ! ------------------------------------------------ +! +! do k=1,shift2 +! do l=1,shift2 +! if (dabs(s2(k) - s2(l)) > 1.d0) then +! h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l)))) +! endif +! enddo +! enddo +! +! ! Rotate back H +! ! ------------- +! +! call dgemm('N','T',shift2,shift2,shift2, & +! 1.d0, h, size(h,1), y, size(y,1), & +! 0.d0, s_tmp, size(s_tmp,1)) +! +! call dgemm('N','N',shift2,shift2,shift2, & +! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & +! 0.d0, h, size(h,1)) + + + ! Diagonalize h + ! ------------- + call lapack_diag(lambda,y,h,size(h,1),shift2) + + ! Compute S2 for each eigenvector + ! ------------------------------- + + call dgemm('N','N',shift2,shift2,shift2, & + 1.d0, s_, size(s_,1), y, size(y,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('T','N',shift2,shift2,shift2, & + 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & + 0.d0, s_, size(s_,1)) + + + + do k=1,shift2 + s2(k) = s_(k,k) + S_z2_Sz + enddo + + + if (s2_eig) then + do k=1,shift2 + state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0) + enddo + else + state_ok(k) = .True. + endif + + do k=1,shift2 + if (.not. state_ok(k)) then + do l=k+1,shift2 + if (state_ok(l)) then + call dswap(shift2, y(1,k), 1, y(1,l), 1) + call dswap(1, s2(k), 1, s2(l), 1) + call dswap(1, lambda(k), 1, lambda(l), 1) + state_ok(k) = .True. + state_ok(l) = .False. + exit + endif + enddo + endif + enddo + + if (state_following) then + + ! Compute overlap with U_in + ! ------------------------- + + integer :: order(N_st_diag) + double precision :: cmax + overlap = -1.d0 + do k=1,shift2 + do i=1,shift2 + overlap(k,i) = dabs(y(k,i)) + enddo + enddo + do k=1,N_st + cmax = -1.d0 + do i=1,shift2 + if (overlap(i,k) > cmax) then + cmax = overlap(i,k) + order(k) = i + endif + enddo + do i=1,shift2 + overlap(order(k),i) = -1.d0 + enddo + enddo + overlap = y + do k=1,N_st + l = order(k) + if (k /= l) then + y(1:shift2,k) = overlap(1:shift2,l) + endif + enddo + do k=1,N_st + overlap(k,1) = lambda(k) + overlap(k,2) = s2(k) + enddo + do k=1,N_st + l = order(k) + if (k /= l) then + lambda(k) = overlap(l,1) + s2(k) = overlap(l,2) + endif + enddo + + endif + + + ! Express eigenvectors of h in the determinant basis + ! -------------------------------------------------- + + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, S, size(S,1), y, size(y,1), 0.d0, S(1,shift2+1), size(S,1)) + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + do k=1,N_st_diag + if (state_ok(k)) then + do i=1,sze + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & + * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz & + )/max(H_jj(i) - lambda (k),1.d-2) + enddo + else + ! Randomize components with bad + do i=1,sze-2,2 + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + U(i,shift2+k) = r1*dcos(r2) + U(i+1,shift2+k) = r1*dsin(r2) + enddo + do i=sze-2+1,sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + U(i,shift2+k) = r1*dcos(r2) + enddo + endif + + if (k <= N_st) then + residual_norm(k) = u_dot_u(U(1,shift2+k),sze) + to_print(1,k) = lambda(k) + nuclear_repulsion + to_print(2,k) = s2(k) + to_print(3,k) = residual_norm(k) + endif + enddo + + write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3))') iter, to_print(1:3,1:N_st) + call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged) + do k=1,N_st + if (residual_norm(k) > 1.e8) then + print *, '' + stop 'Davidson failed' + endif + enddo + if (converged) then + exit + endif + + enddo + + ! Re-contract to u_in + ! ----------- + + call dgemm('N','N', sze, N_st_diag, shift2, 1.d0, & + U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + + enddo + + do k=1,N_st_diag + energies(k) = lambda(k) + S2_jj(k) = s2(k) + enddo + write_buffer = '===== ' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(iunit,'(A)') trim(write_buffer) + write(iunit,'(A)') '' + call write_time(iunit) + + call munmap( & + (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & + 8, fd(1), c_pointer(1)) + + call munmap( & + (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & + 8, fd(2), c_pointer(2)) + + call munmap( & + (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & + 8, fd(3), c_pointer(3)) + + deallocate ( & + residual_norm, & + c, overlap, & + h, & + y, s_, s_tmp, & + lambda & + ) +end + diff --git a/src/Davidson/parameters.irp.f b/src/Davidson/parameters.irp.f index 82315495..ae8babaa 100644 --- a/src/Davidson/parameters.irp.f +++ b/src/Davidson/parameters.irp.f @@ -1,21 +1,3 @@ -BEGIN_PROVIDER [ integer, davidson_iter_max ] - implicit none - BEGIN_DOC - ! Max number of Davidson iterations - END_DOC - davidson_iter_max = 100 -END_PROVIDER - -BEGIN_PROVIDER [ integer, davidson_sze_max ] - implicit none - BEGIN_DOC - ! Max number of Davidson sizes - END_DOC - ASSERT (davidson_sze_max <= davidson_iter_max) - davidson_sze_max = N_states+7 -END_PROVIDER - - BEGIN_PROVIDER [ character(64), davidson_criterion ] implicit none BEGIN_DOC diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index e34ba3ce..dd5ab1ab 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -422,7 +422,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) !$OMP CRITICAL (u0Hu0) do istate=1,N_st - do i=n,1,-1 + do i=1,n v_0(i,istate) = v_0(i,istate) + vt(istate,i) s_0(i,istate) = s_0(i,istate) + st(istate,i) enddo diff --git a/src/Utils/LinearAlgebra.irp.f b/src/Utils/LinearAlgebra.irp.f index 9c3b35b5..44a15ddf 100644 --- a/src/Utils/LinearAlgebra.irp.f +++ b/src/Utils/LinearAlgebra.irp.f @@ -148,17 +148,42 @@ subroutine ortho_qr(A,LDA,m,n) allocate (jpvt(n), tau(n), work(1)) LWORK=-1 -! call dgeqp3(m, n, A, LDA, jpvt, tau, WORK, LWORK, INFO) call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO ) - LWORK=WORK(1) + LWORK=2*WORK(1) deallocate(WORK) allocate(WORK(LWORK)) -! call dgeqp3(m, n, A, LDA, jpvt, tau, WORK, LWORK, INFO) call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO ) call dorgqr(m, n, n, A, LDA, tau, WORK, LWORK, INFO) deallocate(WORK,jpvt,tau) end +subroutine ortho_qr_unblocked(A,LDA,m,n) + implicit none + BEGIN_DOC + ! Orthogonalization using Q.R factorization + ! + ! A : matrix to orthogonalize + ! + ! LDA : leftmost dimension of A + ! + ! n : Number of rows of A + ! + ! m : Number of columns of A + ! + END_DOC + integer, intent(in) :: m,n, LDA + double precision, intent(inout) :: A(LDA,n) + + integer :: info + integer, allocatable :: jpvt(:) + double precision, allocatable :: tau(:), work(:) + + allocate (jpvt(n), tau(n), work(n)) + call dgeqr2( m, n, A, LDA, TAU, WORK, INFO ) + call dorg2r(m, n, n, A, LDA, tau, WORK, INFO) + deallocate(WORK,jpvt,tau) +end + subroutine ortho_lowdin(overlap,LDA,N,C,LDC,m) implicit none BEGIN_DOC @@ -444,7 +469,12 @@ subroutine lapack_diag(eigvalues,eigvectors,H,nmax,n) print *, irp_here, ': DSYEV: the ',-info,'-th argument had an illegal value' stop 2 else if( info > 0 ) then - write(*,*)'DSYEV Failed' + write(*,*)'DSYEV Failed : ', info + do i=1,n + do j=1,n + print *, H(i,j) + enddo + enddo stop 1 end if diff --git a/src/Utils/map_module.f90 b/src/Utils/map_module.f90 index 4a83582f..80260233 100644 --- a/src/Utils/map_module.f90 +++ b/src/Utils/map_module.f90 @@ -622,7 +622,7 @@ subroutine search_key_big_interval(key,X,sze,idx,ibegin_in,iend_in) istep = ishft(iend-ibegin,-1) idx = ibegin + istep - do while (istep > 16) + do while (istep > 64) idx = ibegin + istep ! TODO : Cache misses if (cache_key < X(idx)) then @@ -660,8 +660,8 @@ subroutine search_key_big_interval(key,X,sze,idx,ibegin_in,iend_in) endif enddo idx = ibegin - if (min(iend_in,sze) > ibegin+16) then - iend = ibegin+16 + if (min(iend_in,sze) > ibegin+64) then + iend = ibegin+64 do while (cache_key > X(idx)) idx = idx+1 end do @@ -730,7 +730,7 @@ subroutine search_key_value_big_interval(key,value,X,Y,sze,idx,ibegin_in,iend_in istep = ishft(iend-ibegin,-1) idx = ibegin + istep - do while (istep > 16) + do while (istep > 64) idx = ibegin + istep if (cache_key < X(idx)) then iend = idx @@ -771,8 +771,8 @@ subroutine search_key_value_big_interval(key,value,X,Y,sze,idx,ibegin_in,iend_in enddo idx = ibegin value = Y(idx) - if (min(iend_in,sze) > ibegin+16) then - iend = ibegin+16 + if (min(iend_in,sze) > ibegin+64) then + iend = ibegin+64 do while (cache_key > X(idx)) idx = idx+1 value = Y(idx) diff --git a/tests/bats/cassd.bats b/tests/bats/cassd.bats index 44b44ee6..f43ffaaa 100644 --- a/tests/bats/cassd.bats +++ b/tests/bats/cassd.bats @@ -5,13 +5,22 @@ source $QP_ROOT/tests/bats/common.bats.sh @test "CAS_SD H2O cc-pVDZ" { test_exe cassd_zmq || skip INPUT=h2o.ezfio + rm -rf work/h2o.ezfio/determinants/ qp_edit -c $INPUT ezfio set_file $INPUT - ezfio set perturbation do_pt2_end False - ezfio set determinants n_det_max 2000 + ezfio set perturbation do_pt2_end True + ezfio set determinants n_det_max 16384 qp_set_mo_class $INPUT -core "[1]" -inact "[2,5]" -act "[3,4,6,7]" -virt "[8-24]" qp_run cassd_zmq $INPUT + energy="$(ezfio get cas_sd_zmq energy_pt2)" + eq $energy -76.23109 2.E-5 + + ezfio set determinants n_det_max 2048 + ezfio set determinants read_wf True + ezfio set perturbation do_pt2_end True + qp_run cassd_zmq $INPUT + ezfio set determinants read_wf False energy="$(ezfio get cas_sd_zmq energy)" - eq $energy -76.2221842108163 1.E-5 + eq $energy -76.2300888408526 2.E-5 } diff --git a/tests/bats/mrcepa0.bats b/tests/bats/mrcepa0.bats index 8b56c606..ed69681f 100644 --- a/tests/bats/mrcepa0.bats +++ b/tests/bats/mrcepa0.bats @@ -15,8 +15,8 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set mrcepa0 lambda_type 1 ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT - energy="$(ezfio get mrcepa0 energy)" - eq $energy -76.22903276183061 1.e-4 + energy="$(ezfio get mrcepa0 energy_pt2)" + eq $energy -76.238562120457431 1.e-4 } @test "MRCC H2O cc-pVDZ" { @@ -32,8 +32,8 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set mrcepa0 lambda_type 0 ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT - energy="$(ezfio get mrcepa0 energy)" - eq $energy -76.22899302846875 1.e-4 + energy="$(ezfio get mrcepa0 energy_pt2)" + eq $energy -76.238527498388962 1.e-4 } @test "MRSC2 H2O cc-pVDZ" { @@ -48,8 +48,8 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set mrcepa0 lambda_type 0 ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT - energy="$(ezfio get mrcepa0 energy)" - eq $energy -76.22647345292708 1.e-4 + energy="$(ezfio get mrcepa0 energy_pt2)" + eq $energy -76.235833732594187 1.e-4 } @test "MRCEPA0 H2O cc-pVDZ" { @@ -64,7 +64,7 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set mrcepa0 lambda_type 0 ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT - energy="$(ezfio get mrcepa0 energy)" - eq $energy -76.23199784430074 1.e-4 + energy="$(ezfio get mrcepa0 energy_pt2)" + eq $energy -76.2418799284763 1.e-4 } diff --git a/tests/run_tests.sh b/tests/run_tests.sh index 4664ce82..9e560d38 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -14,7 +14,7 @@ mrcepa0.bats export QP_PREFIX="timeout -s 9 300" -export QP_TASK_DEBUG=1 +#export QP_TASK_DEBUG=1 rm -rf work output