diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f index 229e3844..8882ee88 100644 --- a/src/cipsi/cipsi.irp.f +++ b/src/cipsi/cipsi.irp.f @@ -1,18 +1,20 @@ subroutine run_cipsi implicit none + use selection_types BEGIN_DOC ! Selected Full Configuration Interaction with deterministic selection and ! stochastic PT2. END_DOC integer :: i,j,k - double precision, allocatable :: pt2(:), variance(:), norm(:), rpt2(:), zeros(:) + type(pt2_type) :: pt2_data, pt2_data_err + double precision, allocatable :: zeros(:) integer :: to_select logical, external :: qp_stop double precision :: threshold_generators_save double precision :: rss double precision, external :: memory_of_double - PROVIDE H_apply_buffer_allocated + PROVIDE H_apply_buffer_allocated N_iter = 1 threshold_generators = 1.d0 @@ -21,21 +23,25 @@ subroutine run_cipsi rss = memory_of_double(N_states)*4.d0 call check_mem(rss,irp_here) - allocate (pt2(N_states), zeros(N_states), rpt2(N_states), norm(N_states), variance(N_states)) + allocate (zeros(N_states)) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) double precision :: hf_energy_ref logical :: has double precision :: relative_error - !PROVIDE h_apply_buffer_allocated - relative_error=PT2_relative_error zeros = 0.d0 - pt2 = -huge(1.e0) - rpt2 = -huge(1.e0) - norm = 0.d0 - variance = huge(1.e0) + pt2_data % pt2 = -huge(1.e0) + pt2_data % rpt2 = -huge(1.e0) + pt2_data % overlap(:,:) = 0.d0 + pt2_data % variance = huge(1.e0) + if (is_complex) then + pt2_data % overlap_imag(:,:) = 0.d0 + endif + if (s2_eig) then call make_s2_eigenfunction @@ -77,14 +83,13 @@ subroutine run_cipsi endif double precision :: correlation_energy_ratio - double precision :: error(N_states) correlation_energy_ratio = 0.d0 do while ( & (N_det < N_det_max) .and. & - (maxval(abs(rpt2(1:N_states))) > pt2_max) .and. & - (maxval(abs(variance(1:N_states))) > variance_max) .and. & + (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and. & + (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and. & (correlation_energy_ratio <= correlation_energy_ratio_max) & ) write(*,'(A)') '--------------------------------------------------------------------------------' @@ -93,39 +98,33 @@ subroutine run_cipsi to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) to_select = max(N_states_diag, to_select) if (do_pt2) then - pt2 = 0.d0 - variance = 0.d0 - norm = 0.d0 + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) threshold_generators_save = threshold_generators threshold_generators = 1.d0 SOFT_TOUCH threshold_generators -! if (is_complex) then -! call zmq_pt2_complex(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, & -! norm, 0) ! Stochastic PT2 -! else - call zmq_pt2(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, & - norm, 0) ! Stochastic PT2 -! endif + call ZMQ_pt2(psi_energy_with_nucl_rep,pt2_data,pt2_data_err,relative_error, 0) ! Stochastic PT2 threshold_generators = threshold_generators_save SOFT_TOUCH threshold_generators else - call ZMQ_selection(to_select, pt2, variance, norm) + call pt2_dealloc(pt2_data) + call pt2_alloc(pt2_data, N_states) + call ZMQ_selection(to_select, pt2_data) endif - do k=1,N_states - rpt2(k) = pt2(k)/(1.d0 + norm(k)) - enddo - correlation_energy_ratio = (psi_energy_with_nucl_rep(1) - hf_energy_ref) / & - (psi_energy_with_nucl_rep(1) + rpt2(1) - hf_energy_ref) + (psi_energy_with_nucl_rep(1) + pt2_data % rpt2(1) - hf_energy_ref) correlation_energy_ratio = min(1.d0,correlation_energy_ratio) call write_double(6,correlation_energy_ratio, 'Correlation ratio') - call print_summary(psi_energy_with_nucl_rep,pt2,error,variance,norm,N_det,N_occ_pattern,N_states,psi_s2) + call print_summary(psi_energy_with_nucl_rep, & + pt2_data, pt2_data_err, N_det,N_occ_pattern,N_states,psi_s2) - call save_energy(psi_energy_with_nucl_rep, rpt2) + call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det) + call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) call print_extrapolated_energy() N_iter += 1 @@ -135,15 +134,9 @@ subroutine run_cipsi call copy_H_apply_buffer_to_wf() ! call save_wavefunction - !n_det_before = N_det - !to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) - !to_select = max(N_states_diag, to_select) - !call zmq_selection(to_select, pt2, variance, norm) if (is_complex) then -! call zmq_selection_complex(to_select, pt2, variance, norm) PROVIDE psi_coef_complex else -! call zmq_selection(to_select, pt2, variance, norm) PROVIDE psi_coef endif PROVIDE psi_det @@ -156,7 +149,7 @@ subroutine run_cipsi endif call save_wavefunction call save_energy(psi_energy_with_nucl_rep, zeros) - if (qp_stop()) exit + if (qp_stop()) exit enddo if (.not.qp_stop()) then @@ -171,18 +164,13 @@ subroutine run_cipsi endif if (do_pt2) then - pt2(:) = 0.d0 - variance(:) = 0.d0 - norm(:) = 0.d0 + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) threshold_generators = 1d0 SOFT_TOUCH threshold_generators -! if (is_complex) then -! call zmq_pt2_complex(psi_energy_with_nucl_rep, pt2,relative_error,error,variance, & -! norm,0) ! Stochastic PT2 -! else - call ZMQ_pt2(psi_energy_with_nucl_rep, pt2,relative_error,error,variance, & - norm,0) ! Stochastic PT2 -! endif + call ZMQ_pt2(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, relative_error, 0) ! Stochastic PT2 SOFT_TOUCH threshold_generators endif print *, 'N_det = ', N_det @@ -190,15 +178,13 @@ subroutine run_cipsi print *, 'N_states = ', N_states print*, 'correlation_ratio = ', correlation_energy_ratio - - do k=1,N_states - rpt2(k) = pt2(k)/(1.d0 + norm(k)) - enddo - - call save_energy(psi_energy_with_nucl_rep, rpt2) - call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm,N_det,N_occ_pattern,N_states,psi_s2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det) + call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) + call print_summary(psi_energy_with_nucl_rep(1:N_states), & + pt2_data, pt2_data_err, N_det,N_occ_pattern,N_states,psi_s2) + call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) call print_extrapolated_energy() endif + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) end diff --git a/src/cipsi/energy.irp.f b/src/cipsi/energy.irp.f index 38a6a92e..50dc4620 100644 --- a/src/cipsi/energy.irp.f +++ b/src/cipsi/energy.irp.f @@ -41,3 +41,19 @@ BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ] END_PROVIDER +BEGIN_PROVIDER [ double precision, pt2_overlap, (N_states, N_states) ] + implicit none + BEGIN_DOC + ! Overlap between the perturbed wave functions + END_DOC + pt2_overlap(1:N_states,1:N_states) = 0.d0 +END_PROVIDER + +BEGIN_PROVIDER [ double precision, pt2_overlap_imag, (N_states, N_states) ] + implicit none + BEGIN_DOC + ! Overlap between the perturbed wave functions + END_DOC + pt2_overlap_imag(1:N_states,1:N_states) = 0.d0 +END_PROVIDER + diff --git a/src/cipsi/pert_rdm_providers.irp.f b/src/cipsi/pert_rdm_providers.irp.f index e2917261..caea57b2 100644 --- a/src/cipsi/pert_rdm_providers.irp.f +++ b/src/cipsi/pert_rdm_providers.irp.f @@ -31,7 +31,7 @@ BEGIN_PROVIDER [double precision, pert_2rdm_provider, (n_orb_pert_rdm,n_orb_pert END_PROVIDER -subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm, mat, buf, psi_det_connection, psi_coef_connection_reverse, n_det_connection) +subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf, psi_det_connection, psi_coef_connection_reverse, n_det_connection) use bitmasks use selection_types implicit none @@ -44,12 +44,10 @@ subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fo logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num) double precision, intent(in) :: fock_diag_tmp(mo_num) double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - double precision, intent(inout) :: variance(N_states) - double precision, intent(inout) :: norm(N_states) + type(pt2_type), intent(inout) :: pt2_data type(selection_buffer), intent(inout) :: buf logical :: ok - integer :: s1, s2, p1, p2, ib, j, istate + integer :: s1, s2, p1, p2, ib, j, istate, jstate integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) double precision :: e_pert, delta_E, val, Hii, sum_e_pert, tmp, alpha_h_psi, coef(N_states) double precision, external :: diag_H_mat_elem_fock @@ -152,9 +150,16 @@ subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fo e_pert = 0.5d0 * (tmp - delta_E) coef(istate) = e_pert / alpha_h_psi print*,e_pert,coef,alpha_h_psi - pt2(istate) = pt2(istate) + e_pert - variance(istate) = variance(istate) + alpha_h_psi * alpha_h_psi - norm(istate) = norm(istate) + coef(istate) * coef(istate) + pt2_data % pt2(istate) += e_pert + pt2_data % variance(istate) += alpha_h_psi * alpha_h_psi + enddo + + do istate=1,N_states + alpha_h_psi = mat(istate, p1, p2) + e_pert = coef(istate) * alpha_h_psi + do jstate=1,N_states + pt2_data % overlap(jstate,jstate) = coef(istate) * coef(jstate) + enddo if (weight_selection /= 5) then ! Energy selection diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index 7b84221b..c555fe0b 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -115,7 +115,7 @@ end function -subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in) +subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in) use f77_zmq use selection_types @@ -125,10 +125,8 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in) integer, intent(in) :: N_in ! integer, intent(inout) :: N_in double precision, intent(in) :: relative_error, E(N_states) - double precision, intent(out) :: pt2(N_states),error(N_states) - double precision, intent(out) :: variance(N_states),norm2(N_states) - - + type(pt2_type), intent(inout) :: pt2_data, pt2_data_err +! integer :: i, N double precision :: state_average_weight_save(N_states), w(N_states,4) @@ -154,11 +152,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in) endif if (N_det <= max(4,N_states) .or. pt2_N_teeth < 2) then - pt2=0.d0 - variance=0.d0 - norm2=0.d0 - call ZMQ_selection(N_in, pt2, variance, norm2) - error(:) = 0.d0 + call ZMQ_selection(N_in, pt2_data) else N = max(N_in,1) * N_states @@ -272,8 +266,8 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in) mem_collector = 8.d0 * & ! bytes ( 1.d0*pt2_n_tasks_max & ! task_id, index + 0.635d0*N_det_generators & ! f,d - + 3.d0*N_det_generators*N_states & ! eI, vI, nI - + 3.d0*pt2_n_tasks_max*N_states & ! eI_task, vI_task, nI_task + + pt2_n_tasks_max*pt2_type_size(N_states) & ! pt2_data_task + + N_det_generators*pt2_type_size(N_states) & ! pt2_data_I + 4.d0*(pt2_N_teeth+1) & ! S, S2, T2, T3 + 1.d0*(N_int*2.d0*N + N) & ! selection buffer + 1.d0*(N_int*2.d0*N + N) & ! sort selection buffer @@ -288,7 +282,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in) nproc_target * 8.d0 * & ! bytes ( 0.5d0*pt2_n_tasks_max & ! task_id + 64.d0*pt2_n_tasks_max & ! task - + 3.d0*pt2_n_tasks_max*N_states & ! pt2, variance, norm2 + + pt2_type_size(N_states)*pt2_n_tasks_max*N_states & ! pt2, variance, overlap + 1.d0*pt2_n_tasks_max & ! i_generator, subset + 1.d0*(N_int*2.d0*ii+ ii) & ! selection buffer + 1.d0*(N_int*2.d0*ii+ ii) & ! sort selection buffer @@ -321,21 +315,24 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in) call omp_set_nested(.false.) - print '(A)', '========== ================= =========== =============== =============== =================' - print '(A)', ' Samples Energy Stat. Err Variance Norm^2 Seconds ' - print '(A)', '========== ================= =========== =============== =============== =================' + print '(A)', '========== ======================= ===================== ===================== ===========' + print '(A)', ' Samples Energy Variance Norm^2 Seconds' + print '(A)', '========== ======================= ===================== ===================== ===========' PROVIDE global_selection_buffer + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1) & !$OMP PRIVATE(i) i = omp_get_thread_num() if (i==0) then - call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, w(1,1), w(1,2), w(1,3), w(1,4), b, N) - pt2(pt2_stoch_istate) = w(pt2_stoch_istate,1) - error(pt2_stoch_istate) = w(pt2_stoch_istate,2) - variance(pt2_stoch_istate) = w(pt2_stoch_istate,3) - norm2(pt2_stoch_istate) = w(pt2_stoch_istate,4) + call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, pt2_data, pt2_data_err, b, N) + pt2_data % rpt2(pt2_stoch_istate) = & + pt2_data % pt2(pt2_stoch_istate)/(1.d0+pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate)) + + !TODO : We should use here the correct formula for the error of X/Y + pt2_data_err % rpt2(pt2_stoch_istate) = & + pt2_data_err % pt2(pt2_stoch_istate)/(1.d0 + pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate)) else call pt2_slave_inproc(i) @@ -343,11 +340,48 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in) !$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2') - print '(A)', '========== ================= =========== =============== =============== =================' + print '(A)', '========== ======================= ===================== ===================== ===========' + + do k=1,N_states + pt2_overlap(pt2_stoch_istate,k) = pt2_data % overlap(k,pt2_stoch_istate) + enddo + SOFT_TOUCH pt2_overlap + if (is_complex) then + !TODO: transpose/conjugate? + do k=1,N_states + pt2_overlap_imag(pt2_stoch_istate,k) = pt2_data % overlap_imag(k,pt2_stoch_istate) + enddo + SOFT_TOUCH pt2_overlap_imag + endif enddo FREE pt2_stoch_istate + ! Symmetrize overlap + do j=2,N_states + do i=1,j-1 + pt2_overlap(i,j) = 0.5d0 * (pt2_overlap(i,j) + pt2_overlap(j,i)) + pt2_overlap(j,i) = pt2_overlap(i,j) + enddo + enddo + + if (is_complex) then + !TODO: check sign + do j=2,N_states + do i=1,j-1 + pt2_overlap_imag(i,j) = 0.5d0 * (pt2_overlap_imag(i,j) - pt2_overlap_imag(j,i)) + pt2_overlap_imag(j,i) = -pt2_overlap_imag(i,j) + enddo + enddo + endif + + print *, 'Overlap of perturbed states:' + do k=1,N_states + print *, pt2_overlap(k,:) + enddo + print *, '-------' + !TODO: print imag part? + if (N_in > 0) then b%cur = min(N_in,b%cur) if (s2_eig) then @@ -362,11 +396,8 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm2, N_in) state_average_weight(:) = state_average_weight_save(:) TOUCH state_average_weight endif - do k=N_det+1,N_states - pt2(k) = 0.d0 - enddo - call update_pt2_and_variance_weights(pt2, variance, norm2, N_states) + call update_pt2_and_variance_weights(pt2_data, N_states) end subroutine @@ -380,7 +411,7 @@ subroutine pt2_slave_inproc(i) end -subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, variance, norm2, b, N_) +subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_err, b, N_) use f77_zmq use selection_types use bitmasks @@ -389,15 +420,15 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc integer(ZMQ_PTR), intent(in) :: zmq_socket_pull double precision, intent(in) :: relative_error, E - double precision, intent(out) :: pt2(N_states), error(N_states) - double precision, intent(out) :: variance(N_states), norm2(N_states) + type(pt2_type), intent(inout) :: pt2_data, pt2_data_err type(selection_buffer), intent(inout) :: b integer, intent(in) :: N_ - - double precision, allocatable :: eI(:,:), eI_task(:,:), S(:), S2(:) - double precision, allocatable :: vI(:,:), vI_task(:,:), T2(:) - double precision, allocatable :: nI(:,:), nI_task(:,:), T3(:) + type(pt2_type), allocatable :: pt2_data_task(:) + type(pt2_type), allocatable :: pt2_data_I(:) + type(pt2_type), allocatable :: pt2_data_S(:) + type(pt2_type), allocatable :: pt2_data_S2(:) + type(pt2_type) :: pt2_data_teeth integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer, external :: zmq_delete_tasks_async_send @@ -405,11 +436,15 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc integer, external :: zmq_abort integer, external :: pt2_find_sample_lr + PROVIDE pt2_stoch_istate + integer :: more, n, i, p, c, t, n_tasks, U integer, allocatable :: task_id(:) integer, allocatable :: index(:) - double precision :: v, x, x2, x3, avg, avg2, avg3, eqt, E0, v0, n0 + double precision :: v, x, x2, x3, avg, avg2, avg3(N_states), eqt, E0, v0, n0(N_states) + double precision :: avg3im(N_states), n0im(N_states) + double precision :: eqta(N_states) double precision :: time, time1, time0 integer, allocatable :: f(:) @@ -434,11 +469,10 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc ! updated in ZMQ_pt2 allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators)) allocate(d(N_det_generators+1)) - allocate(eI(N_states, N_det_generators), eI_task(N_states, pt2_n_tasks_max)) - allocate(vI(N_states, N_det_generators), vI_task(N_states, pt2_n_tasks_max)) - allocate(nI(N_states, N_det_generators), nI_task(N_states, pt2_n_tasks_max)) - allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1)) - allocate(T2(pt2_N_teeth+1), T3(pt2_N_teeth+1)) + allocate(pt2_data_task(pt2_n_tasks_max)) + allocate(pt2_data_I(N_det_generators)) + allocate(pt2_data_S(pt2_N_teeth+1)) + allocate(pt2_data_S2(pt2_N_teeth+1)) @@ -446,26 +480,37 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc call create_selection_buffer(N_, N_*2, b2) - pt2(:) = -huge(1.) - error(:) = huge(1.) - variance(:) = huge(1.) - norm2(:) = 0.d0 - S(:) = 0d0 - S2(:) = 0d0 - T2(:) = 0d0 - T3(:) = 0d0 + pt2_data % pt2(pt2_stoch_istate) = -huge(1.) + pt2_data_err % pt2(pt2_stoch_istate) = huge(1.) + pt2_data % variance(pt2_stoch_istate) = huge(1.) + pt2_data_err % variance(pt2_stoch_istate) = huge(1.) + pt2_data % overlap(:,pt2_stoch_istate) = 0.d0 + pt2_data_err % overlap(:,pt2_stoch_istate) = huge(1.) + !TODO: init overlap_imag? + if (is_complex) then + pt2_data % overlap_imag(:,pt2_stoch_istate) = 0.d0 + pt2_data_err % overlap_imag(:,pt2_stoch_istate) = 0.d0 + endif n = 1 t = 0 U = 0 - eI(:,:) = 0d0 - vI(:,:) = 0d0 - nI(:,:) = 0d0 + do i=1,pt2_n_tasks_max + call pt2_alloc(pt2_data_task(i),N_states) + enddo + do i=1,pt2_N_teeth+1 + call pt2_alloc(pt2_data_S(i),N_states) + call pt2_alloc(pt2_data_S2(i),N_states) + enddo + do i=1,N_det_generators + call pt2_alloc(pt2_data_I(i),N_states) + enddo f(:) = pt2_F(:) d(:) = .false. n_tasks = 0 E0 = E v0 = 0.d0 - n0 = 0.d0 + n0(:) = 0.d0 + n0im(:) = 0.d0 more = 1 call wall_time(time0) time1 = time0 @@ -485,11 +530,15 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc t=t+1 E0 = 0.d0 v0 = 0.d0 - n0 = 0.d0 + n0(:) = 0.d0 + n0im(:) = 0.d0 do i=pt2_n_0(t),1,-1 - E0 += eI(pt2_stoch_istate, i) - v0 += vI(pt2_stoch_istate, i) - n0 += nI(pt2_stoch_istate, i) + E0 += pt2_data_I(i) % pt2(pt2_stoch_istate) + v0 += pt2_data_I(i) % variance(pt2_stoch_istate) + n0(:) += pt2_data_I(i) % overlap(:,pt2_stoch_istate) + if (is_complex) then + n0im(:) += pt2_data_I(i) % overlap_imag(:,pt2_stoch_istate) + endif end do else exit @@ -499,45 +548,71 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc ! Add Stochastic part c = pt2_R(n) if(c > 0) then -!print *, 'c>0' - x = 0d0 - x2 = 0d0 - x3 = 0d0 + + call pt2_alloc(pt2_data_teeth,N_states) do p=pt2_N_teeth, 1, -1 v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1)) i = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(p),pt2_n_0(p+1)) - x += eI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) - x2 += vI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) - x3 += nI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) - S(p) += x - S2(p) += x*x - T2(p) += x2 - T3(p) += x3 - end do - avg = E0 + S(t) / dble(c) - avg2 = v0 + T2(t) / dble(c) - avg3 = n0 + T3(t) / dble(c) + v = pt2_W_T / pt2_w(i) + call pt2_add ( pt2_data_teeth, v, pt2_data_I(i) ) + call pt2_add ( pt2_data_S(p), 1.d0, pt2_data_teeth ) + call pt2_add2( pt2_data_S2(p), 1.d0, pt2_data_teeth ) + enddo + call pt2_dealloc(pt2_data_teeth) + + avg = E0 + pt2_data_S(t) % pt2(pt2_stoch_istate) / dble(c) + avg2 = v0 + pt2_data_S(t) % variance(pt2_stoch_istate) / dble(c) + avg3(:) = n0(:) + pt2_data_S(t) % overlap(:,pt2_stoch_istate) / dble(c) + if (is_complex) then + avg3im(:) = n0im(:) + pt2_data_S(t) % overlap_imag(:,pt2_stoch_istate) / dble(c) + endif if ((avg /= 0.d0) .or. (n == N_det_generators) ) then do_exit = .true. endif if (qp_stop()) then stop_now = .True. endif - pt2(pt2_stoch_istate) = avg - variance(pt2_stoch_istate) = avg2 - norm2(pt2_stoch_istate) = avg3 + pt2_data % pt2(pt2_stoch_istate) = avg + pt2_data % variance(pt2_stoch_istate) = avg2 + pt2_data % overlap(:,pt2_stoch_istate) = avg3(:) + if (is_complex) then + pt2_data % overlap_imag(:,pt2_stoch_istate) = avg3im(:) + endif call wall_time(time) ! 1/(N-1.5) : see Brugger, The American Statistician (23) 4 p. 32 (1969) if(c > 2) then - eqt = dabs((S2(t) / c) - (S(t)/c)**2) ! dabs for numerical stability + eqt = dabs((pt2_data_S2(t) % pt2(pt2_stoch_istate) / c) - (pt2_data_S(t) % pt2(pt2_stoch_istate)/c)**2) ! dabs for numerical stability eqt = sqrt(eqt / (dble(c) - 1.5d0)) - error(pt2_stoch_istate) = eqt + pt2_data_err % pt2(pt2_stoch_istate) = eqt + + eqt = dabs((pt2_data_S2(t) % variance(pt2_stoch_istate) / c) - (pt2_data_S(t) % variance(pt2_stoch_istate)/c)**2) ! dabs for numerical stability + eqt = sqrt(eqt / (dble(c) - 1.5d0)) + pt2_data_err % variance(pt2_stoch_istate) = eqt + + if (is_complex) then + eqta(:) = dabs((pt2_data_S2(t) % overlap(:,pt2_stoch_istate) / c) - & + (pt2_data_S(t) % overlap(:,pt2_stoch_istate)/c)**2 - & + (pt2_data_S(t) % overlap_imag(:,pt2_stoch_istate)/c)**2 ) ! dabs for numerical stability + else + eqta(:) = dabs((pt2_data_S2(t) % overlap(:,pt2_stoch_istate) / c) - (pt2_data_S(t) % overlap(:,pt2_stoch_istate)/c)**2) ! dabs for numerical stability + endif + eqta(:) = sqrt(eqta(:) / (dble(c) - 1.5d0)) + pt2_data_err % overlap(:,pt2_stoch_istate) = eqta(:) + + if ((time - time1 > 1.d0) .or. (n==N_det_generators)) then time1 = time - print '(G10.3, 2X, F16.10, 2X, G10.3, 2X, F14.10, 2X, F14.10, 2X, F10.4, A10)', c, avg+E, eqt, avg2, avg3, time-time0, '' + print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.4)', c, & + pt2_data % pt2(pt2_stoch_istate) +E, & + pt2_data_err % pt2(pt2_stoch_istate), & + pt2_data % variance(pt2_stoch_istate), & + pt2_data_err % variance(pt2_stoch_istate), & + pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate), & + pt2_data_err % overlap(pt2_stoch_istate,pt2_stoch_istate), & + time-time0 if (stop_now .or. ( & - (do_exit .and. (dabs(error(pt2_stoch_istate)) / & - (1.d-20 + dabs(pt2(pt2_stoch_istate)) ) <= relative_error))) ) then + (do_exit .and. (dabs(pt2_data_err % pt2(pt2_stoch_istate)) / & + (1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) ) <= relative_error))) ) then if (zmq_abort(zmq_to_qp_run_socket) == -1) then call sleep(10) if (zmq_abort(zmq_to_qp_run_socket) == -1) then @@ -552,10 +627,10 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc else if(more == 0) then exit else - call pull_pt2_results(zmq_socket_pull, index, eI_task, vI_task, nI_task, task_id, n_tasks, b2) + call pull_pt2_results(zmq_socket_pull, index, pt2_data_task, task_id, n_tasks, b2) if(n_tasks > pt2_n_tasks_max)then print*,'PB !!!' - print*,'If you see this, send an email to Anthony scemama with the following content' + print*,'If you see this, send a bug report with the following content' print*,irp_here print*,'n_tasks,pt2_n_tasks_max = ',n_tasks,pt2_n_tasks_max stop -1 @@ -564,16 +639,14 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc stop 'PT2: Unable to delete tasks (send)' endif do i=1,n_tasks - if(index(i).gt.size(eI,2).or.index(i).lt.1)then + if(index(i).gt.size(pt2_data_I,1).or.index(i).lt.1)then print*,'PB !!!' - print*,'If you see this, send an email to Anthony scemama with the following content' + print*,'If you see this, send a bug report with the following content' print*,irp_here - print*,'i,index(i),size(ei,2) = ',i,index(i),size(ei,2) + print*,'i,index(i),size(pt2_data_I,1) = ',i,index(i),size(pt2_data_I,1) stop -1 endif - eI(1:N_states, index(i)) += eI_task(1:N_states,i) - vI(1:N_states, index(i)) += vI_task(1:N_states,i) - nI(1:N_states, index(i)) += nI_task(1:N_states,i) + call pt2_add(pt2_data_I(index(i)),1.d0,pt2_data_task(i)) f(index(i)) -= 1 end do do i=1, b2%cur @@ -586,6 +659,16 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc endif end if end do + do i=1,N_det_generators + call pt2_dealloc(pt2_data_I(i)) + enddo + do i=1,pt2_N_teeth+1 + call pt2_dealloc(pt2_data_S(i)) + call pt2_dealloc(pt2_data_S2(i)) + enddo + do i=1,pt2_n_tasks_max + call pt2_dealloc(pt2_data_task(i)) + enddo !print *, 'deleting b2' call delete_selection_buffer(b2) !print *, 'sorting b' diff --git a/src/cipsi/pt2_type.irp.f b/src/cipsi/pt2_type.irp.f new file mode 100644 index 00000000..7964a942 --- /dev/null +++ b/src/cipsi/pt2_type.irp.f @@ -0,0 +1,155 @@ +subroutine pt2_alloc(pt2_data,N) + implicit none + use selection_types + type(pt2_type), intent(inout) :: pt2_data + integer, intent(in) :: N + integer :: k + + allocate(pt2_data % pt2(N) & + ,pt2_data % variance(N) & + ,pt2_data % rpt2(N) & + ,pt2_data % overlap(N,N) & + ) + + pt2_data % pt2(:) = 0.d0 + pt2_data % variance(:) = 0.d0 + pt2_data % rpt2(:) = 0.d0 + pt2_data % overlap(:,:) = 0.d0 + if (is_complex) then + allocate(pt2_data % overlap_imag(N,N)) + pt2_data % overlap_imag(:,:) = 0.d0 + endif + +end subroutine + +subroutine pt2_dealloc(pt2_data) + implicit none + use selection_types + type(pt2_type), intent(inout) :: pt2_data + deallocate(pt2_data % pt2 & + ,pt2_data % variance & + ,pt2_data % rpt2 & + ,pt2_data % overlap & + ) + if (is_complex) then + deallocate(pt2_data % overlap_imag) + endif +end subroutine + +subroutine pt2_add(p1, w, p2) + implicit none + use selection_types + BEGIN_DOC +! p1 += w * p2 + END_DOC + type(pt2_type), intent(inout) :: p1 + double precision, intent(in) :: w + type(pt2_type), intent(in) :: p2 + + if (w == 1.d0) then + + p1 % pt2(:) = p1 % pt2(:) + p2 % pt2(:) + p1 % rpt2(:) = p1 % rpt2(:) + p2 % rpt2(:) + p1 % variance(:) = p1 % variance(:) + p2 % variance(:) + p1 % overlap(:,:) = p1 % overlap(:,:) + p2 % overlap(:,:) + if (is_complex) then + p1 % overlap_imag(:,:) = p1 % overlap_imag(:,:) + p2 % overlap_imag(:,:) + endif + + else + + p1 % pt2(:) = p1 % pt2(:) + w * p2 % pt2(:) + p1 % rpt2(:) = p1 % rpt2(:) + w * p2 % rpt2(:) + p1 % variance(:) = p1 % variance(:) + w * p2 % variance(:) + p1 % overlap(:,:) = p1 % overlap(:,:) + w * p2 % overlap(:,:) + if (is_complex) then + p1 % overlap_imag(:,:) = p1 % overlap_imag(:,:) + w * p2 % overlap_imag(:,:) + endif + + endif + +end subroutine + + +subroutine pt2_add2(p1, w, p2) + implicit none + use selection_types + BEGIN_DOC +! p1 += w * p2**2 + END_DOC + type(pt2_type), intent(inout) :: p1 + double precision, intent(in) :: w + type(pt2_type), intent(in) :: p2 + + if (w == 1.d0) then + + p1 % pt2(:) = p1 % pt2(:) + p2 % pt2(:) * p2 % pt2(:) + p1 % rpt2(:) = p1 % rpt2(:) + p2 % rpt2(:) * p2 % rpt2(:) + p1 % variance(:) = p1 % variance(:) + p2 % variance(:) * p2 % variance(:) + p1 % overlap(:,:) = p1 % overlap(:,:) + p2 % overlap(:,:) * p2 % overlap(:,:) + if (is_complex) then + p1 % overlap(:,:) = p1 % overlap(:,:) + p2 % overlap_imag(:,:) * p2 % overlap_imag(:,:) + endif + + else + + p1 % pt2(:) = p1 % pt2(:) + w * p2 % pt2(:) * p2 % pt2(:) + p1 % rpt2(:) = p1 % rpt2(:) + w * p2 % rpt2(:) * p2 % rpt2(:) + p1 % variance(:) = p1 % variance(:) + w * p2 % variance(:) * p2 % variance(:) + p1 % overlap(:,:) = p1 % overlap(:,:) + w * p2 % overlap(:,:) * p2 % overlap(:,:) + if (is_complex) then + p1 % overlap(:,:) = p1 % overlap(:,:) + w * p2 % overlap_imag(:,:) * p2 % overlap_imag(:,:) + endif + + endif + +end subroutine + + +subroutine pt2_serialize(pt2_data, n, x) + implicit none + use selection_types + type(pt2_type), intent(in) :: pt2_data + integer, intent(in) :: n + double precision, intent(out) :: x(*) + + integer :: i,k,n2 + + n2 = n*n + x(1:n) = pt2_data % pt2(1:n) + k=n + x(k+1:k+n) = pt2_data % rpt2(1:n) + k=k+n + x(k+1:k+n) = pt2_data % variance(1:n) + k=k+n + x(k+1:k+n2) = reshape(pt2_data % overlap(1:n,1:n), (/ n2 /)) + if (is_complex) then + k=k+n2 + x(k+1:k+n2) = reshape(pt2_data % overlap_imag(1:n,1:n), (/ n2 /)) + endif + +end + +subroutine pt2_deserialize(pt2_data, n, x) + implicit none + use selection_types + type(pt2_type), intent(inout) :: pt2_data + integer, intent(in) :: n + double precision, intent(in) :: x(*) + + integer :: i,k,n2 + + n2 = n*n + pt2_data % pt2(1:n) = x(1:n) + k=n + pt2_data % rpt2(1:n) = x(k+1:k+n) + k=k+n + pt2_data % variance(1:n) = x(k+1:k+n) + k=k+n + pt2_data % overlap(1:n,1:n) = reshape(x(k+1:k+n2), (/ n, n /)) + if (is_complex) then + k=k+n2 + pt2_data % overlap_imag(1:n,1:n) = reshape(x(k+1:k+n2), (/ n, n /)) + endif + +end diff --git a/src/cipsi/run_pt2_slave.irp.f b/src/cipsi/run_pt2_slave.irp.f index 7df98a87..d3f4d45d 100644 --- a/src/cipsi/run_pt2_slave.irp.f +++ b/src/cipsi/run_pt2_slave.irp.f @@ -1,8 +1,8 @@ - use omp_lib + use omp_lib use selection_types use f77_zmq BEGIN_PROVIDER [ integer(omp_lock_kind), global_selection_buffer_lock ] - use omp_lib + use omp_lib implicit none BEGIN_DOC ! Global buffer for the OpenMP selection @@ -11,7 +11,7 @@ BEGIN_PROVIDER [ integer(omp_lock_kind), global_selection_buffer_lock ] END_PROVIDER BEGIN_PROVIDER [ type(selection_buffer), global_selection_buffer ] - use omp_lib + use omp_lib implicit none BEGIN_DOC ! Global buffer for the OpenMP selection @@ -61,7 +61,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy) type(selection_buffer) :: b logical :: done, buffer_ready - double precision,allocatable :: pt2(:,:), variance(:,:), norm(:,:) + type(pt2_type), allocatable :: pt2_data(:) integer :: n_tasks, k, N integer, allocatable :: i_generator(:), subset(:) @@ -70,10 +70,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy) ! logical :: sending allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) - allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) - allocate(variance(N_states,pt2_n_tasks_max)) - allocate(norm(N_states,pt2_n_tasks_max)) - + allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() integer, external :: connect_to_taskserver @@ -120,13 +117,11 @@ subroutine run_pt2_slave_small(thread,iproc,energy) double precision :: time0, time1 call wall_time(time0) do k=1,n_tasks - pt2(:,k) = 0.d0 - variance(:,k) = 0.d0 - norm(:,k) = 0.d0 - b%cur = 0 + call pt2_alloc(pt2_data(k),N_states) + b%cur = 0 !double precision :: time2 !call wall_time(time2) - call select_connected(i_generator(k),energy,pt2(1,k),variance(1,k),norm(1,k),b,subset(k),pt2_F(i_generator(k))) + call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k))) !call wall_time(time1) !print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1)) enddo @@ -138,11 +133,15 @@ subroutine run_pt2_slave_small(thread,iproc,energy) done = .true. endif call sort_selection_buffer(b) - call push_pt2_results(zmq_socket_push, i_generator, pt2, variance, norm, b, task_id, n_tasks) + call push_pt2_results(zmq_socket_push, i_generator, pt2_data, b, task_id, n_tasks) + do k=1,n_tasks + call pt2_dealloc(pt2_data(k)) + enddo b%cur=0 ! ! Try to adjust n_tasks around nproc/2 seconds per job n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0))) + n_tasks = min(n_tasks, pt2_n_tasks_max) ! n_tasks = 1 end do @@ -158,6 +157,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy) if (buffer_ready) then call delete_selection_buffer(b) endif + deallocate(pt2_data) end subroutine @@ -171,8 +171,8 @@ subroutine run_pt2_slave_large(thread,iproc,energy) integer :: rc, i integer :: worker_id, ctask, ltask - character*(512), allocatable :: task(:) - integer, allocatable :: task_id(:) + character*(512) :: task + integer :: task_id(1) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -183,20 +183,15 @@ subroutine run_pt2_slave_large(thread,iproc,energy) type(selection_buffer) :: b logical :: done, buffer_ready - double precision,allocatable :: pt2(:,:), variance(:,:), norm(:,:) + type(pt2_type) :: pt2_data(1) integer :: n_tasks, k, N - integer, allocatable :: i_generator(:), subset(:) + integer :: i_generator(1), subset integer :: bsize ! Size of selection buffers logical :: sending PROVIDE global_selection_buffer global_selection_buffer_lock - allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) - allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) - allocate(variance(N_states,pt2_n_tasks_max)) - allocate(norm(N_states,pt2_n_tasks_max)) - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() integer, external :: connect_to_taskserver @@ -215,22 +210,17 @@ subroutine run_pt2_slave_large(thread,iproc,energy) done = .False. do while (.not.done) - n_tasks = max(1,n_tasks) - n_tasks = min(pt2_n_tasks_max,n_tasks) - integer, external :: get_tasks_from_taskserver if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then exit endif - done = task_id(n_tasks) == 0 + done = task_id(1) == 0 if (done) then n_tasks = n_tasks-1 endif if (n_tasks == 0) exit - do k=1,n_tasks - read (task(k),*) subset(k), i_generator(k), N - enddo + read (task,*) subset, i_generator(1), N if (b%N == 0) then ! Only first time bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2) @@ -242,17 +232,13 @@ subroutine run_pt2_slave_large(thread,iproc,energy) double precision :: time0, time1 call wall_time(time0) - do k=1,n_tasks - pt2(:,k) = 0.d0 - variance(:,k) = 0.d0 - norm(:,k) = 0.d0 - b%cur = 0 + call pt2_alloc(pt2_data(1),N_states) + b%cur = 0 !double precision :: time2 !call wall_time(time2) - call select_connected(i_generator(k),energy,pt2(1,k),variance(1,k),norm(1,k),b,subset(k),pt2_F(i_generator(k))) + call select_connected(i_generator(1),energy,pt2_data(1),b,subset,pt2_F(i_generator(1))) !call wall_time(time1) !print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1)) - enddo call wall_time(time1) !print *, '-->', i_generator(1), time1-time0, n_tasks @@ -269,16 +255,14 @@ subroutine run_pt2_slave_large(thread,iproc,energy) call omp_unset_lock(global_selection_buffer_lock) if ( iproc == 1 ) then call omp_set_lock(global_selection_buffer_lock) - call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2, variance, norm, global_selection_buffer, task_id, n_tasks,sending) + call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2_data, global_selection_buffer, task_id, n_tasks,sending) global_selection_buffer%cur = 0 call omp_unset_lock(global_selection_buffer_lock) else - call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2, variance, norm, b, task_id, n_tasks,sending) + call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2_data, b, task_id, n_tasks,sending) endif -! ! Try to adjust n_tasks around nproc/2 seconds per job -! n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0))) - n_tasks = 1 + call pt2_dealloc(pt2_data(1)) end do call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending) @@ -298,39 +282,36 @@ subroutine run_pt2_slave_large(thread,iproc,energy) end subroutine -subroutine push_pt2_results(zmq_socket_push, index, pt2, variance, norm, b, task_id, n_tasks) +subroutine push_pt2_results(zmq_socket_push, index, pt2_data, b, task_id, n_tasks) use selection_types use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_push - double precision, intent(in) :: pt2(N_states,n_tasks) - double precision, intent(in) :: variance(N_states,n_tasks) - double precision, intent(in) :: norm(N_states,n_tasks) + type(pt2_type), intent(in) :: pt2_data(n_tasks) integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks) type(selection_buffer), intent(inout) :: b logical :: sending sending = .False. - call push_pt2_results_async_send(zmq_socket_push, index, pt2, variance, norm, b, task_id, n_tasks, sending) + call push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending) call push_pt2_results_async_recv(zmq_socket_push, b%mini, sending) end subroutine -subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2, variance, norm, b, task_id, n_tasks, sending) +subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending) use selection_types use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_push - double precision, intent(in) :: pt2(N_states,n_tasks) - double precision, intent(in) :: variance(N_states,n_tasks) - double precision, intent(in) :: norm(N_states,n_tasks) + type(pt2_type), intent(in) :: pt2_data(n_tasks) integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks) type(selection_buffer), intent(inout) :: b logical, intent(inout) :: sending - integer :: rc + integer :: rc, i integer*8 :: rc8 + double precision, allocatable :: pt2_serialized(:,:) if (sending) then print *, irp_here, ': sending is true' @@ -358,32 +339,18 @@ subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2, variance, no endif - rc = f77_zmq_send( zmq_socket_push, pt2, 8*N_states*n_tasks, ZMQ_SNDMORE) + allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) ) + do i=1,n_tasks + call pt2_serialize(pt2_data(i),N_states,pt2_serialized(1,i)) + enddo + + rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE) + deallocate(pt2_serialized) if (rc == -1) then print *, irp_here, ': error sending result' stop 3 return - else if(rc /= 8*N_states*n_tasks) then - stop 'push' - endif - - - rc = f77_zmq_send( zmq_socket_push, variance, 8*N_states*n_tasks, ZMQ_SNDMORE) - if (rc == -1) then - print *, irp_here, ': error sending result' - stop 4 - return - else if(rc /= 8*N_states*n_tasks) then - stop 'push' - endif - - - rc = f77_zmq_send( zmq_socket_push, norm, 8*N_states*n_tasks, ZMQ_SNDMORE) - if (rc == -1) then - print *, irp_here, ': error sending result' - stop 5 - return - else if(rc /= 8*N_states*n_tasks) then + else if(rc /= size(pt2_serialized)*8) then stop 'push' endif @@ -475,7 +442,7 @@ IRP_ELSE stop 11 return else if (rc /= 8) then - print *, irp_here//': error in receiving mini' + print *, irp_here//': error in receiving mini' stop 12 endif IRP_ENDIF @@ -484,19 +451,18 @@ end subroutine -subroutine pull_pt2_results(zmq_socket_pull, index, pt2, variance, norm, task_id, n_tasks, b) +subroutine pull_pt2_results(zmq_socket_pull, index, pt2_data, task_id, n_tasks, b) use selection_types use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - double precision, intent(inout) :: pt2(N_states,*) - double precision, intent(inout) :: variance(N_states,*) - double precision, intent(inout) :: norm(N_states,*) + type(pt2_type), intent(inout) :: pt2_data(*) type(selection_buffer), intent(inout) :: b integer, intent(out) :: index(*) integer, intent(out) :: n_tasks, task_id(*) integer :: rc, rn, i integer*8 :: rc8 + double precision, allocatable :: pt2_serialized(:,:) rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) if (rc == -1) then @@ -514,29 +480,19 @@ subroutine pull_pt2_results(zmq_socket_pull, index, pt2, variance, norm, task_id stop 'pull' endif - rc = f77_zmq_recv( zmq_socket_pull, pt2, N_states*8*n_tasks, 0) + allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) ) + rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, 8*size(pt2_serialized)*n_tasks, 0) if (rc == -1) then n_tasks = 1 task_id(1) = 0 - else if(rc /= 8*N_states*n_tasks) then + else if(rc /= 8*size(pt2_serialized)) then stop 'pull' endif - rc = f77_zmq_recv( zmq_socket_pull, variance, N_states*8*n_tasks, 0) - if (rc == -1) then - n_tasks = 1 - task_id(1) = 0 - else if(rc /= 8*N_states*n_tasks) then - stop 'pull' - endif - - rc = f77_zmq_recv( zmq_socket_pull, norm, N_states*8*n_tasks, 0) - if (rc == -1) then - n_tasks = 1 - task_id(1) = 0 - else if(rc /= 8*N_states*n_tasks) then - stop 'pull' - endif + do i=1,n_tasks + call pt2_deserialize(pt2_data(i),N_states,pt2_serialized(1,i)) + enddo + deallocate(pt2_serialized) rc = f77_zmq_recv( zmq_socket_pull, task_id, n_tasks*4, 0) if (rc == -1) then diff --git a/src/cipsi/run_selection_slave.irp.f b/src/cipsi/run_selection_slave.irp.f index 0ebdd4fd..0d06d1d0 100644 --- a/src/cipsi/run_selection_slave.irp.f +++ b/src/cipsi/run_selection_slave.irp.f @@ -18,9 +18,8 @@ subroutine run_selection_slave(thread,iproc,energy) type(selection_buffer) :: buf, buf2 logical :: done, buffer_ready - double precision :: pt2(N_states) - double precision :: variance(N_states) - double precision :: norm(N_states) + type(pt2_type) :: pt2_data + !todo: check for providers that are now unlinked for real/complex PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique @@ -33,6 +32,7 @@ subroutine run_selection_slave(thread,iproc,energy) PROVIDE psi_selectors_coef_transp psi_det_sorted weight_selection endif + call pt2_alloc(pt2_data,N_states) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() @@ -47,9 +47,6 @@ subroutine run_selection_slave(thread,iproc,energy) buf%N = 0 buffer_ready = .False. ctask = 1 - pt2(:) = 0d0 - variance(:) = 0d0 - norm(:) = 0.d0 do integer, external :: get_task_from_taskserver @@ -74,7 +71,7 @@ subroutine run_selection_slave(thread,iproc,energy) stop '-1' end if end if - call select_connected(i_generator,energy,pt2,variance,norm,buf,subset,pt2_F(i_generator)) + call select_connected(i_generator,energy,pt2_data,buf,subset,pt2_F(i_generator)) endif integer, external :: task_done_to_taskserver @@ -93,12 +90,10 @@ subroutine run_selection_slave(thread,iproc,energy) if(ctask > 0) then call sort_selection_buffer(buf) ! call merge_selection_buffers(buf,buf2) -!print *, task_id(1), pt2(1), buf%cur, ctask - call push_selection_results(zmq_socket_push, pt2, variance, norm, buf, task_id(1), ctask) + call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask) + call pt2_dealloc(pt2_data) + call pt2_alloc(pt2_data,N_states) ! buf%mini = buf2%mini - pt2(:) = 0d0 - variance(:) = 0d0 - norm(:) = 0d0 buf%cur = 0 end if ctask = 0 @@ -111,14 +106,12 @@ subroutine run_selection_slave(thread,iproc,energy) if(ctask > 0) then call sort_selection_buffer(buf) ! call merge_selection_buffers(buf,buf2) - call push_selection_results(zmq_socket_push, pt2, variance, norm, buf, task_id(1), ctask) + call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask) ! buf%mini = buf2%mini - pt2(:) = 0d0 - variance(:) = 0d0 - norm(:) = 0d0 buf%cur = 0 end if ctask = 0 + call pt2_dealloc(pt2_data) integer, external :: disconnect_from_taskserver if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then @@ -134,18 +127,17 @@ subroutine run_selection_slave(thread,iproc,energy) end subroutine -subroutine push_selection_results(zmq_socket_push, pt2, variance, norm, b, task_id, ntask) +subroutine push_selection_results(zmq_socket_push, pt2_data, b, task_id, ntasks) use f77_zmq use selection_types implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_push - double precision, intent(in) :: pt2(N_states) - double precision, intent(in) :: variance(N_states) - double precision, intent(in) :: norm(N_states) + type(pt2_type), intent(in) :: pt2_data type(selection_buffer), intent(inout) :: b - integer, intent(in) :: ntask, task_id(*) + integer, intent(in) :: ntasks, task_id(*) integer :: rc + double precision, allocatable :: pt2_serialized(:) rc = f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE) if(rc /= 4) then @@ -153,20 +145,18 @@ subroutine push_selection_results(zmq_socket_push, pt2, variance, norm, b, task_ endif - rc = f77_zmq_send( zmq_socket_push, pt2, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) then - print *, 'f77_zmq_send( zmq_socket_push, pt2, 8*N_states, ZMQ_SNDMORE)' - endif + allocate(pt2_serialized (pt2_type_size(N_states)) ) + call pt2_serialize(pt2_data,N_states,pt2_serialized) - rc = f77_zmq_send( zmq_socket_push, variance, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) then - print *, 'f77_zmq_send( zmq_socket_push, variance, 8*N_states, ZMQ_SNDMORE)' - endif - - rc = f77_zmq_send( zmq_socket_push, norm, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) then - print *, 'f77_zmq_send( zmq_socket_push, norm, 8*N_states, ZMQ_SNDMORE)' + rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE) + if (rc == -1) then + print *, irp_here, ': error sending result' + stop 3 + return + else if(rc /= size(pt2_serialized)*8) then + stop 'push' endif + deallocate(pt2_serialized) if (b%cur > 0) then @@ -182,14 +172,14 @@ subroutine push_selection_results(zmq_socket_push, pt2, variance, norm, b, task_ endif - rc = f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE) + rc = f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE) if(rc /= 4) then - print *, 'f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE)' + print *, 'f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE)' endif - rc = f77_zmq_send( zmq_socket_push, task_id(1), ntask*4, 0) - if(rc /= 4*ntask) then - print *, 'f77_zmq_send( zmq_socket_push, task_id(1), ntask*4, 0)' + rc = f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0) + if(rc /= 4*ntasks) then + print *, 'f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0)' endif ! Activate is zmq_socket_push is a REQ @@ -206,42 +196,34 @@ IRP_ENDIF end subroutine -subroutine pull_selection_results(zmq_socket_pull, pt2, variance, norm, val, det, N, task_id, ntask) +subroutine pull_selection_results(zmq_socket_pull, pt2_data, val, det, N, task_id, ntasks) use f77_zmq use selection_types implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - double precision, intent(inout) :: pt2(N_states) - double precision, intent(inout) :: variance(N_states) - double precision, intent(inout) :: norm(N_states) + type(pt2_type), intent(inout) :: pt2_data double precision, intent(out) :: val(*) integer(bit_kind), intent(out) :: det(N_int, 2, *) - integer, intent(out) :: N, ntask, task_id(*) + integer, intent(out) :: N, ntasks, task_id(*) integer :: rc, rn, i + double precision, allocatable :: pt2_serialized(:) rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) if(rc /= 4) then print *, 'f77_zmq_recv( zmq_socket_pull, N, 4, 0)' endif - pt2(:) = 0.d0 - variance(:) = 0.d0 - norm(:) = 0.d0 - - rc = f77_zmq_recv( zmq_socket_pull, pt2, N_states*8, 0) - if(rc /= 8*N_states) then - print *, 'f77_zmq_recv( zmq_socket_pull, pt2, N_states*8, 0)' + allocate(pt2_serialized (pt2_type_size(N_states)) ) + rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, 8*size(pt2_serialized), 0) + if (rc == -1) then + ntasks = 1 + task_id(1) = 0 + else if(rc /= 8*size(pt2_serialized)) then + stop 'pull' endif - rc = f77_zmq_recv( zmq_socket_pull, variance, N_states*8, 0) - if(rc /= 8*N_states) then - print *, 'f77_zmq_recv( zmq_socket_pull, variance, N_states*8, 0)' - endif - - rc = f77_zmq_recv( zmq_socket_pull, norm, N_states*8, 0) - if(rc /= 8*N_states) then - print *, 'f77_zmq_recv( zmq_socket_pull, norm, N_states*8, 0)' - endif + call pt2_deserialize(pt2_data,N_states,pt2_serialized) + deallocate(pt2_serialized) if (N>0) then rc = f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0) @@ -255,14 +237,14 @@ subroutine pull_selection_results(zmq_socket_pull, pt2, variance, norm, val, det endif endif - rc = f77_zmq_recv( zmq_socket_pull, ntask, 4, 0) + rc = f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0) if(rc /= 4) then - print *, 'f77_zmq_recv( zmq_socket_pull, ntask, 4, 0)' + print *, 'f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0)' endif - rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntask*4, 0) - if(rc /= 4*ntask) then - print *, 'f77_zmq_recv( zmq_socket_pull, task_id(1), ntask*4, 0)' + rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0) + if(rc /= 4*ntasks) then + print *, 'f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0)' endif ! Activate is zmq_socket_pull is a REP diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 4513e9cf..f34be9cf 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -19,22 +19,26 @@ BEGIN_PROVIDER [ double precision, variance_match_weight, (N_states) ] variance_match_weight(:) = 1.d0 END_PROVIDER -subroutine update_pt2_and_variance_weights(pt2, variance, norm2, N_st) +subroutine update_pt2_and_variance_weights(pt2_data, N_st) implicit none + use selection_types BEGIN_DOC ! Updates the PT2- and Variance- matching weights. END_DOC integer, intent(in) :: N_st - double precision, intent(in) :: pt2(N_st) - double precision, intent(in) :: variance(N_st) - double precision, intent(in) :: norm2(N_st) + type(pt2_type), intent(in) :: pt2_data + double precision :: pt2(N_st) + double precision :: variance(N_st) - double precision :: avg, rpt2(N_st), element, dt, x + double precision :: avg, element, dt, x integer :: k integer, save :: i_iter=0 integer, parameter :: i_itermax = 1 double precision, allocatable, save :: memo_variance(:,:), memo_pt2(:,:) + pt2(:) = pt2_data % pt2(:) + variance(:) = pt2_data % variance(:) + if (i_iter == 0) then allocate(memo_variance(N_st,i_itermax), memo_pt2(N_st,i_itermax)) memo_pt2(:,:) = 1.d0 @@ -48,11 +52,6 @@ subroutine update_pt2_and_variance_weights(pt2, variance, norm2, N_st) dt = 2.0d0 - do k=1,N_st - ! rPT2 - rpt2(k) = pt2(k)/(1.d0 + norm2(k)) - enddo - avg = sum(pt2(1:N_st)) / dble(N_st) - 1.d-32 ! Avoid future division by zero do k=1,N_st element = exp(dt*(pt2(k)/avg -1.d0)) @@ -179,16 +178,14 @@ subroutine get_mask_phase(det1, pm, Nint) end subroutine -subroutine select_connected(i_generator,E0,pt2,variance,norm2,b,subset,csubset) +subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset) !todo: simplify for kpts use bitmasks use selection_types implicit none integer, intent(in) :: i_generator, subset, csubset type(selection_buffer), intent(inout) :: b - double precision, intent(inout) :: pt2(N_states) - double precision, intent(inout) :: variance(N_states) - double precision, intent(inout) :: norm2(N_states) + type(pt2_type), intent(inout) :: pt2_data integer :: k,l double precision, intent(in) :: E0(N_states) @@ -209,7 +206,7 @@ subroutine select_connected(i_generator,E0,pt2,variance,norm2,b,subset,csubset) particle_mask(k,1) = iand(generators_bitmask(k,1,s_part), not(psi_det_generators(k,1,i_generator)) ) particle_mask(k,2) = iand(generators_bitmask(k,2,s_part), not(psi_det_generators(k,2,i_generator)) ) enddo - call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,variance,norm2,b,subset,csubset) + call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,b,subset,csubset) deallocate(fock_diag_tmp) end subroutine @@ -258,7 +255,7 @@ double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2, Nint) end -subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,variance,norm2,buf,subset,csubset) +subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,buf,subset,csubset) use bitmasks use selection_types implicit none @@ -270,9 +267,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) double precision, intent(in) :: fock_diag_tmp(mo_num) double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - double precision, intent(inout) :: variance(N_states) - double precision, intent(inout) :: norm2(N_states) + type(pt2_type), intent(inout) :: pt2_data type(selection_buffer), intent(inout) :: buf integer :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i,j,nt,ii,sze @@ -746,7 +741,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d call splash_pq_complex(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat_complex, interesting) if(.not.pert_2rdm)then - call fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat_complex, buf) + !call fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat_complex, buf) + call fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat_complex, buf) else print*,irp_here,' not implemented for complex (fill_buffer_double_rdm_complex)' stop -1 @@ -756,9 +752,9 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) if(.not.pert_2rdm)then - call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat, buf) + call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) else - call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0)) + call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0)) endif endif!complex end if @@ -787,7 +783,7 @@ end subroutine -subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat, buf) +subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) use bitmasks use selection_types implicit none @@ -795,16 +791,15 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d integer, intent(in) :: i_generator, sp, h1, h2 double precision, intent(in) :: mat(N_states, mo_num, mo_num) logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num) - double precision, intent(in) :: fock_diag_tmp(mo_num) + double precision, intent(in) :: fock_diag_tmp(mo_num) double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - double precision, intent(inout) :: variance(N_states) - double precision, intent(inout) :: norm2(N_states) + type(pt2_type), intent(inout) :: pt2_data type(selection_buffer), intent(inout) :: buf logical :: ok - integer :: s1, s2, p1, p2, ib, j, istate + integer :: s1, s2, p1, p2, ib, j, istate, jstate integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) - double precision :: e_pert, delta_E, val, Hii, w, tmp, alpha_h_psi, coef + double precision :: e_pert(N_states), coef(N_states), X(N_states) + double precision :: delta_E, val, Hii, w, tmp, alpha_h_psi double precision, external :: diag_H_mat_elem_fock double precision :: E_shift @@ -812,7 +807,12 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d double precision, allocatable :: values(:) integer, allocatable :: keys(:,:) integer :: nkeys - + double precision :: s_weight(N_states,N_states) + do jstate=1,N_states + do istate=1,N_states + s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate)) + enddo + enddo if(sp == 3) then s1 = 1 @@ -902,18 +902,42 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if (delta_E < 0.d0) then tmp = -tmp endif - e_pert = 0.5d0 * (tmp - delta_E) + e_pert(istate) = 0.5d0 * (tmp - delta_E) if (dabs(alpha_h_psi) > 1.d-4) then - coef = e_pert / alpha_h_psi + coef(istate) = e_pert(istate) / alpha_h_psi else - coef = alpha_h_psi / delta_E + coef(istate) = alpha_h_psi / delta_E endif - pt2(istate) = pt2(istate) + e_pert - variance(istate) = variance(istate) + alpha_h_psi * alpha_h_psi - norm2(istate) = norm2(istate) + coef * coef + if (e_pert(istate) < 0.d0) then + X(istate) = -dsqrt(-e_pert(istate)) + else + X(istate) = dsqrt(e_pert(istate)) + endif + enddo + +! ! Gram-Schmidt using input overlap matrix +! do istate=1,N_states +! do jstate=1,istate-1 +! if ( (pt2_overlap(jstate,istate) == 0.d0).or.(pt2_overlap(jstate,jstate) == 0.d0) ) cycle +! coef(istate) = coef(istate) - pt2_overlap(jstate,istate)/pt2_overlap(jstate,jstate) * coef(jstate) +! enddo +! enddo + + do istate=1, N_states + do jstate=1,N_states + pt2_data % overlap(jstate,istate) += coef(jstate) * coef(istate) + enddo + enddo + + do istate=1,N_states + alpha_h_psi = mat(istate, p1, p2) + + pt2_data % variance(istate) += alpha_h_psi * alpha_h_psi + pt2_data % pt2(istate) += e_pert(istate) !!!DEBUG -! pt2(istate) = pt2(istate) - e_pert + alpha_h_psi**2/delta_E +! delta_E = E0(istate) - Hii + E_shift +! pt2_data % pt2(istate) = pt2_data % pt2(istate) + alpha_h_psi**2/delta_E ! ! integer :: k ! double precision :: alpha_h_psi_2,hij @@ -934,14 +958,26 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d case(5) ! Variance selection - w = w - alpha_h_psi * alpha_h_psi * selection_weight(istate) + w = w - alpha_h_psi * alpha_h_psi * s_weight(istate,istate) + do jstate=1,N_states + if (istate == jstate) cycle + w = w + alpha_h_psi*mat(jstate,p1,p2) * s_weight(istate,jstate) + enddo case(6) - w = w - coef * coef * selection_weight(istate) + w = w - coef(istate) * coef(istate) * s_weight(istate,istate) + do jstate=1,N_states + if (istate == jstate) cycle + w = w + coef(istate)*coef(jstate) * s_weight(istate,jstate) + enddo case default ! Energy selection - w = w + e_pert * selection_weight(istate) + w = w + e_pert(istate) * s_weight(istate,istate) + do jstate=1,N_states + if (istate == jstate) cycle + w = w - dabs(X(istate))*X(jstate) * s_weight(istate,jstate) + enddo end select end do @@ -2049,7 +2085,7 @@ end ! ! !==============================================================================! -subroutine fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm2, mat, buf) +subroutine fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) !todo: should be okay for complex use bitmasks use selection_types @@ -2058,17 +2094,16 @@ subroutine fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned integer, intent(in) :: i_generator, sp, h1, h2 complex*16, intent(in) :: mat(N_states, mo_num, mo_num) logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num) - double precision, intent(in) :: fock_diag_tmp(mo_num) + double precision, intent(in) :: fock_diag_tmp(mo_num) double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - double precision, intent(inout) :: variance(N_states) - double precision, intent(inout) :: norm2(N_states) + type(pt2_type), intent(inout) :: pt2_date type(selection_buffer), intent(inout) :: buf logical :: ok - integer :: s1, s2, p1, p2, ib, j, istate + integer :: s1, s2, p1, p2, ib, j, istate, jstate integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) - double precision :: e_pert, delta_E, val, Hii, w, tmp - complex*16 :: alpha_h_psi, coef, val_c + double precision :: e_pert(n_states), x(n_states) + double precision :: delta_E, val, Hii, w, tmp + complex*16 :: alpha_h_psi, coef(n_states), val_c double precision, external :: diag_H_mat_elem_fock double precision :: E_shift @@ -2076,7 +2111,12 @@ subroutine fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned ! double precision, allocatable :: values(:) ! integer, allocatable :: keys(:,:) ! integer :: nkeys - + double precision :: s_weight(n_states,n_states) + do jstate=1,n_states + do istate=1,n_states + s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate)) + enddo + enddo if(sp == 3) then s1 = 1 @@ -2166,15 +2206,32 @@ subroutine fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned if (delta_E < 0.d0) then tmp = -tmp endif - e_pert = 0.5d0 * (tmp - delta_E) + e_pert(istate) = 0.5d0 * (tmp - delta_E) + !TODO: check conjugate for coef if (cdabs(alpha_h_psi) > 1.d-4) then - coef = e_pert / alpha_h_psi + coef(istate) = e_pert / alpha_h_psi else - coef = alpha_h_psi / delta_E + coef(istate) = alpha_h_psi / delta_E endif - pt2(istate) = pt2(istate) + e_pert - variance(istate) = variance(istate) + cdabs(alpha_h_psi * alpha_h_psi) - norm2(istate) = norm2(istate) + cdabs(coef * coef) + if (e_pert(istate) < 0.d0) then + x(istate) = -dsqrt(-e_pert(istate)) + else + x(istate) = dsqrt(e_pert(istate)) + endif + enddo + + do istate=1,n_states + do jstate=1,n_states + val_c = coef(jstate) * dconjg(coef(istate)) + pt2_data % overlap(jstate,istate) += dble(val_c) + pt2_data % overlap_imag(jstate,istate) += dimag(val_c) + enddo + enddo + + do istate=1,n_states + alpha_h_psi = mat(istate, p1, p2) + pt2_data % variance(istate) += cdabs(alpha_h_psi * alpha_h_psi) + pt2_data % pt2(istate) += e_pert(istate) !!!DEBUG ! integer :: k @@ -2194,16 +2251,30 @@ subroutine fill_buffer_double_complex(i_generator, sp, h1, h2, bannedOrb, banned select case (weight_selection) + !TODO: check off-diagonals case(5) ! Variance selection - w = w - cdabs(alpha_h_psi * alpha_h_psi) * selection_weight(istate) + w = w - cdabs(alpha_h_psi * alpha_h_psi) * s_weight(istate,istate) + do jstate=1,n_states + if (istate == jstate) cycle + w = w + cdabs(alpha_h_psi * mat(jstate,p1,p2)) * s_weight(istate,jstate) + enddo case(6) - w = w - cdabs(coef * coef) * selection_weight(istate) + w = w - cdabs(coef(istate) * coef(istate)) * s_weight(istate,istate) + do jstate=1,n_states + if (istate == jstate) cycle + w = w + cdabs(coef(istate)*coef(jstate)) * s_weight(istate,jstate) + enddo case default ! Energy selection - w = w + e_pert * selection_weight(istate) + w = w + e_pert(istate) * s_weight(istate,istate) + do jstate=1,n_states + if (istate == jstate) cycle + !TODO: why dabs? + w = w - dabs(x(istate))*x(jstate) * s_weight(istate,jstate) + enddo end select end do diff --git a/src/cipsi/selection_types.f90 b/src/cipsi/selection_types.f90 index 29e48524..53250b57 100644 --- a/src/cipsi/selection_types.f90 +++ b/src/cipsi/selection_types.f90 @@ -5,5 +5,26 @@ module selection_types double precision, pointer :: val(:) double precision :: mini endtype + + type pt2_type + double precision, allocatable :: pt2(:) + double precision, allocatable :: rpt2(:) + double precision, allocatable :: variance(:) + double precision, allocatable :: overlap(:,:) + endtype + + contains + + integer function pt2_type_size(N) + implicit none + integer, intent(in) :: N + if (is_complex) then + pt2_type_size = (3*n + 2*n*n) + else + pt2_type_size = (3*n + n*n) + endif + + end function + end module diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f index f9d0ea42..ea3dce9d 100644 --- a/src/cipsi/stochastic_cipsi.irp.f +++ b/src/cipsi/stochastic_cipsi.irp.f @@ -1,12 +1,15 @@ subroutine run_stochastic_cipsi + use selection_types implicit none BEGIN_DOC ! Selected Full Configuration Interaction with Stochastic selection and PT2. END_DOC integer :: i,j,k - double precision, allocatable :: pt2(:), variance(:), norm2(:), rpt2(:), zeros(:) + double precision, allocatable :: zeros(:) integer :: to_select - logical, external :: qp_stop + type(pt2_type) :: pt2_data, pt2_data_err + logical, external :: qp_stop + double precision :: rss double precision, external :: memory_of_double @@ -19,7 +22,9 @@ subroutine run_stochastic_cipsi rss = memory_of_double(N_states)*4.d0 call check_mem(rss,irp_here) - allocate (pt2(N_states), zeros(N_states), rpt2(N_states), norm2(N_states), variance(N_states)) + allocate (zeros(N_states)) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) double precision :: hf_energy_ref logical :: has @@ -28,10 +33,13 @@ subroutine run_stochastic_cipsi relative_error=PT2_relative_error zeros = 0.d0 - pt2 = -huge(1.e0) - rpt2 = -huge(1.e0) - norm2 = 0.d0 - variance = huge(1.e0) + pt2_data % pt2 = -huge(1.e0) + pt2_data % rpt2 = -huge(1.e0) + pt2_data % overlap= 0.d0 + pt2_data % variance = huge(1.e0) + if (is_complex) then + pt2_data % overlap_imag = 0.d0 + endif if (s2_eig) then call make_s2_eigenfunction @@ -73,14 +81,13 @@ subroutine run_stochastic_cipsi endif double precision :: correlation_energy_ratio - double precision :: error(N_states) correlation_energy_ratio = 0.d0 do while ( & (N_det < N_det_max) .and. & - (maxval(abs(rpt2(1:N_states))) > pt2_max) .and. & - (maxval(abs(variance(1:N_states))) > variance_max) .and. & + (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and. & + (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and. & (correlation_energy_ratio <= correlation_energy_ratio_max) & ) write(*,'(A)') '--------------------------------------------------------------------------------' @@ -89,26 +96,24 @@ subroutine run_stochastic_cipsi to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) to_select = max(N_states_diag, to_select) - pt2 = 0.d0 - variance = 0.d0 - norm2 = 0.d0 - call ZMQ_pt2(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, & - norm2, to_select) ! Stochastic PT2 and selection - do k=1,N_states - rpt2(k) = pt2(k)/(1.d0 + norm2(k)) - enddo + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) + call ZMQ_pt2(psi_energy_with_nucl_rep,pt2_data,pt2_data_err,relative_error,to_select) ! Stochastic PT2 and selection correlation_energy_ratio = (psi_energy_with_nucl_rep(1) - hf_energy_ref) / & - (psi_energy_with_nucl_rep(1) + rpt2(1) - hf_energy_ref) + (psi_energy_with_nucl_rep(1) + pt2_data % rpt2(1) - hf_energy_ref) correlation_energy_ratio = min(1.d0,correlation_energy_ratio) call write_double(6,correlation_energy_ratio, 'Correlation ratio') - call print_summary(psi_energy_with_nucl_rep,pt2,error,variance,norm2,N_det,N_occ_pattern,N_states,psi_s2) + call print_summary(psi_energy_with_nucl_rep, & + pt2_data, pt2_data_err, N_det,N_occ_pattern,N_states,psi_s2) - call save_energy(psi_energy_with_nucl_rep, rpt2) + call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det) + call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) call print_extrapolated_energy() N_iter += 1 @@ -147,20 +152,19 @@ subroutine run_stochastic_cipsi call save_energy(psi_energy_with_nucl_rep, zeros) endif - pt2(:) = 0.d0 - variance(:) = 0.d0 - norm2(:) = 0.d0 - call ZMQ_pt2(psi_energy_with_nucl_rep, pt2,relative_error,error,variance, & - norm2,0) ! Stochastic PT2 + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) + call ZMQ_pt2(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, relative_error, 0) ! Stochastic PT2 - do k=1,N_states - rpt2(k) = pt2(k)/(1.d0 + norm2(k)) - enddo - - call save_energy(psi_energy_with_nucl_rep, rpt2) - call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm2,N_det,N_occ_pattern,N_states,psi_s2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det) + call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) + call print_summary(psi_energy_with_nucl_rep, & + pt2_data , pt2_data_err, N_det, N_occ_pattern, N_states, psi_s2) + call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) call print_extrapolated_energy() endif + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) end diff --git a/src/cipsi/zmq_selection.irp.f b/src/cipsi/zmq_selection.irp.f index 4456b53e..5ac1d6fb 100644 --- a/src/cipsi/zmq_selection.irp.f +++ b/src/cipsi/zmq_selection.irp.f @@ -1,4 +1,4 @@ -subroutine ZMQ_selection(N_in, pt2, variance, norm2) +subroutine ZMQ_selection(N_in, pt2_data) use f77_zmq use selection_types @@ -7,11 +7,9 @@ subroutine ZMQ_selection(N_in, pt2, variance, norm2) integer(ZMQ_PTR) :: zmq_to_qp_run_socket , zmq_socket_pull integer, intent(in) :: N_in type(selection_buffer) :: b - integer :: i, N + integer :: i, l, N integer, external :: omp_get_thread_num - double precision, intent(out) :: pt2(N_states) - double precision, intent(out) :: variance(N_states) - double precision, intent(out) :: norm2(N_states) + type(pt2_type), intent(inout) :: pt2_data ! PROVIDE psi_det psi_coef N_det qp_max_mem N_states pt2_F s2_eig N_det_generators @@ -107,6 +105,12 @@ subroutine ZMQ_selection(N_in, pt2, variance, norm2) f(:) = 1.d0 if (.not.do_pt2) then +<<<<<<< HEAD + double precision :: f(N_states), u_dot_u + do k=1,min(N_det,N_states) + f(k) = 1.d0 / u_dot_u(psi_selectors_coef(1,k), N_det_selectors) + enddo +======= double precision :: f(N_states), u_dot_u if (is_complex) then double precision :: u_dot_u_complex @@ -118,22 +122,19 @@ subroutine ZMQ_selection(N_in, pt2, variance, norm2) f(k) = 1.d0 / u_dot_u(psi_selectors_coef(1,k), N_det_selectors) enddo endif +>>>>>>> origin/cleaning_kpts endif - !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2, variance, norm2) PRIVATE(i) NUM_THREADS(nproc_target+1) + !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2_data) PRIVATE(i) NUM_THREADS(nproc_target+1) i = omp_get_thread_num() if (i==0) then - call selection_collector(zmq_socket_pull, b, N, pt2, variance, norm2) + call selection_collector(zmq_socket_pull, b, N, pt2_data) else call selection_slave_inproc(i) endif !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'selection') - do i=N_det+1,N_states - pt2(i) = 0.d0 - variance(i) = 0.d0 - norm2(i) = 0.d0 - enddo if (N_in > 0) then if (s2_eig) then call make_selection_buffer_s2(b) @@ -141,13 +142,28 @@ subroutine ZMQ_selection(N_in, pt2, variance, norm2) call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) endif call delete_selection_buffer(b) + do k=1,N_states - pt2(k) = pt2(k) * f(k) - variance(k) = variance(k) * f(k) - norm2(k) = norm2(k) * f(k) + pt2_data % pt2(k) = pt2_data % pt2(k) * f(k) + pt2_data % variance(k) = pt2_data % variance(k) * f(k) + do l=1,N_states + pt2_data % overlap(k,l) = pt2_data % overlap(k,l) * dsqrt(f(k)*f(l)) + pt2_data % overlap(l,k) = pt2_data % overlap(l,k) * dsqrt(f(k)*f(l)) + enddo + + pt2_data % rpt2(k) = & + pt2_data % pt2(k)/(1.d0 + pt2_data % overlap(k,k)) enddo - call update_pt2_and_variance_weights(pt2, variance, norm2, N_states) + pt2_overlap(:,:) = pt2_data % overlap(:,:) + + print *, 'Overlap of perturbed states:' + do l=1,N_states + print *, pt2_overlap(l,:) + enddo + print *, '-------' + SOFT_TOUCH pt2_overlap + call update_pt2_and_variance_weights(pt2_data, N_states) end subroutine @@ -159,7 +175,7 @@ subroutine selection_slave_inproc(i) call run_selection_slave(1,i,pt2_e0_denominator) end -subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm2) +subroutine selection_collector(zmq_socket_pull, b, N, pt2_data) use f77_zmq use selection_types use bitmasks @@ -169,12 +185,12 @@ subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm2) integer(ZMQ_PTR), intent(in) :: zmq_socket_pull type(selection_buffer), intent(inout) :: b integer, intent(in) :: N - double precision, intent(out) :: pt2(N_states) - double precision, intent(out) :: variance(N_states) - double precision, intent(out) :: norm2(N_states) - double precision :: pt2_mwen(N_states) - double precision :: variance_mwen(N_states) - double precision :: norm2_mwen(N_states) + type(pt2_type), intent(inout) :: pt2_data + type(pt2_type) :: pt2_data_tmp + + double precision :: pt2_mwen(N_states) + double precision :: variance_mwen(N_states) + double precision :: norm2_mwen(N_states) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -192,24 +208,24 @@ subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm2) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() call create_selection_buffer(N, N*2, b2) + integer :: k double precision :: rss double precision, external :: memory_of_int rss = memory_of_int(N_det_generators) call check_mem(rss,irp_here) allocate(task_id(N_det_generators)) more = 1 - pt2(:) = 0d0 - variance(:) = 0.d0 - norm2(:) = 0.d0 - pt2_mwen(:) = 0.d0 - variance_mwen(:) = 0.d0 - norm2_mwen(:) = 0.d0 + pt2_data % pt2(:) = 0d0 + pt2_data % variance(:) = 0.d0 + pt2_data % overlap(:,:) = 0.d0 + if (is_complex) then + pt2_data % overlap_imag(:,:) = 0.d0 + endif + call pt2_alloc(pt2_data_tmp,N_states) do while (more == 1) - call pull_selection_results(zmq_socket_pull, pt2_mwen, variance_mwen, norm2_mwen, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask) + call pull_selection_results(zmq_socket_pull, pt2_data_tmp, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask) - pt2(:) += pt2_mwen(:) - variance(:) += variance_mwen(:) - norm2(:) += norm2_mwen(:) + call pt2_add(pt2_data, 1.d0, pt2_data_tmp) do i=1, b2%cur call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i)) if (b2%val(i) > b%mini) exit @@ -225,6 +241,7 @@ subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm2) endif end do end do + call pt2_dealloc(pt2_data_tmp) call delete_selection_buffer(b2) diff --git a/src/determinants/EZFIO.cfg b/src/determinants/EZFIO.cfg index 489bf6da..b03ab374 100644 --- a/src/determinants/EZFIO.cfg +++ b/src/determinants/EZFIO.cfg @@ -44,7 +44,7 @@ default: 2 type: integer doc: Weight used in the selection. 0: input state-average weight, 1: 1./(c_0^2), 2: rPT2 matching, 3: variance matching, 4: variance and rPT2 matching, 5: variance minimization and matching, 6: CI coefficients 7: input state-average multiplied by variance and rPT2 matching 8: input state-average multiplied by rPT2 matching 9: input state-average multiplied by variance matching interface: ezfio,provider,ocaml -default: 2 +default: 1 [threshold_generators] type: Threshold diff --git a/src/fci/pt2.irp.f b/src/fci/pt2.irp.f index 83994781..eb11e28c 100644 --- a/src/fci/pt2.irp.f +++ b/src/fci/pt2.irp.f @@ -28,35 +28,38 @@ end subroutine run implicit none + use selection_types integer :: i,j,k logical, external :: detEq - double precision :: pt2(N_states) + type(pt2_type) :: pt2_data, pt2_data_err integer :: degree integer :: n_det_before, to_select double precision :: threshold_davidson_in - double precision :: E_CI_before(N_states), relative_error, error(N_states), variance(N_states), norm2(N_states), rpt2(N_states) + double precision :: relative_error + double precision, allocatable :: E_CI_before(:) - pt2(:) = 0.d0 + allocate ( E_CI_before(N_states)) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) E_CI_before(:) = psi_energy(:) + nuclear_repulsion relative_error=PT2_relative_error if (do_pt2) then - call ZMQ_pt2(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, & - norm2,0) ! Stochastic PT2 + call ZMQ_pt2(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, relative_error, 0) ! Stochastic PT2 else - call ZMQ_selection(0, pt2, variance, norm2) + call ZMQ_selection(0, pt2_data) endif - do k=1,N_states - rpt2(k) = pt2(k)/(1.d0 + norm2(k)) - enddo + call print_summary(psi_energy_with_nucl_rep(1:N_states), & + pt2_data, pt2_data_err, N_det,N_occ_pattern,N_states,psi_s2) - call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm2,N_det,N_occ_pattern,N_states,psi_s2) - - call save_energy(E_CI_before,pt2) + call save_energy(E_CI_before, pt2_data % pt2) + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + deallocate(E_CI_before) end diff --git a/src/hartree_fock/10.hf.bats b/src/hartree_fock/10.hf.bats index 6f13e595..54845685 100644 --- a/src/hartree_fock/10.hf.bats +++ b/src/hartree_fock/10.hf.bats @@ -11,6 +11,7 @@ function run() { qp edit --check qp reset --mos qp set scf_utils n_it_scf_max 50 + qp set ao_one_e_ints lin_dep_cutoff 1.e-50 qp run scf # qp set_frozen_core energy="$(ezfio get hartree_fock energy)" diff --git a/src/iterations/print_summary.irp.f b/src/iterations/print_summary.irp.f index 87bc28fa..0fabffc7 100644 --- a/src/iterations/print_summary.irp.f +++ b/src/iterations/print_summary.irp.f @@ -1,16 +1,17 @@ -subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_st,s2_) +subroutine print_summary(e_,pt2_data,pt2_data_err,n_det_,n_occ_pattern_,n_st,s2_) + use selection_types implicit none BEGIN_DOC ! Print the extrapolated energy in the output END_DOC integer, intent(in) :: n_det_, n_occ_pattern_, n_st - double precision, intent(in) :: e_(n_st), pt2_(n_st), variance_(n_st), norm_(n_st), error_(n_st), s2_(n_st) + double precision, intent(in) :: e_(n_st), s2_(n_st) + type(pt2_type) , intent(in) :: pt2_data, pt2_data_err integer :: i, k integer :: N_states_p character*(9) :: pt2_string character*(512) :: fmt - double precision :: f(n_st) if (do_pt2) then pt2_string = ' ' @@ -20,10 +21,6 @@ subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_ N_states_p = min(N_det_,n_st) - do i=1,N_states_p - f(i) = 1.d0/(1.d0+norm_(i)) - enddo - print *, '' print '(A,I12)', 'Summary at N_det = ', N_det_ print '(A)', '-----------------------------------' @@ -42,16 +39,16 @@ subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_ write(*,fmt) '# Excit. (eV)', (e_(1:N_states_p)-e_(1))*27.211396641308d0 endif write(fmt,*) '(A13,', 2*N_states_p, '(1X,F14.8))' - write(*,fmt) '# PT2 '//pt2_string, (pt2_(k), error_(k), k=1,N_states_p) - write(*,fmt) '# rPT2'//pt2_string, (pt2_(k)*f(k), error_(k)*f(k), k=1,N_states_p) + write(*,fmt) '# PT2 '//pt2_string, (pt2_data % pt2(k), pt2_data_err % pt2(k), k=1,N_states_p) + write(*,fmt) '# rPT2'//pt2_string, (pt2_data % rpt2(k), pt2_data_err % rpt2(k), k=1,N_states_p) write(*,'(A)') '#' - write(*,fmt) '# E+PT2 ', (e_(k)+pt2_(k),error_(k), k=1,N_states_p) - write(*,fmt) '# E+rPT2 ', (e_(k)+pt2_(k)*f(k),error_(k)*f(k), k=1,N_states_p) + write(*,fmt) '# E+PT2 ', (e_(k)+pt2_data % pt2(k),pt2_data_err % pt2(k), k=1,N_states_p) + write(*,fmt) '# E+rPT2 ', (e_(k)+pt2_data % rpt2(k),pt2_data_err % rpt2(k), k=1,N_states_p) if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', ( (e_(k)+pt2_(k)-e_(1)-pt2_(1)), & - dsqrt(error_(k)*error_(k)+error_(1)*error_(1)), k=1,N_states_p) - write(*,fmt) '# Excit. (eV)', ( (e_(k)+pt2_(k)-e_(1)-pt2_(1))*27.211396641308d0, & - dsqrt(error_(k)*error_(k)+error_(1)*error_(1))*27.211396641308d0, k=1,N_states_p) + write(*,fmt) '# Excit. (au)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1)), & + dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1)), k=1,N_states_p) + write(*,fmt) '# Excit. (eV)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1))*27.211396641308d0, & + dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1))*27.211396641308d0, k=1,N_states_p) endif write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' write(*,fmt) @@ -68,12 +65,12 @@ subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_ print*,'* State ',k print *, '< S^2 > = ', s2_(k) print *, 'E = ', e_(k) - print *, 'Variance = ', variance_(k) - print *, 'PT norm = ', dsqrt(norm_(k)) - print *, 'PT2 = ', pt2_(k) - print *, 'rPT2 = ', pt2_(k)*f(k) - print *, 'E+PT2 '//pt2_string//' = ', e_(k)+pt2_(k), ' +/- ', error_(k) - print *, 'E+rPT2'//pt2_string//' = ', e_(k)+pt2_(k)*f(k), ' +/- ', error_(k)*f(k) + print *, 'Variance = ', pt2_data % variance(k), ' +/- ', pt2_data_err % variance(k) + print *, 'PT norm = ', dsqrt(pt2_data % overlap(k,k)), ' +/- ', 0.5d0*dsqrt(pt2_data % overlap(k,k)) * pt2_data_err % overlap(k,k) / (pt2_data % overlap(k,k)) + print *, 'PT2 = ', pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k) + print *, 'rPT2 = ', pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k) + print *, 'E+PT2 '//pt2_string//' = ', e_(k)+pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k) + print *, 'E+rPT2'//pt2_string//' = ', e_(k)+pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k) print *, '' enddo @@ -87,14 +84,14 @@ subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_ print *, '-----' print*, 'Variational + perturbative Energy difference (au | eV)' do i=2, N_states_p - print*,'Delta E = ', (e_(i)+ pt2_(i) - (e_(1) + pt2_(1))), & - (e_(i)+ pt2_(i) - (e_(1) + pt2_(1))) * 27.211396641308d0 + print*,'Delta E = ', (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))), & + (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))) * 27.211396641308d0 enddo print *, '-----' print*, 'Variational + renormalized perturbative Energy difference (au | eV)' do i=2, N_states_p - print*,'Delta E = ', (e_(i)+ pt2_(i)*f(i) - (e_(1) + pt2_(1)*f(1))), & - (e_(i)+ pt2_(i)*f(i) - (e_(1) + pt2_(1)*f(1))) * 27.211396641308d0 + print*,'Delta E = ', (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))), & + (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))) * 27.211396641308d0 enddo endif diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index d1236ce7..dc44e262 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -186,10 +186,10 @@ END_DOC implicit none - double precision,intent(in) :: Fock_matrix_DIIS(ao_num,ao_num,*),error_matrix_DIIS(ao_num,ao_num,*) + integer,intent(inout) :: dim_DIIS + double precision,intent(in) :: Fock_matrix_DIIS(ao_num,ao_num,dim_DIIS),error_matrix_DIIS(ao_num,ao_num,dim_DIIS) integer,intent(in) :: iteration_SCF, size_Fock_matrix_AO double precision,intent(inout):: Fock_matrix_AO_(size_Fock_matrix_AO,ao_num) - integer,intent(inout) :: dim_DIIS double precision,allocatable :: B_matrix_DIIS(:,:),X_vector_DIIS(:) double precision,allocatable :: C_vector_DIIS(:) @@ -212,11 +212,12 @@ END_DOC ) ! Compute the matrices B and X + B_matrix_DIIS(:,:) = 0.d0 do j=1,dim_DIIS + j_DIIS = min(dim_DIIS,mod(iteration_SCF-j,max_dim_DIIS)+1) do i=1,dim_DIIS - j_DIIS = mod(iteration_SCF-j,max_dim_DIIS)+1 - i_DIIS = mod(iteration_SCF-i,max_dim_DIIS)+1 + i_DIIS = min(dim_DIIS,mod(iteration_SCF-i,max_dim_DIIS)+1) ! Compute product of two errors vectors @@ -229,7 +230,6 @@ END_DOC ! Compute Trace - B_matrix_DIIS(i,j) = 0.d0 do k=1,ao_num B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + scratch(k,k) enddo @@ -238,12 +238,11 @@ END_DOC ! Pad B matrix and build the X matrix + C_vector_DIIS(:) = 0.d0 do i=1,dim_DIIS B_matrix_DIIS(i,dim_DIIS+1) = -1.d0 B_matrix_DIIS(dim_DIIS+1,i) = -1.d0 - C_vector_DIIS(i) = 0.d0 enddo - B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1) = 0.d0 C_vector_DIIS(dim_DIIS+1) = -1.d0 deallocate(scratch) @@ -259,9 +258,10 @@ END_DOC allocate(AF(dim_DIIS+1,dim_DIIS+1)) allocate(ipiv(2*(dim_DIIS+1)), iwork(2*(dim_DIIS+1)) ) allocate(scratch(lwork,1)) + scratch(:,1) = 0.d0 anorm = dlange('1', dim_DIIS+1, dim_DIIS+1, B_matrix_DIIS, & - size(B_matrix_DIIS,1), scratch) + size(B_matrix_DIIS,1), scratch(1,1)) AF(:,:) = B_matrix_DIIS(:,:) call dgetrf(dim_DIIS+1,dim_DIIS+1,AF,size(AF,1),ipiv,info) diff --git a/src/tools/molden.irp.f b/src/tools/molden.irp.f index a7d5b978..417b25ad 100644 --- a/src/tools/molden.irp.f +++ b/src/tools/molden.irp.f @@ -17,7 +17,7 @@ program molden write(i_unit_output,'(A)') '[Molden Format]' - write(i_unit_output,'(A)') '[Atoms] ANGSTROM' + write(i_unit_output,'(A)') '[Atoms] Angs' do i = 1, nucl_num write(i_unit_output,'(A2,2X,I4,2X,I4,3(2X,F15.10))') & trim(element_name(int(nucl_charge(i)))), & diff --git a/src/zmq/utils.irp.f b/src/zmq/utils.irp.f index 503dedd2..07e3a88f 100644 --- a/src/zmq/utils.irp.f +++ b/src/zmq/utils.irp.f @@ -585,6 +585,7 @@ subroutine end_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,name_in) stop 'Wrong end of job' endif + message = repeat(' ',512) do i=360,1,-1 rc = f77_zmq_send(zmq_to_qp_run_socket, 'end_job '//trim(zmq_state),8+len(trim(zmq_state)),0) rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 512, 0) @@ -645,6 +646,7 @@ integer function connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) endif endif + message = repeat(' ',512) rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0) message = trim(message(1:rc)) if(message(1:5) == "error") then