diff --git a/config/ifort_avx.cfg b/config/ifort_avx.cfg index d3fcd1f0..56f1651d 100644 --- a/config/ifort_avx.cfg +++ b/config/ifort_avx.cfg @@ -32,7 +32,7 @@ OPENMP : 1 ; Append OpenMP flags # [OPT] FC : -traceback -FCFLAGS : -xAVX -O2 -ip -ftz -g +FCFLAGS : -mavx -O2 -ip -ftz -g # Profiling flags ################# diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index 918f26ed..f05b64f0 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -11,7 +11,7 @@ END_PROVIDER implicit none logical, external :: testTeethBuilding integer :: i,j - pt2_n_tasks_max = elec_alpha_num*elec_alpha_num + elec_alpha_num*elec_beta_num - n_core_orb*2 + pt2_n_tasks_max = elec_alpha_num*elec_alpha_num + elec_alpha_num*elec_beta_num - n_core_orb*2 pt2_n_tasks_max = min(pt2_n_tasks_max,1+N_det_generators/10000) call write_int(6,pt2_n_tasks_max,'pt2_n_tasks_max') @@ -96,7 +96,7 @@ logical function testTeethBuilding(minF, N) do u0 = tilde_cW(n0) r = tilde_cW(n0 + minF) - Wt = (1d0 - u0) * f + Wt = (1d0 - u0) * f if (dabs(Wt) <= 1.d-3) then exit endif @@ -123,6 +123,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm, N_in) integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull integer, intent(in) :: N_in +! integer, intent(inout) :: N_in double precision, intent(in) :: relative_error, E(N_states) double precision, intent(out) :: pt2(N_states),error(N_states) double precision, intent(out) :: variance(N_states),norm(N_states) @@ -152,7 +153,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm, N_in) PROVIDE psi_occ_pattern_hii det_to_occ_pattern endif - if (N_det <= max(4,N_states)) then + if (N_det <= max(4,N_states) .or. pt2_N_teeth < 2) then pt2=0.d0 variance=0.d0 norm=0.d0 @@ -324,7 +325,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm, N_in) print '(A)', ' Samples Energy Stat. Err Variance Norm Seconds ' print '(A)', '========== ================= =========== =============== =============== =================' - PROVIDE global_selection_buffer + PROVIDE global_selection_buffer !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1) & !$OMP PRIVATE(i) i = omp_get_thread_num() @@ -374,7 +375,7 @@ subroutine pt2_slave_inproc(i) implicit none integer, intent(in) :: i - PROVIDE global_selection_buffer + PROVIDE global_selection_buffer call run_pt2_slave(1,i,pt2_e0_denominator) end @@ -556,8 +557,8 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc print*,'PB !!!' print*,'If you see this, send an email to Anthony scemama with the following content' print*,irp_here - print*,'n_tasks,pt2_n_tasks_max = ',n_tasks,pt2_n_tasks_max - stop -1 + print*,'n_tasks,pt2_n_tasks_max = ',n_tasks,pt2_n_tasks_max + stop -1 endif if (zmq_delete_tasks_async_send(zmq_to_qp_run_socket,task_id,n_tasks,sending) == -1) then stop 'PT2: Unable to delete tasks (send)' @@ -568,7 +569,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc print*,'If you see this, send an email to Anthony scemama with the following content' print*,irp_here print*,'i,index(i),size(ei,2) = ',i,index(i),size(ei,2) - stop -1 + stop -1 endif eI(1:N_states, index(i)) += eI_task(1:N_states,i) vI(1:N_states, index(i)) += vI_task(1:N_states,i) @@ -759,25 +760,25 @@ END_PROVIDER double precision, allocatable :: tilde_w(:), tilde_cW(:) double precision :: r, tooth_width integer, external :: pt2_find_sample - + double precision :: rss double precision, external :: memory_of_double, memory_of_int rss = memory_of_double(2*N_det_generators+1) call check_mem(rss,irp_here) - + if (N_det_generators == 1) then - + pt2_w(1) = 1.d0 pt2_cw(1) = 1.d0 pt2_u_0 = 1.d0 pt2_W_T = 0.d0 pt2_n_0(1) = 0 pt2_n_0(2) = 1 - + else - + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) - + tilde_cW(0) = 0d0 if (is_complex) then @@ -795,9 +796,9 @@ END_PROVIDER do i=N_det_generators,1,-1 norm += tilde_w(i) enddo - + tilde_w(:) = tilde_w(:) / norm - + tilde_cW(0) = -1.d0 do i=1,N_det_generators tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) @@ -818,13 +819,13 @@ END_PROVIDER stop -1 end if end do - + do t=2, pt2_N_teeth r = pt2_u_0 + pt2_W_T * dble(t-1) pt2_n_0(t) = pt2_find_sample(r, tilde_cW) end do pt2_n_0(pt2_N_teeth+1) = N_det_generators - + pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1)) do t=1, pt2_N_teeth tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t)) @@ -836,7 +837,7 @@ END_PROVIDER pt2_w(i) = tilde_w(i) * pt2_w_t / tooth_width end do end do - + pt2_cW(0) = 0d0 do i=1,N_det_generators pt2_cW(i) = pt2_cW(i-1) + pt2_w(i) diff --git a/src/cipsi/run_selection_slave.irp.f b/src/cipsi/run_selection_slave.irp.f index e5d86202..9b87b6ba 100644 --- a/src/cipsi/run_selection_slave.irp.f +++ b/src/cipsi/run_selection_slave.irp.f @@ -104,6 +104,17 @@ subroutine run_selection_slave(thread,iproc,energy) ctask = ctask + 1 end do + if(ctask > 0) then + call sort_selection_buffer(buf) +! call merge_selection_buffers(buf,buf2) + call push_selection_results(zmq_socket_push, pt2, variance, norm, buf, task_id(1), ctask) +! buf%mini = buf2%mini + pt2(:) = 0d0 + variance(:) = 0d0 + norm(:) = 0d0 + buf%cur = 0 + end if + ctask = 0 integer, external :: disconnect_from_taskserver if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 29cbc2d9..f11775f8 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -52,7 +52,7 @@ subroutine update_pt2_and_variance_weights(pt2, variance, norm, N_st) rpt2(k) = pt2(k)/(1.d0 + norm(k)) enddo - avg = sum(rpt2(1:N_st)) / dble(N_st) + avg = sum(rpt2(1:N_st)) / dble(N_st) - 1.d-32 ! Avoid future division by zero do k=1,N_st element = exp(dt*(rpt2(k)/avg -1.d0)) element = min(1.5d0 , element) @@ -61,7 +61,7 @@ subroutine update_pt2_and_variance_weights(pt2, variance, norm, N_st) pt2_match_weight(k) = product(memo_pt2(k,:)) enddo - avg = sum(variance(1:N_st)) / dble(N_st) + avg = sum(variance(1:N_st)) / dble(N_st) + 1.d-32 ! Avoid future division by zero do k=1,N_st element = exp(dt*(variance(k)/avg -1.d0)) element = min(1.5d0 , element) @@ -356,7 +356,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d i = psi_bilinear_matrix_rows(l_a) if (nt + exc_degree(i) <= 4) then ! don't keep anything more than 4-fold total exc idx = psi_det_sorted_order(psi_bilinear_matrix_order(l_a)) - if (psi_average_norm_contrib_sorted(idx) > 1.d-12) then + if (psi_average_norm_contrib_sorted(idx) > 0.d0) then indices(k) = idx k=k+1 endif @@ -397,7 +397,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d idx = psi_det_sorted_order( & psi_bilinear_matrix_order( & psi_bilinear_matrix_transp_order(l_a))) - if (psi_average_norm_contrib_sorted(idx) > 1.d-12) then + if (psi_average_norm_contrib_sorted(idx) > 0.d0) then indices(k) = idx k=k+1 endif diff --git a/src/mo_basis/mos_cplx.irp.f b/src/mo_basis/mos_cplx.irp.f index e25e7717..c88441b7 100644 --- a/src/mo_basis/mos_cplx.irp.f +++ b/src/mo_basis/mos_cplx.irp.f @@ -81,12 +81,19 @@ BEGIN_PROVIDER [ complex*16, mo_coef_complex_kpts, (ao_num_per_kpt, mo_num_per_k integer :: i,j,k, mo_shft, ao_shft mo_coef_complex_kpts = (0.d0,0.d0) + ! do k=1,kpt_num + ! mo_shft = (k-1)*mo_num_per_kpt + ! ao_shft = (k-1)*ao_num_per_kpt + ! do i=1,mo_num_per_kpt + ! do j=1,ao_num_per_kpt + ! mo_coef_complex_kpts(j,i,k) = mo_coef_complex(j+ao_shft,i+mo_shft) + ! enddo + ! enddo + ! enddo do k=1,kpt_num - mo_shft = (k-1)*mo_num_per_kpt - ao_shft = (k-1)*ao_num_per_kpt do i=1,mo_num_per_kpt do j=1,ao_num_per_kpt - mo_coef_complex_kpts(j,i,k) = mo_coef_complex(j+ao_shft,i+mo_shft) + mo_coef_complex_kpts(j,i,k) = mo_coef_kpts(j,i,k) enddo enddo enddo