diff --git a/config/ifort_xHost.cfg b/config/ifort_xHost.cfg new file mode 100644 index 00000000..5d952e54 --- /dev/null +++ b/config/ifort_xHost.cfg @@ -0,0 +1,63 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! +# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=64 + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -xHost -O2 -ip -ftz -g + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -xSSE4.2 -O2 -ip -ftz + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exaceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -xSSE2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -xSSE2 -C -fpe0 -implicitnone + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/configure b/configure index 391f5384..b45bfd27 100755 --- a/configure +++ b/configure @@ -238,7 +238,7 @@ EOF tar --gunzip --extract --file libcap.tar.gz rm libcap.tar.gz cd libcap-*/libcap - prefix=$QP_ROOT make install + prefix=$QP_ROOT make BUILD_GPERF=no install EOF elif [[ ${PACKAGE} = bwrap ]] ; then diff --git a/src/ao_two_e_ints/integrals_in_map_slave.irp.f b/src/ao_two_e_ints/integrals_in_map_slave.irp.f index a91bdecb..122fa2ac 100644 --- a/src/ao_two_e_ints/integrals_in_map_slave.irp.f +++ b/src/ao_two_e_ints/integrals_in_map_slave.irp.f @@ -116,7 +116,7 @@ subroutine ao_two_e_integrals_in_map_slave(thread,iproc) exit endif if (task_id == 0) exit - read(task,*) j, l + call sscanf_dd(task, j, l) integer, external :: task_done_to_taskserver call compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index 489ffaaf..a0107513 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -187,7 +187,7 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in) if (zmq_put_ivector(zmq_to_qp_run_socket,1,'pt2_stoch_istate',pt2_stoch_istate,1) == -1) then stop 'Unable to put pt2_stoch_istate on ZMQ server' endif - if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) then + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) then stop 'Unable to put threshold_generators on ZMQ server' endif diff --git a/src/cipsi/run_selection_slave.irp.f b/src/cipsi/run_selection_slave.irp.f index c2ba2379..91bd3a38 100644 --- a/src/cipsi/run_selection_slave.irp.f +++ b/src/cipsi/run_selection_slave.irp.f @@ -52,7 +52,7 @@ subroutine run_selection_slave(thread,iproc,energy) ctask = ctask - 1 else integer :: i_generator, N, subset, bsize - read(task,*) subset, i_generator, N + call sscanf_ddd(task, subset, i_generator, N) if(buf%N == 0) then ! Only first time call create_selection_buffer(N, N*2, buf) diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 5e6fd7f9..6a6d2a23 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -8,27 +8,56 @@ subroutine get_mask_phase(det1, pm, Nint) integer(bit_kind), intent(out) :: pm(Nint,2) integer(bit_kind) :: tmp1, tmp2 integer :: i - pm(1:Nint,1:2) = det1(1:Nint,1:2) tmp1 = 0_8 tmp2 = 0_8 - do i=1,Nint - pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 1)) - pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 1)) - pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 2)) - pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 2)) - pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 4)) - pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 4)) - pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 8)) - pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 8)) - pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 16)) - pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 16)) - pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 32)) - pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 32)) - pm(i,1) = ieor(pm(i,1), tmp1) - pm(i,2) = ieor(pm(i,2), tmp2) - if(iand(popcnt(det1(i,1)), 1) == 1) tmp1 = not(tmp1) - if(iand(popcnt(det1(i,2)), 1) == 1) tmp2 = not(tmp2) - end do + select case (Nint) + +BEGIN_TEMPLATE + case ($Nint) + do i=1,$Nint + pm(i,1) = ieor(det1(i,1), shiftl(det1(i,1), 1)) + pm(i,2) = ieor(det1(i,2), shiftl(det1(i,2), 1)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 2)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 2)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 4)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 4)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 8)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 8)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 16)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 16)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 32)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 32)) + pm(i,1) = ieor(pm(i,1), tmp1) + pm(i,2) = ieor(pm(i,2), tmp2) + if(iand(popcnt(det1(i,1)), 1) == 1) tmp1 = not(tmp1) + if(iand(popcnt(det1(i,2)), 1) == 1) tmp2 = not(tmp2) + end do +SUBST [ Nint ] +1;; +2;; +3;; +4;; +END_TEMPLATE + case default + do i=1,Nint + pm(i,1) = ieor(det1(i,1), shiftl(det1(i,1), 1)) + pm(i,2) = ieor(det1(i,2), shiftl(det1(i,2), 1)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 2)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 2)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 4)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 4)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 8)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 8)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 16)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 16)) + pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 32)) + pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 32)) + pm(i,1) = ieor(pm(i,1), tmp1) + pm(i,2) = ieor(pm(i,2), tmp2) + if(iand(popcnt(det1(i,1)), 1) == 1) tmp1 = not(tmp1) + if(iand(popcnt(det1(i,2)), 1) == 1) tmp2 = not(tmp2) + end do + end select end subroutine @@ -450,11 +479,17 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d endif do i=1,fullinteresting(0) - fullminilist(1:N_int,1:2,i) = psi_det_sorted(1:N_int,1:2,fullinteresting(i)) + do k=1,N_int + fullminilist(k,1,i) = psi_det_sorted(k,1,fullinteresting(i)) + fullminilist(k,2,i) = psi_det_sorted(k,2,fullinteresting(i)) + enddo enddo do i=1,interesting(0) - minilist(1:N_int,1:2,i) = psi_det_sorted(1:N_int,1:2,interesting(i)) + do k=1,N_int + minilist(k,1,i) = psi_det_sorted(k,1,interesting(i)) + minilist(k,2,i) = psi_det_sorted(k,2,interesting(i)) + enddo enddo do s2=s1,2 @@ -673,10 +708,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d w = 0d0 -! integer(bit_kind) :: occ(N_int,2), n -! call configuration_of_det(det,occ,N_int) -! call configuration_to_dets_size(occ,n,elec_alpha_num,N_int) - e_pert = 0.d0 coef = 0.d0 logical :: do_diag @@ -704,7 +735,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d double precision :: eigvalues(N_states+1) double precision :: work(1+6*(N_states+1)+2*(N_states+1)**2) - integer :: iwork(3+5*(N_states+1)), info, k ,n + integer :: iwork(3+5*(N_states+1)), info, k if (do_diag) then double precision :: pt2_matrix(N_states+1,N_states+1) @@ -770,36 +801,43 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d case(5) ! Variance selection -! w = w - alpha_h_psi * alpha_h_psi * s_weight(istate,istate) - w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) -! do jstate=1,N_states -! if (istate == jstate) cycle -! w = w + dabs(alpha_h_psi*mat(jstate,p1,p2)) * s_weight(istate,jstate) -! enddo + if (h0_type == 'CFG') then + w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) & + / c0_weight(istate) + else + w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) + endif case(6) -! w = w - coef(istate) * coef(istate) * s_weight(istate,istate) - w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate)) -! do jstate=1,N_states -! if (istate == jstate) cycle -! w = w + dabs(coef(istate)*coef(jstate)) * s_weight(istate,jstate) -! enddo + if (h0_type == 'CFG') then + w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate)) & + / c0_weight(istate) + else + w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate)) + endif case default ! Energy selection -! w = w + e_pert(istate) * s_weight(istate,istate) - w = min(w, e_pert(istate) * s_weight(istate,istate)) -! do jstate=1,N_states -! if (istate == jstate) cycle -! w = w + dabs(X(istate)*X(jstate)) * s_weight(istate,jstate) -! enddo + if (h0_type == 'CFG') then + w = min(w, e_pert(istate) * s_weight(istate,istate)) / c0_weight(istate) + else + w = min(w, e_pert(istate) * s_weight(istate,istate)) + endif end select end do -! w = dble(n) * w - + integer(bit_kind) :: occ(N_int,2), n + if (h0_type == 'CFG') then + do k=1,N_int + occ(k,1) = ieor(det(k,1),det(k,2)) + occ(k,2) = iand(det(k,1),det(k,2)) + enddo + call configuration_to_dets_size(occ,n,elec_alpha_num,N_int) + n = max(n,1) + w *= dble(n) + endif if(w <= buf%mini) then call add_to_selection_buffer(buf, det, w) diff --git a/src/cipsi/selection_weight.irp.f b/src/cipsi/selection_weight.irp.f index fb05263a..3c09e59a 100644 --- a/src/cipsi/selection_weight.irp.f +++ b/src/cipsi/selection_weight.irp.f @@ -38,7 +38,7 @@ subroutine update_pt2_and_variance_weights(pt2_data, N_st) avg = sum(pt2(1:N_st)) / dble(N_st) + 1.d-32 ! Avoid future division by zero - dt = 2.d0 !* selection_factor + dt = 8.d0 !* selection_factor do k=1,N_st element = exp(dt*(pt2(k)/avg - 1.d0)) element = min(2.0d0 , element) diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 48ae3c2b..4385430e 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -62,41 +62,43 @@ subroutine get_phase_qp_to_cfg(Ialpha, Ibeta, phaseout) integer(bit_kind),intent(in) :: Ialpha(N_int) integer(bit_kind),intent(in) :: Ibeta(N_int) real*8,intent(out) :: phaseout - integer(bit_kind) :: mask(N_int), deta(N_int), detb(N_int) + integer(bit_kind) :: mask, deta(N_int), detb(N_int) integer :: nbetas - integer :: count, k -if (N_int >1 ) then - stop 'TODO: get_phase_qp_to_cfg ' -endif + integer :: k - nbetas = 0 - mask = 0_bit_kind - count = 0 + ! Initliaze deta and detb deta = Ialpha detb = Ibeta - ! remove the domos - mask = IAND(deta,detb) - deta = IEOR(deta,mask) - detb = IEOR(detb,mask) - mask = 0 - phaseout = 1.0 - k = 1 - do while((deta(k)).GT.0_8) - mask(k) = ISHFT(1_8,count) - if(POPCNT(IAND(deta(k),mask(k))).EQ.1)then - if(IAND(nbetas,1).EQ.0) then - phaseout *= 1.0d0 - else - phaseout *= -1.0d0 - endif - deta(k) = IEOR(deta(k),mask(k)) - else - if(POPCNT(IAND(detb(k),mask(k))).EQ.1) then - nbetas += 1 - detb(k) = IEOR(detb(k),mask(k)) - endif - endif - count += 1 + + ! Find how many alpha electrons there are in all the N_ints + integer :: Na(N_int) + do k=1,N_int + Na(k) = popcnt(deta(k)) + enddo + + integer :: shift, ipos, nperm + phaseout = 1.d0 + do k=1,N_int + + do while(detb(k) /= 0_bit_kind) + ! Find the lowest beta electron and clear it + ipos = trailz(detb(k)) + detb(k) = ibclr(detb(k),ipos) + + ! Create a mask will all MOs higher than the beta electron + mask = not(shiftl(1_bit_kind,ipos + 1) - 1_bit_kind) + + ! Apply the mask to the alpha string to count how many electrons to cross + nperm = popcnt( iand(mask, deta(k)) ) + + ! Count how many alpha electrons are above the beta electron in the other integers + nperm = nperm + sum(Na(k+1:N_int)) + if (iand(nperm,1) == 1) then + phaseout = -phaseout + endif + + enddo + enddo end subroutine get_phase_qp_to_cfg diff --git a/src/davidson/davidson_parallel.irp.f b/src/davidson/davidson_parallel.irp.f index be1873c7..8fd023da 100644 --- a/src/davidson/davidson_parallel.irp.f +++ b/src/davidson/davidson_parallel.irp.f @@ -440,7 +440,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) ipos=1 do imin=1,N_det,tasksize imax = min(N_det,imin-1+tasksize) - if (imin==1) then + if (imin<=N_det/2) then istep = 2 else istep = 1 @@ -507,7 +507,9 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) print *, irp_here, ': Failed in zmq_set_running' endif - call omp_set_max_active_levels(4) + + call omp_set_max_active_levels(5) + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread) ithread = omp_get_thread_num() if (ithread == 0 ) then diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f index 32e36500..55370cc5 100644 --- a/src/davidson/diagonalize_ci.irp.f +++ b/src/davidson/diagonalize_ci.irp.f @@ -57,7 +57,8 @@ END_PROVIDER if (diag_algorithm == "Davidson") then - if (s2_eig.and.only_expected_s2.and.expected_s2==0.d0) then + if (s2_eig.and.only_expected_s2) then +! if (s2_eig.and.only_expected_s2.and.expected_s2==0.d0) then call davidson_diag_H_csf(psi_det,CI_eigenvectors, & size(CI_eigenvectors,1),CI_electronic_energy, & N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged) @@ -78,6 +79,7 @@ END_PROVIDER TOUCH N_states_diag if (s2_eig.and.only_expected_s2) then +! if (s2_eig.and.only_expected_s2.and.expected_s2==0.d0) then allocate (CI_electronic_energy_tmp (N_states_diag) ) allocate (CI_eigenvectors_tmp (N_det,N_states_diag) ) diff --git a/src/determinants/spindeterminants.irp.f b/src/determinants/spindeterminants.irp.f index 232c9e2b..dea4a566 100644 --- a/src/determinants/spindeterminants.irp.f +++ b/src/determinants/spindeterminants.irp.f @@ -674,6 +674,19 @@ subroutine create_wf_of_psi_bilinear_matrix(truncate) ! of $\alpha$ and $\beta$ determinants END_DOC logical, intent(in) :: truncate + + call generate_all_alpha_beta_det_products + call update_wf_of_psi_bilinear_matrix(truncate) + +end + +subroutine update_wf_of_psi_bilinear_matrix(truncate) + use bitmasks + implicit none + BEGIN_DOC + ! Updates a wave function when psi_bilinear_matrix was modified + END_DOC + logical, intent(in) :: truncate integer :: i,j,k integer(bit_kind) :: tmp_det(N_int,2) integer :: idx @@ -681,7 +694,6 @@ subroutine create_wf_of_psi_bilinear_matrix(truncate) double precision :: norm(N_states) PROVIDE psi_bilinear_matrix - call generate_all_alpha_beta_det_products norm = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) & !$OMP PRIVATE(i,j,k,idx,tmp_det) & @@ -717,7 +729,7 @@ subroutine create_wf_of_psi_bilinear_matrix(truncate) enddo psi_det = psi_det_sorted_bit psi_coef = psi_coef_sorted_bit - TOUCH psi_det psi_coef + TOUCH psi_det psi_coef N_det_beta_unique N_det_alpha_unique psi_det_beta_unique psi_det_alpha_unique psi_det = psi_det_sorted psi_coef = psi_coef_sorted norm(1) = 0.d0 @@ -733,7 +745,7 @@ subroutine create_wf_of_psi_bilinear_matrix(truncate) endif enddo N_det = min(i,N_det) - SOFT_TOUCH psi_det psi_coef N_det + SOFT_TOUCH psi_det psi_coef N_det N_det_beta_unique N_det_alpha_unique psi_det_beta_unique psi_det_alpha_unique end @@ -773,7 +785,7 @@ subroutine generate_all_alpha_beta_det_products deallocate(tmp_det) !$OMP END PARALLEL call copy_H_apply_buffer_to_wf - SOFT_TOUCH psi_det psi_coef N_det + SOFT_TOUCH psi_det psi_coef N_det N_det_beta_unique N_det_alpha_unique psi_det_alpha_unique psi_det_beta_unique end @@ -1063,19 +1075,17 @@ subroutine get_all_spin_singles_and_doubles_1(buffer, idx, spindet, size_buffer, integer :: i include 'utils/constants.include.F' integer :: degree - + integer :: add_double(0:64) = (/ 0, 0, 0, 0, 1, (0, i=1,60) /) + integer :: add_single(0:64) = (/ 0, 0, 1, 0, 0, (0, i=1,60) /) n_singles = 1 n_doubles = 1 do i=1,size_buffer degree = popcnt( xor( spindet, buffer(i) ) ) - if ( degree == 4 ) then - doubles(n_doubles) = idx(i) - n_doubles = n_doubles+1 - else if ( degree == 2 ) then - singles(n_singles) = idx(i) - n_singles = n_singles+1 - endif + doubles(n_doubles) = idx(i) + singles(n_singles) = idx(i) + n_doubles = n_doubles+add_double(degree) + n_singles = n_singles+add_single(degree) enddo n_singles = n_singles-1 n_doubles = n_doubles-1 @@ -1101,15 +1111,14 @@ subroutine get_all_spin_singles_1(buffer, idx, spindet, size_buffer, singles, n_ integer :: i integer(bit_kind) :: v integer :: degree + integer :: add_single(0:64) = (/ 0, 0, 1, 0, 0, (0, i=1,60) /) include 'utils/constants.include.F' n_singles = 1 do i=1,size_buffer degree = popcnt(xor( spindet, buffer(i) )) - if (degree == 2) then - singles(n_singles) = idx(i) - n_singles = n_singles+1 - endif + singles(n_singles) = idx(i) + n_singles = n_singles+add_single(degree) enddo n_singles = n_singles-1 @@ -1133,14 +1142,13 @@ subroutine get_all_spin_doubles_1(buffer, idx, spindet, size_buffer, doubles, n_ integer :: i include 'utils/constants.include.F' integer :: degree + integer :: add_double(0:64) = (/ 0, 0, 0, 0, 1, (0, i=1,60) /) n_doubles = 1 do i=1,size_buffer degree = popcnt(xor( spindet, buffer(i) )) - if ( degree == 4 ) then - doubles(n_doubles) = idx(i) - n_doubles = n_doubles+1 - endif + doubles(n_doubles) = idx(i) + n_doubles = n_doubles+add_double(degree) enddo n_doubles = n_doubles-1 @@ -1181,16 +1189,10 @@ subroutine get_all_spin_singles_and_doubles_$N_int(buffer, idx, spindet, size_bu xorvec(k) = xor( spindet(k), buffer(k,i) ) enddo - if (xorvec(1) /= 0_8) then - degree = popcnt(xorvec(1)) - else - degree = 0 - endif + degree = 0 - do k=2,$N_int - if ( (degree <= 4).and.(xorvec(k) /= 0_8) ) then + do k=1,$N_int degree = degree + popcnt(xorvec(k)) - endif enddo if ( degree == 4 ) then @@ -1235,23 +1237,19 @@ subroutine get_all_spin_singles_$N_int(buffer, idx, spindet, size_buffer, single xorvec(k) = xor( spindet(k), buffer(k,i) ) enddo - if (xorvec(1) /= 0_8) then - degree = popcnt(xorvec(1)) - else - degree = 0 - endif + degree = 0 - do k=2,$N_int - if ( (degree <= 2).and.(xorvec(k) /= 0_8) ) then + do k=1,$N_int degree = degree + popcnt(xorvec(k)) - endif enddo - if ( degree == 2 ) then - singles(n_singles) = idx(i) - n_singles = n_singles+1 + if ( degree /= 2 ) then + cycle endif + singles(n_singles) = idx(i) + n_singles = n_singles+1 + enddo n_singles = n_singles-1 @@ -1284,23 +1282,19 @@ subroutine get_all_spin_doubles_$N_int(buffer, idx, spindet, size_buffer, double xorvec(k) = xor( spindet(k), buffer(k,i) ) enddo - if (xorvec(1) /= 0_8) then - degree = popcnt(xorvec(1)) - else - degree = 0 - endif + degree = 0 - do k=2,$N_int - if ( (degree <= 4).and.(xorvec(k) /= 0_8) ) then + do k=1,$N_int degree = degree + popcnt(xorvec(k)) - endif enddo - if ( degree == 4 ) then - doubles(n_doubles) = idx(i) - n_doubles = n_doubles+1 + if ( degree /= 4 ) then + cycle endif + doubles(n_doubles) = idx(i) + n_doubles = n_doubles+1 + enddo n_doubles = n_doubles-1 diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 1406d603..3b9eaeb4 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -23,6 +23,10 @@ END_DOC error_matrix_DIIS(ao_num,ao_num,max_dim_DIIS) & ) + Fock_matrix_DIIS = 0.d0 + error_matrix_DIIS = 0.d0 + mo_coef_save = 0.d0 + call write_time(6) print*,'Energy of the guess = ',SCF_energy @@ -198,7 +202,7 @@ END_DOC double precision,allocatable :: C_vector_DIIS(:) double precision,allocatable :: scratch(:,:) - integer :: i,j,k,i_DIIS,j_DIIS + integer :: i,j,k,l,i_DIIS,j_DIIS double precision :: rcond, ferr, berr integer, allocatable :: iwork(:) integer :: lwork @@ -214,28 +218,22 @@ END_DOC scratch(ao_num,ao_num) & ) -! Compute the matrices B and X + ! Compute the matrices B and X B_matrix_DIIS(:,:) = 0.d0 do j=1,dim_DIIS j_DIIS = min(dim_DIIS,mod(iteration_SCF-j,max_dim_DIIS)+1) - do i=1,dim_DIIS + do i=1,dim_DIIS i_DIIS = min(dim_DIIS,mod(iteration_SCF-i,max_dim_DIIS)+1) -! Compute product of two errors vectors - - call dgemm('N','N',ao_num,ao_num,ao_num, & - 1.d0, & - error_matrix_DIIS(1,1,i_DIIS),size(error_matrix_DIIS,1), & - error_matrix_DIIS(1,1,j_DIIS),size(error_matrix_DIIS,1), & - 0.d0, & - scratch,size(scratch,1)) - -! Compute Trace - - do k=1,ao_num - B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + scratch(k,k) + ! Compute product of two errors vectors + do l=1,ao_num + do k=1,ao_num + B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + & + error_matrix_DIIS(k,l,i_DIIS) * error_matrix_DIIS(k,l,j_DIIS) + enddo enddo + enddo enddo @@ -308,6 +306,7 @@ END_DOC do k=1,dim_DIIS if (dabs(X_vector_DIIS(k)) < 1.d-10) cycle do i=1,ao_num + ! FPE here Fock_matrix_AO_(i,j) = Fock_matrix_AO_(i,j) + & X_vector_DIIS(k)*Fock_matrix_DIIS(i,j,dim_DIIS-k+1) enddo diff --git a/src/tools/print_energy.irp.f b/src/tools/print_energy.irp.f index f78dffc8..4fe1572c 100644 --- a/src/tools/print_energy.irp.f +++ b/src/tools/print_energy.irp.f @@ -14,24 +14,5 @@ end subroutine run implicit none - integer :: i,j - double precision :: i_H_psi_array(N_states) - double precision :: E(N_states) - double precision :: norm(N_states) - - E(1:N_states) = nuclear_repulsion - norm(1:N_states) = 0.d0 - do i=1,N_det - call i_H_psi(psi_det(1,1,i), psi_det, psi_coef, N_int, N_det, & - size(psi_coef,1), N_states, i_H_psi_array) - do j=1,N_states - norm(j) += psi_coef(i,j)*psi_coef(i,j) - E(j) += i_H_psi_array(j) * psi_coef(i,j) - enddo - enddo - - print *, 'Energy:' - do i=1,N_states - print *, E(i)/norm(i) - enddo + print *, psi_energy + nuclear_repulsion end diff --git a/src/two_body_rdm/two_e_dm_mo.irp.f b/src/two_body_rdm/two_e_dm_mo.irp.f new file mode 100644 index 00000000..107161c8 --- /dev/null +++ b/src/two_body_rdm/two_e_dm_mo.irp.f @@ -0,0 +1,40 @@ +BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num,1)] + implicit none + BEGIN_DOC + ! two_e_dm_bb_mo(i,j,k,l,istate) = STATE SPECIFIC physicist notation for 2RDM of beta/beta electrons + ! + ! + ! + ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO ALL OCCUPIED ORBITALS : core, inactive and active + ! + ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec} * (N_{elec} - 1)/2 + ! + ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" + ! + ! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + ! The two-electron energy of each state can be computed as: + ! + ! \sum_{i,j,k,l = 1, n_core_inact_act_orb} two_e_dm_mo(i,j,k,l,istate) * < ii jj | kk ll > + ! + ! with ii = list_core_inact_act(i), jj = list_core_inact_act(j), kk = list_core_inact_act(k), ll = list_core_inact_act(l) + END_DOC + two_e_dm_mo = 0.d0 + integer :: i,j,k,l,iorb,jorb,korb,lorb,istate + + do l=1,mo_num + lorb = list_core_inact_act(l) + do k=1,mo_num + korb = list_core_inact_act(k) + do j=1,mo_num + jorb = list_core_inact_act(j) + do i=1,mo_num + iorb = list_core_inact_act(i) + two_e_dm_mo(iorb,jorb,korb,lorb,1) = state_av_full_occ_2_rdm_spin_trace_mo(i,j,k,l) + enddo + enddo + enddo + enddo + two_e_dm_mo(:,:,:,:,:) = two_e_dm_mo(:,:,:,:,:) * 2.d0 + + END_PROVIDER + diff --git a/src/utils/c_funcs.c b/src/utils/c_funcs.c index 5b7035fb..16b53256 100644 --- a/src/utils/c_funcs.c +++ b/src/utils/c_funcs.c @@ -1,6 +1,40 @@ #include +#include +#include void usleep_c(int s) { usleep((useconds_t) s); } + +void sscanf_ssds_c(const char* str, char* s1, char* s2, int* i, char* s3) +{ + sscanf(str, "%s %s %d %s", s1, s2, i, s3); + s1[strlen(s1)] = ' '; + s2[strlen(s2)] = ' '; + s3[strlen(s3)] = ' '; +} + +void sscanf_dd_c(const char* str, int* i1, int* i2) +{ + sscanf(str, "%d %d", i1, i2); +} + +void sscanf_ddd_c(const char* str, int* i1, int* i2, int* i3) +{ + sscanf(str, "%d %d %d", i1, i2, i3); +} + +void sscanf_ss_c(const char* str, char* s1, char* s2) +{ + sscanf(str, "%s %s", s1, s2); + s1[strlen(s1)] = ' '; + s2[strlen(s2)] = ' '; +} + +void sscanf_sd_c(const char* str, char* s1, int* i) +{ + sscanf(str, "%s %d", s1, i); + s1[strlen(s1)] = ' '; +} + diff --git a/src/utils/c_functions.f90 b/src/utils/c_functions.f90 index 425aafd6..65d4ad62 100644 --- a/src/utils/c_functions.f90 +++ b/src/utils/c_functions.f90 @@ -2,20 +2,133 @@ module c_functions use iso_c_binding interface - subroutine usleep_c(us) bind (C,name="usleep_c") - use iso_c_binding - integer(c_int), value :: us - end subroutine usleep_c + subroutine usleep_c(us) bind (C,name="usleep_c") + use iso_c_binding + integer(c_int), value :: us + end subroutine usleep_c end interface -end module + interface + integer(c_int) function atoi_c(a) bind (C,name="atoi") + use iso_c_binding + character(kind=c_char), intent(in) :: a(*) + end function atoi_c + end interface -subroutine usleep(us) + interface + subroutine sscanf_ss_c(str,s1, s2) bind (C) + use iso_c_binding + character(kind=c_char), intent(in ) :: str(*) + character(kind=c_char), intent(out) :: s1(*),s2(*) + end subroutine sscanf_ss_c + end interface + + interface + subroutine sscanf_ssds_c(str, s1, s2, i, s3) bind (C) + use iso_c_binding + character(kind=c_char), intent(in ) :: str(*) + character(kind=c_char), intent(out) :: s1(*),s2(*),s3(*) + integer(kind=c_int) , intent(out) :: i + end subroutine sscanf_ssds_c + end interface + + interface + subroutine sscanf_dd_c(str, i1, i2) bind (C) + use iso_c_binding + character(kind=c_char), intent(in ) :: str(*) + integer(kind=c_int) , intent(out) :: i1, i2 + end subroutine sscanf_dd_c + end interface + + interface + subroutine sscanf_ddd_c(str, i1, i2, i3) bind (C) + use iso_c_binding + character(kind=c_char), intent(in ) :: str(*) + integer(kind=c_int) , intent(out) :: i1, i2, i3 + end subroutine sscanf_ddd_c + end interface + + interface + subroutine sscanf_sd_c(str,s1, i) bind (C) + use iso_c_binding + character(kind=c_char), intent(in ) :: str(*) + character(kind=c_char), intent(out) :: s1(*) + integer(kind=c_int) , intent(out) :: i + end subroutine sscanf_sd_c + end interface + +contains + + integer function atoi(a) + implicit none + character(len=*), intent(in) :: a + atoi = atoi_c(trim(a)//c_null_char) + end function atoi + +end module c_functions + +subroutine sscanf_ss(str, s1,s2) use c_functions use iso_c_binding implicit none + character(*), intent(in) :: str + character(*), intent(out) :: s1,s2 + s1 = ' ' + s2 = ' ' + call sscanf_ss_c(trim(str)//c_null_char, s1, s2) +end subroutine sscanf_ss + +subroutine sscanf_sd(str, s1,i) + use c_functions + use iso_c_binding + implicit none + character(*), intent(in) :: str + character(*), intent(out) :: s1 + integer, intent(out) :: i + s1 = ' ' + call sscanf_sd_c(trim(str)//c_null_char, s1, i) +end subroutine sscanf_sd + +subroutine sscanf_ssds(str, s1,s2,i,s3) + use c_functions + use iso_c_binding + implicit none + character(*), intent(in) :: str + character(*), intent(out) :: s1,s2,s3 + integer, intent(out) :: i + s1 = ' ' + s2 = ' ' + s3 = ' ' + call sscanf_ssds_c(trim(str)//c_null_char, s1, s2, i, s3) +end subroutine sscanf_ssds + +subroutine sscanf_dd(str, i1,i2) + use c_functions + use iso_c_binding + implicit none + character(*), intent(in) :: str + integer, intent(out) :: i1, i2 + call sscanf_dd_c(trim(str)//c_null_char, i1, i2) +end subroutine sscanf_dd + +subroutine sscanf_ddd(str, i1,i2,i3) + use c_functions + use iso_c_binding + implicit none + character(*), intent(in) :: str + integer, intent(out) :: i1, i2, i3 + call sscanf_ddd_c(trim(str)//c_null_char, i1, i2, i3) +end subroutine sscanf_ddd + + +subroutine usleep(us) + use iso_c_binding + use c_functions + implicit none integer, intent(in) :: us integer(c_int) :: u u = us call usleep_c(u) -end +end subroutine usleep + + diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index c907e425..974417b1 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -447,7 +447,7 @@ double precision function rint(n,rho) else u_inv=1.d0/dsqrt(rho) u=rho*u_inv - rint=0.5d0*u_inv*sqpi*erf(u) + rint=0.5d0*u_inv*sqpi*derf(u) endif return endif @@ -463,7 +463,7 @@ double precision function rint(n,rho) endif u=rho*u_inv two_rho_inv = 0.5d0*u_inv*u_inv - val0=0.5d0*u_inv*sqpi*erf(u) + val0=0.5d0*u_inv*sqpi*derf(u) rint=(val0-v)*two_rho_inv do k=2,n rint=(rint*dfloat(k+k-1)-v)*two_rho_inv @@ -496,7 +496,7 @@ double precision function rint_sum(n_pt_out,rho,d1) else u_inv=1.d0/dsqrt(rho) u=rho*u_inv - rint_sum=0.5d0*u_inv*sqpi*erf(u) *d1(0) + rint_sum=0.5d0*u_inv*sqpi*derf(u) *d1(0) endif do i=2,n_pt_out,2 @@ -515,7 +515,7 @@ double precision function rint_sum(n_pt_out,rho,d1) u_inv=1.d0/dsqrt(rho) u=rho*u_inv two_rho_inv = 0.5d0*u_inv*u_inv - val0=0.5d0*u_inv*sqpi*erf(u) + val0=0.5d0*u_inv*sqpi*derf(u) rint_sum=val0*d1(0) rint_tmp=(val0-v)*two_rho_inv di = 3.d0 diff --git a/src/zmq/utils.irp.f b/src/zmq/utils.irp.f index 7d6f50ad..93dbd16a 100644 --- a/src/zmq/utils.irp.f +++ b/src/zmq/utils.irp.f @@ -16,6 +16,7 @@ END_PROVIDER BEGIN_PROVIDER [ character*(128), qp_run_address ] &BEGIN_PROVIDER [ integer, zmq_port_start ] use f77_zmq + use c_functions implicit none BEGIN_DOC ! Address of the qp_run socket @@ -32,14 +33,15 @@ END_PROVIDER do i=len(buffer),1,-1 if ( buffer(i:i) == ':') then qp_run_address = trim(buffer(1:i-1)) - read(buffer(i+1:), *, err=10,end=10) zmq_port_start + zmq_port_start = atoi(buffer(i+1:)) exit endif enddo - return - 10 continue - print *, irp_here, ': Error in read' - stop -1 + + if (zmq_port_start == 0) then + print *, irp_here, ': zmq_port_start is 0' + stop -1 + endif END_PROVIDER BEGIN_PROVIDER [ character*(128), zmq_socket_pull_tcp_address ] @@ -84,6 +86,7 @@ end subroutine switch_qp_run_to_master use f77_zmq + use c_functions implicit none BEGIN_DOC ! Address of the master qp_run socket @@ -102,16 +105,17 @@ subroutine switch_qp_run_to_master do i=len(buffer),1,-1 if ( buffer(i:i) == ':') then qp_run_address = trim(buffer(1:i-1)) - read(buffer(i+1:), *, end=10, err=10) zmq_port_start + zmq_port_start = atoi(buffer(i+1:)) exit endif enddo call reset_zmq_addresses return - 10 continue - print *, irp_here, ': Error in read' - stop -1 + if (zmq_port_start == 0) then + print *, irp_here, ': zmq_port_start is 0' + stop -1 + endif end @@ -650,12 +654,17 @@ integer function connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0) message = trim(message(1:rc)) if(message(1:5) == "error") then - go to 10 + connect_to_taskserver = -1 + return end if - read(message,*, end=10, err=10) reply, state, worker_id, address + + call sscanf_ssds(message, reply, state, worker_id, address) + if (trim(reply) /= 'connect_reply') then - go to 10 + connect_to_taskserver = -1 + return endif + if (trim(state) /= zmq_state) then integer, external :: disconnect_from_taskserver_state if (disconnect_from_taskserver_state(zmq_to_qp_run_socket, worker_id, state) == -1) then @@ -663,13 +672,8 @@ integer function connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) continue endif connect_to_taskserver = -1 - return endif - return - 10 continue -! print *, irp_here//': '//trim(message) - connect_to_taskserver = -1 end integer function disconnect_from_taskserver(zmq_to_qp_run_socket, worker_id) @@ -698,11 +702,11 @@ integer function disconnect_from_taskserver_state(zmq_to_qp_run_socket, worker_i character*(512) :: message, reply character*(128) :: state_tmp - disconnect_from_taskserver_state = 0 + disconnect_from_taskserver_state = -1 write(message,*) 'disconnect '//trim(state), worker_id - sze = len(trim(message)) + sze = min(510,len(trim(message))) rc = f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0) if (rc /= sze) then @@ -711,23 +715,22 @@ integer function disconnect_from_taskserver_state(zmq_to_qp_run_socket, worker_i endif rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0) + if (rc <= 0) then + disconnect_from_taskserver_state = -3 + return + endif + rc = min(510,rc) message = trim(message(1:rc)) - read(message,*, end=10, err=10) reply, state_tmp - if ((trim(reply) == 'disconnect_reply').and.(trim(state_tmp) == trim(state))) then - return - endif - if (trim(message) == 'error Wrong state') then - disconnect_from_taskserver_state = -1 - return - else if (trim(message) == 'error No job is running') then - disconnect_from_taskserver_state = -1 + call sscanf_ss(message, reply, state_tmp) + + if (trim(state_tmp) /= trim(state)) then return endif - return - 10 continue - disconnect_from_taskserver_state = -1 + if ((trim(reply) == 'disconnect_reply')) then + disconnect_from_taskserver_state = 0 + endif end integer function add_task_to_taskserver(zmq_to_qp_run_socket,task) @@ -893,7 +896,7 @@ integer function get_task_from_taskserver(zmq_to_qp_run_socket,worker_id,task_id character*(1024) :: message character*(64) :: reply - integer :: rc, sze + integer :: rc, sze, i get_task_from_taskserver = 0 @@ -906,16 +909,16 @@ integer function get_task_from_taskserver(zmq_to_qp_run_socket,worker_id,task_id return endif - message = repeat(' ',1024) + task_id = 0 + message = ' ' rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 1024, 0) - if (rc <= 0) then - print *, rc - stop "rc" - end if - rc = min(1024,rc) - read(message(1:rc),*, end=10, err=10) reply - if (trim(reply) == 'get_task_reply') then - read(message(1:rc),*, end=10, err=10) reply, task_id + i = 1 + do while (message(i:i) /= ' ') + i = i+1 + enddo + reply = message(1:i-1) + if (reply == 'get_task_reply') then + call sscanf_sd(message, reply, task_id) rc = 15 do while (rc < 1024 .and. message(rc:rc) == ' ') rc += 1 @@ -926,27 +929,21 @@ integer function get_task_from_taskserver(zmq_to_qp_run_socket,worker_id,task_id rc += 1 task = message(rc:) else if (trim(reply) == 'terminate') then - task_id = 0 task = 'terminate' else if (trim(message) == 'error No job is running') then - task_id = 0 task = 'terminate' else if (trim(message) == 'error Wrong state') then - task_id = 0 task = 'terminate' else get_task_from_taskserver = -1 return endif - return - - 10 continue - get_task_from_taskserver = -1 end integer function get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id,task_id,task,n_tasks) + use c_functions use f77_zmq implicit none BEGIN_DOC @@ -964,7 +961,7 @@ integer function get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id,task_i get_tasks_from_taskserver = 0 - write(message,*) 'get_tasks '//trim(zmq_state), worker_id, n_tasks + write(message,'(A,A,X,I10,I10)') 'get_tasks ', trim(zmq_state), worker_id, n_tasks sze = len(trim(message)) rc = f77_zmq_send(zmq_to_qp_run_socket, message, sze, 0) @@ -975,8 +972,10 @@ integer function get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id,task_i message = repeat(' ',1024) rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 1024, 0) - rc = min(1024,rc) - read(message(1:rc),*, end=10, err=10) reply + if (rc <= 0) then + get_tasks_from_taskserver = -1 + return + endif if (trim(message) == 'get_tasks_reply ok') then continue else if (trim(message) == 'terminate') then @@ -994,18 +993,22 @@ integer function get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id,task_i do i=1,n_tasks message = repeat(' ',512) rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 1024, 0) + if (rc <= 0) then + get_tasks_from_taskserver = -1 + return + endif rc = min(1024,rc) - read(message(1:rc),*, end=10, err=10) task_id(i) + task_id(i) = atoi(message(1:rc)) if (task_id(i) == 0) then task(i) = 'terminate' n_tasks = i exit endif rc = 1 - do while (message(rc:rc) == ' ') + do while (rc < 1024 .and. message(rc:rc) == ' ') rc += 1 enddo - do while (message(rc:rc) /= ' ') + do while (rc < 1024 .and. message(rc:rc) /= ' ') rc += 1 enddo rc += 1