diff --git a/.travis.yml b/.travis.yml index c7730ed7..06fd4c96 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ os: linux -dist: trusty +dist: bionic sudo: false @@ -19,9 +19,11 @@ addons: packages: - gfortran - gcc - - liblapack-dev - - libblas-dev + - libatlas-base-dev +# - liblapack-dev +# - libblas-dev - wget + - eatmydata env: - OPAMROOT=$HOME/.opam @@ -29,12 +31,23 @@ env: cache: directories: - $HOME/.opam/ + - $HOME/cache language: python python: - - "2.7" + - "3.7" + +stages: + - configuration + - compilation + - testing + +jobs: + include: + - stage: configuration + script: eatmydata travis/configuration.sh + - stage: compilation + script: eatmydata travis/compilation.sh + - stage: testing + script: eatmydata travis/testing.sh -script: - - ./configure --install all --config ./config/travis.cfg - - source ./quantum_package.rc ; ninja -j 1 -v - - source ./quantum_package.rc ; qp_test -a diff --git a/INSTALL.rst b/INSTALL.rst index f1657dbb..dd961950 100644 --- a/INSTALL.rst +++ b/INSTALL.rst @@ -36,7 +36,7 @@ Requirements - Fortran compiler : GNU Fortran, Intel Fortran or IBM XL Fortran - `GNU make`_ - `Autoconf`_ -- `Python`_ > 3.0 +- `Python`_ > 3.7 - |IRPF90| : Fortran code generator - |EZFIO| : Easy Fortran Input/Output library generator - |BLAS| and |LAPACK| @@ -142,6 +142,14 @@ IRPF90 *IRPF90* is a Fortran code generator for programming using the Implicit Reference to Parameters (IRP) method. +If you have *pip* for Python2, you can do + +.. code:: bash + + python2 -m pip install --user irpf90 + +Otherwise, + * Download the latest version of IRPF90 here : ``_ and move the downloaded archive in the :file:`${QP_ROOT}/external` directory @@ -385,3 +393,17 @@ Otherwise, * Copy :file:`docopt-0.6.2/docopt.py` in the :file:`${QP_ROOT}/scripts` directory +resultsFile +----------- + +*resultsFile* is a Python package to extract data from output files of quantum chemistry +codes. + +If you have *pip* for Python3, you can do + +.. code:: bash + + python3 -m pip install --user resultsFile + + + diff --git a/config/travis.cfg b/config/travis.cfg index 2be5d9a0..a609002e 100644 --- a/config/travis.cfg +++ b/config/travis.cfg @@ -11,9 +11,9 @@ # [COMMON] FC : gfortran -ffree-line-length-none -I . -g -fPIC -LAPACK_LIB : -llapack -lblas +LAPACK_LIB : -llapack -lblas IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 --assert +IRPF90_FLAGS : --ninja --align=32 --assert # Global options ################ @@ -35,14 +35,14 @@ OPENMP : 1 ; Append OpenMP flags # -ffast-math and the Fortran-specific # -fno-protect-parens and -fstack-arrays. [OPT] -FCFLAGS : -Ofast -march=native +FCFLAGS : -Ofast -march=native # Profiling flags ################# # [PROFILE] -FC : -p -g +FC : -p -g FCFLAGS : -Ofast -fimplicit-none @@ -53,7 +53,7 @@ FCFLAGS : -Ofast -fimplicit-none # -g : Extra debugging information # [DEBUG] -FCFLAGS : -Ofast -fcheck=all -g -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant +FCFLAGS : -Ofast -fcheck=all -g -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant # OpenMP flags @@ -61,5 +61,5 @@ FCFLAGS : -Ofast -fcheck=all -g -Waliasing -Wampersand -Wconversion -Wsurprising # [OPENMP] FC : -fopenmp -IRPF90_FLAGS : --openmp +IRPF90_FLAGS : --openmp diff --git a/configure b/configure index 5c5ce1ab..4f930a34 100755 --- a/configure +++ b/configure @@ -17,7 +17,7 @@ export CC=gcc # /!\ When updating version, update also etc files -EZFIO_TGZ="EZFIO.2.0.2.tar.gz" +EZFIO_TGZ="EZFIO-v2.0.3.tar.gz" BATS_URL="https://github.com/bats-core/bats-core/archive/v1.1.0.tar.gz" BUBBLE_URL="https://github.com/projectatomic/bubblewrap/releases/download/v0.3.3/bubblewrap-0.3.3.tar.xz" DOCOPT_URL="https://github.com/docopt/docopt/archive/0.6.2.tar.gz" @@ -185,7 +185,7 @@ if [[ ${EZFIO} = $(not_found) ]] ; then cd "\${QP_ROOT}"/external tar --gunzip --extract --file ${EZFIO_TGZ} rm -rf ezfio - mv EZFIO ezfio + mv EZFIO ezfio || mv EZFIO-v*/ ezfio EOF fi @@ -500,7 +500,7 @@ echo " ||----w | " echo " || || " echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" echo "" -echo "If you have PIP, you can install the Basis Sex Exchange command-line tool:" +echo "If you have PIP, you can install the Basis Set Exchange command-line tool:" echo "" echo " ./configure -i bse" echo "" diff --git a/external/EZFIO-v2.0.3.tar.gz b/external/EZFIO-v2.0.3.tar.gz new file mode 100644 index 00000000..6455aaa3 Binary files /dev/null and b/external/EZFIO-v2.0.3.tar.gz differ diff --git a/external/EZFIO.2.0.2.tar.gz b/external/EZFIO.2.0.2.tar.gz deleted file mode 100644 index 3d1e7d75..00000000 Binary files a/external/EZFIO.2.0.2.tar.gz and /dev/null differ diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f index a6ab6f8e..34b16ff3 100644 --- a/src/cipsi/cipsi.irp.f +++ b/src/cipsi/cipsi.irp.f @@ -1,34 +1,43 @@ subroutine run_cipsi implicit none + use selection_types BEGIN_DOC ! Selected Full Configuration Interaction with deterministic selection and ! stochastic PT2. END_DOC integer :: i,j,k - double precision, allocatable :: pt2(:), variance(:), norm(:), rpt2(:), zeros(:) - integer :: n_det_before, to_select + type(pt2_type) :: pt2_data, pt2_data_err + double precision, allocatable :: zeros(:) + integer :: to_select + logical, external :: qp_stop + double precision :: threshold_generators_save double precision :: rss double precision, external :: memory_of_double + PROVIDE H_apply_buffer_allocated + + N_iter = 1 + threshold_generators = 1.d0 + SOFT_TOUCH threshold_generators + rss = memory_of_double(N_states)*4.d0 call check_mem(rss,irp_here) - N_iter = 1 - allocate (pt2(N_states), zeros(N_states), rpt2(N_states), norm(N_states), variance(N_states)) + allocate (zeros(N_states)) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) double precision :: hf_energy_ref logical :: has double precision :: relative_error - PROVIDE H_apply_buffer_allocated - relative_error=PT2_relative_error zeros = 0.d0 - pt2 = -huge(1.e0) - rpt2 = -huge(1.e0) - norm = 0.d0 - variance = huge(1.e0) + pt2_data % pt2 = -huge(1.e0) + pt2_data % rpt2 = -huge(1.e0) + pt2_data % overlap(:,:) = 0.d0 + pt2_data % variance = huge(1.e0) if (s2_eig) then call make_s2_eigenfunction @@ -55,61 +64,57 @@ subroutine run_cipsi call save_wavefunction endif - n_det_before = 0 - double precision :: correlation_energy_ratio - double precision :: threshold_generators_save - threshold_generators_save = threshold_generators - double precision :: error(N_states) - logical, external :: qp_stop correlation_energy_ratio = 0.d0 do while ( & (N_det < N_det_max) .and. & - (maxval(abs(rpt2(1:N_states))) > pt2_max) .and. & - (maxval(variance(1:N_states)) > variance_max) .and. & - (correlation_energy_ratio <= correlation_energy_ratio_max) & + (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and. & + (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and. & + (correlation_energy_ratio <= correlation_energy_ratio_max) & ) write(*,'(A)') '--------------------------------------------------------------------------------' + to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) + to_select = max(N_states_diag, to_select) if (do_pt2) then - pt2 = 0.d0 - variance = 0.d0 - norm = 0.d0 + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) + threshold_generators_save = threshold_generators threshold_generators = 1.d0 SOFT_TOUCH threshold_generators - call ZMQ_pt2(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, & - norm, 0) ! Stochastic PT2 + call ZMQ_pt2(psi_energy_with_nucl_rep,pt2_data,pt2_data_err,relative_error, 0) ! Stochastic PT2 threshold_generators = threshold_generators_save SOFT_TOUCH threshold_generators + else + call pt2_dealloc(pt2_data) + call pt2_alloc(pt2_data, N_states) + call ZMQ_selection(to_select, pt2_data) endif - do k=1,N_states - rpt2(k) = pt2(k)/(1.d0 + norm(k)) - enddo - correlation_energy_ratio = (psi_energy_with_nucl_rep(1) - hf_energy_ref) / & - (psi_energy_with_nucl_rep(1) + rpt2(1) - hf_energy_ref) + (psi_energy_with_nucl_rep(1) + pt2_data % rpt2(1) - hf_energy_ref) correlation_energy_ratio = min(1.d0,correlation_energy_ratio) call write_double(6,correlation_energy_ratio, 'Correlation ratio') - call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm,N_det,N_occ_pattern,N_states,psi_s2) + call print_summary(psi_energy_with_nucl_rep, & + pt2_data, pt2_data_err, N_det,N_occ_pattern,N_states,psi_s2) - call save_energy(psi_energy_with_nucl_rep, rpt2) + call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det) + call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) call print_extrapolated_energy() N_iter += 1 - if (qp_stop()) exit + if (qp_stop()) exit - n_det_before = N_det - to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) - to_select = max(N_states_diag, to_select) - to_select = max(to_select,1) - call ZMQ_selection(to_select, pt2, variance, norm) + ! Add selected determinants + call copy_H_apply_buffer_to_wf() +! call save_wavefunction PROVIDE psi_coef PROVIDE psi_det @@ -118,11 +123,7 @@ subroutine run_cipsi call diagonalize_CI call save_wavefunction call save_energy(psi_energy_with_nucl_rep, zeros) - if (qp_stop()) exit -print *, (N_det < N_det_max) -print *, (maxval(abs(rpt2(1:N_states))) > pt2_max) -print *, (maxval(variance(1:N_states)) > variance_max) -print *, (correlation_energy_ratio <= correlation_energy_ratio_max) + if (qp_stop()) exit enddo if (.not.qp_stop()) then @@ -133,13 +134,13 @@ print *, (correlation_energy_ratio <= correlation_energy_ratio_max) endif if (do_pt2) then - pt2(:) = 0.d0 - variance(:) = 0.d0 - norm(:) = 0.d0 + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) threshold_generators = 1d0 SOFT_TOUCH threshold_generators - call ZMQ_pt2(psi_energy_with_nucl_rep, pt2,relative_error,error,variance, & - norm,0) ! Stochastic PT2 + call ZMQ_pt2(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, relative_error, 0) ! Stochastic PT2 SOFT_TOUCH threshold_generators endif print *, 'N_det = ', N_det @@ -147,15 +148,13 @@ print *, (correlation_energy_ratio <= correlation_energy_ratio_max) print *, 'N_states = ', N_states print*, 'correlation_ratio = ', correlation_energy_ratio - - do k=1,N_states - rpt2(k) = pt2(k)/(1.d0 + norm(k)) - enddo - - call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm,N_det,N_occ_pattern,N_states,psi_s2) - call save_energy(psi_energy_with_nucl_rep, rpt2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det) + call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) + call print_summary(psi_energy_with_nucl_rep(1:N_states), & + pt2_data, pt2_data_err, N_det,N_occ_pattern,N_states,psi_s2) + call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) call print_extrapolated_energy() endif + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) end diff --git a/src/cipsi/energy.irp.f b/src/cipsi/energy.irp.f index 37b29593..1d8c6bf5 100644 --- a/src/cipsi/energy.irp.f +++ b/src/cipsi/energy.irp.f @@ -37,3 +37,11 @@ BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ] END_PROVIDER +BEGIN_PROVIDER [ double precision, pt2_overlap, (N_states, N_states) ] + implicit none + BEGIN_DOC + ! Overlap between the perturbed wave functions + END_DOC + pt2_overlap(1:N_states,1:N_states) = 0.d0 +END_PROVIDER + diff --git a/src/cipsi/pert_rdm_providers.irp.f b/src/cipsi/pert_rdm_providers.irp.f index e2917261..caea57b2 100644 --- a/src/cipsi/pert_rdm_providers.irp.f +++ b/src/cipsi/pert_rdm_providers.irp.f @@ -31,7 +31,7 @@ BEGIN_PROVIDER [double precision, pert_2rdm_provider, (n_orb_pert_rdm,n_orb_pert END_PROVIDER -subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm, mat, buf, psi_det_connection, psi_coef_connection_reverse, n_det_connection) +subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf, psi_det_connection, psi_coef_connection_reverse, n_det_connection) use bitmasks use selection_types implicit none @@ -44,12 +44,10 @@ subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fo logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num) double precision, intent(in) :: fock_diag_tmp(mo_num) double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - double precision, intent(inout) :: variance(N_states) - double precision, intent(inout) :: norm(N_states) + type(pt2_type), intent(inout) :: pt2_data type(selection_buffer), intent(inout) :: buf logical :: ok - integer :: s1, s2, p1, p2, ib, j, istate + integer :: s1, s2, p1, p2, ib, j, istate, jstate integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) double precision :: e_pert, delta_E, val, Hii, sum_e_pert, tmp, alpha_h_psi, coef(N_states) double precision, external :: diag_H_mat_elem_fock @@ -152,9 +150,16 @@ subroutine fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fo e_pert = 0.5d0 * (tmp - delta_E) coef(istate) = e_pert / alpha_h_psi print*,e_pert,coef,alpha_h_psi - pt2(istate) = pt2(istate) + e_pert - variance(istate) = variance(istate) + alpha_h_psi * alpha_h_psi - norm(istate) = norm(istate) + coef(istate) * coef(istate) + pt2_data % pt2(istate) += e_pert + pt2_data % variance(istate) += alpha_h_psi * alpha_h_psi + enddo + + do istate=1,N_states + alpha_h_psi = mat(istate, p1, p2) + e_pert = coef(istate) * alpha_h_psi + do jstate=1,N_states + pt2_data % overlap(jstate,jstate) = coef(istate) * coef(jstate) + enddo if (weight_selection /= 5) then ! Energy selection diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index b81eecda..31f27e1d 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -61,15 +61,15 @@ logical function testTeethBuilding(minF, N) allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) - norm = 0.d0 - double precision :: norm + double precision :: norm2 + norm2 = 0.d0 do i=N_det_generators,1,-1 tilde_w(i) = psi_coef_sorted_gen(i,pt2_stoch_istate) * & psi_coef_sorted_gen(i,pt2_stoch_istate) - norm = norm + tilde_w(i) + norm2 = norm2 + tilde_w(i) enddo - f = 1.d0/norm + f = 1.d0/norm2 tilde_w(:) = tilde_w(:) * f tilde_cW(0) = -1.d0 @@ -107,7 +107,7 @@ end function -subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm, N_in) +subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in) use f77_zmq use selection_types @@ -117,10 +117,8 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm, N_in) integer, intent(in) :: N_in ! integer, intent(inout) :: N_in double precision, intent(in) :: relative_error, E(N_states) - double precision, intent(out) :: pt2(N_states),error(N_states) - double precision, intent(out) :: variance(N_states),norm(N_states) - - + type(pt2_type), intent(inout) :: pt2_data, pt2_data_err +! integer :: i, N double precision :: state_average_weight_save(N_states), w(N_states,4) @@ -138,11 +136,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm, N_in) endif if (N_det <= max(4,N_states) .or. pt2_N_teeth < 2) then - pt2=0.d0 - variance=0.d0 - norm=0.d0 - call ZMQ_selection(N_in, pt2, variance, norm) - error(:) = 0.d0 + call ZMQ_selection(N_in, pt2_data) else N = max(N_in,1) * N_states @@ -248,8 +242,8 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm, N_in) mem_collector = 8.d0 * & ! bytes ( 1.d0*pt2_n_tasks_max & ! task_id, index + 0.635d0*N_det_generators & ! f,d - + 3.d0*N_det_generators*N_states & ! eI, vI, nI - + 3.d0*pt2_n_tasks_max*N_states & ! eI_task, vI_task, nI_task + + pt2_n_tasks_max*pt2_type_size(N_states) & ! pt2_data_task + + N_det_generators*pt2_type_size(N_states) & ! pt2_data_I + 4.d0*(pt2_N_teeth+1) & ! S, S2, T2, T3 + 1.d0*(N_int*2.d0*N + N) & ! selection buffer + 1.d0*(N_int*2.d0*N + N) & ! sort selection buffer @@ -264,7 +258,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm, N_in) nproc_target * 8.d0 * & ! bytes ( 0.5d0*pt2_n_tasks_max & ! task_id + 64.d0*pt2_n_tasks_max & ! task - + 3.d0*pt2_n_tasks_max*N_states & ! pt2, variance, norm + + pt2_type_size(N_states)*pt2_n_tasks_max*N_states & ! pt2, variance, overlap + 1.d0*pt2_n_tasks_max & ! i_generator, subset + 1.d0*(N_int*2.d0*ii+ ii) & ! selection buffer + 1.d0*(N_int*2.d0*ii+ ii) & ! sort selection buffer @@ -293,21 +287,24 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm, N_in) call omp_set_nested(.false.) - print '(A)', '========== ================= =========== =============== =============== =================' - print '(A)', ' Samples Energy Stat. Err Variance Norm Seconds ' - print '(A)', '========== ================= =========== =============== =============== =================' + print '(A)', '========== ======================= ===================== ===================== ===========' + print '(A)', ' Samples Energy Variance Norm^2 Seconds' + print '(A)', '========== ======================= ===================== ===================== ===========' PROVIDE global_selection_buffer + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1) & !$OMP PRIVATE(i) i = omp_get_thread_num() if (i==0) then - call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, w(1,1), w(1,2), w(1,3), w(1,4), b, N) - pt2(pt2_stoch_istate) = w(pt2_stoch_istate,1) - error(pt2_stoch_istate) = w(pt2_stoch_istate,2) - variance(pt2_stoch_istate) = w(pt2_stoch_istate,3) - norm(pt2_stoch_istate) = w(pt2_stoch_istate,4) + call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, pt2_data, pt2_data_err, b, N) + pt2_data % rpt2(pt2_stoch_istate) = & + pt2_data % pt2(pt2_stoch_istate)/(1.d0+pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate)) + + !TODO : We should use here the correct formula for the error of X/Y + pt2_data_err % rpt2(pt2_stoch_istate) = & + pt2_data_err % pt2(pt2_stoch_istate)/(1.d0 + pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate)) else call pt2_slave_inproc(i) @@ -315,11 +312,30 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm, N_in) !$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2') - print '(A)', '========== ================= =========== =============== =============== =================' + print '(A)', '========== ======================= ===================== ===================== ===========' + + do k=1,N_states + pt2_overlap(pt2_stoch_istate,k) = pt2_data % overlap(k,pt2_stoch_istate) + enddo + SOFT_TOUCH pt2_overlap enddo FREE pt2_stoch_istate + ! Symmetrize overlap + do j=2,N_states + do i=1,j-1 + pt2_overlap(i,j) = 0.5d0 * (pt2_overlap(i,j) + pt2_overlap(j,i)) + pt2_overlap(j,i) = pt2_overlap(i,j) + enddo + enddo + + print *, 'Overlap of perturbed states:' + do k=1,N_states + print *, pt2_overlap(k,:) + enddo + print *, '-------' + if (N_in > 0) then b%cur = min(N_in,b%cur) if (s2_eig) then @@ -334,11 +350,8 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm, N_in) state_average_weight(:) = state_average_weight_save(:) TOUCH state_average_weight endif - do k=N_det+1,N_states - pt2(k) = 0.d0 - enddo - call update_pt2_and_variance_weights(pt2, variance, norm, N_states) + call update_pt2_and_variance_weights(pt2_data, N_states) end subroutine @@ -352,7 +365,7 @@ subroutine pt2_slave_inproc(i) end -subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, variance, norm, b, N_) +subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_err, b, N_) use f77_zmq use selection_types use bitmasks @@ -361,15 +374,15 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc integer(ZMQ_PTR), intent(in) :: zmq_socket_pull double precision, intent(in) :: relative_error, E - double precision, intent(out) :: pt2(N_states), error(N_states) - double precision, intent(out) :: variance(N_states), norm(N_states) + type(pt2_type), intent(inout) :: pt2_data, pt2_data_err type(selection_buffer), intent(inout) :: b integer, intent(in) :: N_ - - double precision, allocatable :: eI(:,:), eI_task(:,:), S(:), S2(:) - double precision, allocatable :: vI(:,:), vI_task(:,:), T2(:) - double precision, allocatable :: nI(:,:), nI_task(:,:), T3(:) + type(pt2_type), allocatable :: pt2_data_task(:) + type(pt2_type), allocatable :: pt2_data_I(:) + type(pt2_type), allocatable :: pt2_data_S(:) + type(pt2_type), allocatable :: pt2_data_S2(:) + type(pt2_type) :: pt2_data_teeth integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer, external :: zmq_delete_tasks_async_send @@ -377,11 +390,14 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc integer, external :: zmq_abort integer, external :: pt2_find_sample_lr + PROVIDE pt2_stoch_istate + integer :: more, n, i, p, c, t, n_tasks, U integer, allocatable :: task_id(:) integer, allocatable :: index(:) - double precision :: v, x, x2, x3, avg, avg2, avg3, eqt, E0, v0, n0 + double precision :: v, x, x2, x3, avg, avg2, avg3(N_states), eqt, E0, v0, n0(N_states) + double precision :: eqta(N_states) double precision :: time, time1, time0 integer, allocatable :: f(:) @@ -406,11 +422,10 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc ! updated in ZMQ_pt2 allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators)) allocate(d(N_det_generators+1)) - allocate(eI(N_states, N_det_generators), eI_task(N_states, pt2_n_tasks_max)) - allocate(vI(N_states, N_det_generators), vI_task(N_states, pt2_n_tasks_max)) - allocate(nI(N_states, N_det_generators), nI_task(N_states, pt2_n_tasks_max)) - allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1)) - allocate(T2(pt2_N_teeth+1), T3(pt2_N_teeth+1)) + allocate(pt2_data_task(pt2_n_tasks_max)) + allocate(pt2_data_I(N_det_generators)) + allocate(pt2_data_S(pt2_N_teeth+1)) + allocate(pt2_data_S2(pt2_N_teeth+1)) @@ -418,26 +433,31 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc call create_selection_buffer(N_, N_*2, b2) - pt2(:) = -huge(1.) - error(:) = huge(1.) - variance(:) = huge(1.) - norm(:) = 0.d0 - S(:) = 0d0 - S2(:) = 0d0 - T2(:) = 0d0 - T3(:) = 0d0 + pt2_data % pt2(pt2_stoch_istate) = -huge(1.) + pt2_data_err % pt2(pt2_stoch_istate) = huge(1.) + pt2_data % variance(pt2_stoch_istate) = huge(1.) + pt2_data_err % variance(pt2_stoch_istate) = huge(1.) + pt2_data % overlap(:,pt2_stoch_istate) = 0.d0 + pt2_data_err % overlap(:,pt2_stoch_istate) = huge(1.) n = 1 t = 0 U = 0 - eI(:,:) = 0d0 - vI(:,:) = 0d0 - nI(:,:) = 0d0 + do i=1,pt2_n_tasks_max + call pt2_alloc(pt2_data_task(i),N_states) + enddo + do i=1,pt2_N_teeth+1 + call pt2_alloc(pt2_data_S(i),N_states) + call pt2_alloc(pt2_data_S2(i),N_states) + enddo + do i=1,N_det_generators + call pt2_alloc(pt2_data_I(i),N_states) + enddo f(:) = pt2_F(:) d(:) = .false. n_tasks = 0 E0 = E v0 = 0.d0 - n0 = 0.d0 + n0(:) = 0.d0 more = 1 call wall_time(time0) time1 = time0 @@ -457,11 +477,11 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc t=t+1 E0 = 0.d0 v0 = 0.d0 - n0 = 0.d0 + n0(:) = 0.d0 do i=pt2_n_0(t),1,-1 - E0 += eI(pt2_stoch_istate, i) - v0 += vI(pt2_stoch_istate, i) - n0 += nI(pt2_stoch_istate, i) + E0 += pt2_data_I(i) % pt2(pt2_stoch_istate) + v0 += pt2_data_I(i) % variance(pt2_stoch_istate) + n0(:) += pt2_data_I(i) % overlap(:,pt2_stoch_istate) end do else exit @@ -471,45 +491,59 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc ! Add Stochastic part c = pt2_R(n) if(c > 0) then -!print *, 'c>0' - x = 0d0 - x2 = 0d0 - x3 = 0d0 + + call pt2_alloc(pt2_data_teeth,N_states) do p=pt2_N_teeth, 1, -1 v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1)) i = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(p),pt2_n_0(p+1)) - x += eI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) - x2 += vI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) - x3 += nI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) - S(p) += x - S2(p) += x*x - T2(p) += x2 - T3(p) += x3 - end do - avg = E0 + S(t) / dble(c) - avg2 = v0 + T2(t) / dble(c) - avg3 = n0 + T3(t) / dble(c) + v = pt2_W_T / pt2_w(i) + call pt2_add ( pt2_data_teeth, v, pt2_data_I(i) ) + call pt2_add ( pt2_data_S(p), 1.d0, pt2_data_teeth ) + call pt2_add2( pt2_data_S2(p), 1.d0, pt2_data_teeth ) + enddo + call pt2_dealloc(pt2_data_teeth) + + avg = E0 + pt2_data_S(t) % pt2(pt2_stoch_istate) / dble(c) + avg2 = v0 + pt2_data_S(t) % variance(pt2_stoch_istate) / dble(c) + avg3(:) = n0(:) + pt2_data_S(t) % overlap(:,pt2_stoch_istate) / dble(c) if ((avg /= 0.d0) .or. (n == N_det_generators) ) then do_exit = .true. endif if (qp_stop()) then stop_now = .True. endif - pt2(pt2_stoch_istate) = avg - variance(pt2_stoch_istate) = avg2 - norm(pt2_stoch_istate) = avg3 + pt2_data % pt2(pt2_stoch_istate) = avg + pt2_data % variance(pt2_stoch_istate) = avg2 + pt2_data % overlap(:,pt2_stoch_istate) = avg3(:) call wall_time(time) ! 1/(N-1.5) : see Brugger, The American Statistician (23) 4 p. 32 (1969) if(c > 2) then - eqt = dabs((S2(t) / c) - (S(t)/c)**2) ! dabs for numerical stability + eqt = dabs((pt2_data_S2(t) % pt2(pt2_stoch_istate) / c) - (pt2_data_S(t) % pt2(pt2_stoch_istate)/c)**2) ! dabs for numerical stability eqt = sqrt(eqt / (dble(c) - 1.5d0)) - error(pt2_stoch_istate) = eqt + pt2_data_err % pt2(pt2_stoch_istate) = eqt + + eqt = dabs((pt2_data_S2(t) % variance(pt2_stoch_istate) / c) - (pt2_data_S(t) % variance(pt2_stoch_istate)/c)**2) ! dabs for numerical stability + eqt = sqrt(eqt / (dble(c) - 1.5d0)) + pt2_data_err % variance(pt2_stoch_istate) = eqt + + eqta(:) = dabs((pt2_data_S2(t) % overlap(:,pt2_stoch_istate) / c) - (pt2_data_S(t) % overlap(:,pt2_stoch_istate)/c)**2) ! dabs for numerical stability + eqta(:) = sqrt(eqta(:) / (dble(c) - 1.5d0)) + pt2_data_err % overlap(:,pt2_stoch_istate) = eqta(:) + + if ((time - time1 > 1.d0) .or. (n==N_det_generators)) then time1 = time - print '(G10.3, 2X, F16.10, 2X, G10.3, 2X, F14.10, 2X, F14.10, 2X, F10.4, A10)', c, avg+E, eqt, avg2, avg3, time-time0, '' + print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.4)', c, & + pt2_data % pt2(pt2_stoch_istate) +E, & + pt2_data_err % pt2(pt2_stoch_istate), & + pt2_data % variance(pt2_stoch_istate), & + pt2_data_err % variance(pt2_stoch_istate), & + pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate), & + pt2_data_err % overlap(pt2_stoch_istate,pt2_stoch_istate), & + time-time0 if (stop_now .or. ( & - (do_exit .and. (dabs(error(pt2_stoch_istate)) / & - (1.d-20 + dabs(pt2(pt2_stoch_istate)) ) <= relative_error))) ) then + (do_exit .and. (dabs(pt2_data_err % pt2(pt2_stoch_istate)) / & + (1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) ) <= relative_error))) ) then if (zmq_abort(zmq_to_qp_run_socket) == -1) then call sleep(10) if (zmq_abort(zmq_to_qp_run_socket) == -1) then @@ -524,10 +558,10 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc else if(more == 0) then exit else - call pull_pt2_results(zmq_socket_pull, index, eI_task, vI_task, nI_task, task_id, n_tasks, b2) + call pull_pt2_results(zmq_socket_pull, index, pt2_data_task, task_id, n_tasks, b2) if(n_tasks > pt2_n_tasks_max)then print*,'PB !!!' - print*,'If you see this, send an email to Anthony scemama with the following content' + print*,'If you see this, send a bug report with the following content' print*,irp_here print*,'n_tasks,pt2_n_tasks_max = ',n_tasks,pt2_n_tasks_max stop -1 @@ -536,16 +570,14 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc stop 'PT2: Unable to delete tasks (send)' endif do i=1,n_tasks - if(index(i).gt.size(eI,2).or.index(i).lt.1)then + if(index(i).gt.size(pt2_data_I,1).or.index(i).lt.1)then print*,'PB !!!' - print*,'If you see this, send an email to Anthony scemama with the following content' + print*,'If you see this, send a bug report with the following content' print*,irp_here - print*,'i,index(i),size(ei,2) = ',i,index(i),size(ei,2) + print*,'i,index(i),size(pt2_data_I,1) = ',i,index(i),size(pt2_data_I,1) stop -1 endif - eI(1:N_states, index(i)) += eI_task(1:N_states,i) - vI(1:N_states, index(i)) += vI_task(1:N_states,i) - nI(1:N_states, index(i)) += nI_task(1:N_states,i) + call pt2_add(pt2_data_I(index(i)),1.d0,pt2_data_task(i)) f(index(i)) -= 1 end do do i=1, b2%cur @@ -558,6 +590,16 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, varianc endif end if end do + do i=1,N_det_generators + call pt2_dealloc(pt2_data_I(i)) + enddo + do i=1,pt2_N_teeth+1 + call pt2_dealloc(pt2_data_S(i)) + call pt2_dealloc(pt2_data_S2(i)) + enddo + do i=1,pt2_n_tasks_max + call pt2_dealloc(pt2_data_task(i)) + enddo !print *, 'deleting b2' call delete_selection_buffer(b2) !print *, 'sorting b' @@ -757,13 +799,13 @@ END_PROVIDER tilde_w(i) = psi_coef_sorted_gen(i,pt2_stoch_istate)**2 !+ 1.d-20 enddo - double precision :: norm - norm = 0.d0 + double precision :: norm2 + norm2 = 0.d0 do i=N_det_generators,1,-1 - norm += tilde_w(i) + norm2 += tilde_w(i) enddo - tilde_w(:) = tilde_w(:) / norm + tilde_w(:) = tilde_w(:) / norm2 tilde_cW(0) = -1.d0 do i=1,N_det_generators diff --git a/src/cipsi/pt2_type.irp.f b/src/cipsi/pt2_type.irp.f new file mode 100644 index 00000000..ee90d421 --- /dev/null +++ b/src/cipsi/pt2_type.irp.f @@ -0,0 +1,128 @@ +subroutine pt2_alloc(pt2_data,N) + implicit none + use selection_types + type(pt2_type), intent(inout) :: pt2_data + integer, intent(in) :: N + integer :: k + + allocate(pt2_data % pt2(N) & + ,pt2_data % variance(N) & + ,pt2_data % rpt2(N) & + ,pt2_data % overlap(N,N) & + ) + + pt2_data % pt2(:) = 0.d0 + pt2_data % variance(:) = 0.d0 + pt2_data % rpt2(:) = 0.d0 + pt2_data % overlap(:,:) = 0.d0 + +end subroutine + +subroutine pt2_dealloc(pt2_data) + implicit none + use selection_types + type(pt2_type), intent(inout) :: pt2_data + deallocate(pt2_data % pt2 & + ,pt2_data % variance & + ,pt2_data % rpt2 & + ,pt2_data % overlap & + ) +end subroutine + +subroutine pt2_add(p1, w, p2) + implicit none + use selection_types + BEGIN_DOC +! p1 += w * p2 + END_DOC + type(pt2_type), intent(inout) :: p1 + double precision, intent(in) :: w + type(pt2_type), intent(in) :: p2 + + if (w == 1.d0) then + + p1 % pt2(:) = p1 % pt2(:) + p2 % pt2(:) + p1 % rpt2(:) = p1 % rpt2(:) + p2 % rpt2(:) + p1 % variance(:) = p1 % variance(:) + p2 % variance(:) + p1 % overlap(:,:) = p1 % overlap(:,:) + p2 % overlap(:,:) + + else + + p1 % pt2(:) = p1 % pt2(:) + w * p2 % pt2(:) + p1 % rpt2(:) = p1 % rpt2(:) + w * p2 % rpt2(:) + p1 % variance(:) = p1 % variance(:) + w * p2 % variance(:) + p1 % overlap(:,:) = p1 % overlap(:,:) + w * p2 % overlap(:,:) + + endif + +end subroutine + + +subroutine pt2_add2(p1, w, p2) + implicit none + use selection_types + BEGIN_DOC +! p1 += w * p2**2 + END_DOC + type(pt2_type), intent(inout) :: p1 + double precision, intent(in) :: w + type(pt2_type), intent(in) :: p2 + + if (w == 1.d0) then + + p1 % pt2(:) = p1 % pt2(:) + p2 % pt2(:) * p2 % pt2(:) + p1 % rpt2(:) = p1 % rpt2(:) + p2 % rpt2(:) * p2 % rpt2(:) + p1 % variance(:) = p1 % variance(:) + p2 % variance(:) * p2 % variance(:) + p1 % overlap(:,:) = p1 % overlap(:,:) + p2 % overlap(:,:) * p2 % overlap(:,:) + + else + + p1 % pt2(:) = p1 % pt2(:) + w * p2 % pt2(:) * p2 % pt2(:) + p1 % rpt2(:) = p1 % rpt2(:) + w * p2 % rpt2(:) * p2 % rpt2(:) + p1 % variance(:) = p1 % variance(:) + w * p2 % variance(:) * p2 % variance(:) + p1 % overlap(:,:) = p1 % overlap(:,:) + w * p2 % overlap(:,:) * p2 % overlap(:,:) + + endif + +end subroutine + + +subroutine pt2_serialize(pt2_data, n, x) + implicit none + use selection_types + type(pt2_type), intent(in) :: pt2_data + integer, intent(in) :: n + double precision, intent(out) :: x(*) + + integer :: i,k,n2 + + n2 = n*n + x(1:n) = pt2_data % pt2(1:n) + k=n + x(k+1:k+n) = pt2_data % rpt2(1:n) + k=k+n + x(k+1:k+n) = pt2_data % variance(1:n) + k=k+n + x(k+1:k+n2) = reshape(pt2_data % overlap(1:n,1:n), (/ n2 /)) + +end + +subroutine pt2_deserialize(pt2_data, n, x) + implicit none + use selection_types + type(pt2_type), intent(inout) :: pt2_data + integer, intent(in) :: n + double precision, intent(in) :: x(*) + + integer :: i,k,n2 + + n2 = n*n + pt2_data % pt2(1:n) = x(1:n) + k=n + pt2_data % rpt2(1:n) = x(k+1:k+n) + k=k+n + pt2_data % variance(1:n) = x(k+1:k+n) + k=k+n + pt2_data % overlap(1:n,1:n) = reshape(x(k+1:k+n2), (/ n, n /)) + +end diff --git a/src/cipsi/run_pt2_slave.irp.f b/src/cipsi/run_pt2_slave.irp.f index 7df98a87..d3f4d45d 100644 --- a/src/cipsi/run_pt2_slave.irp.f +++ b/src/cipsi/run_pt2_slave.irp.f @@ -1,8 +1,8 @@ - use omp_lib + use omp_lib use selection_types use f77_zmq BEGIN_PROVIDER [ integer(omp_lock_kind), global_selection_buffer_lock ] - use omp_lib + use omp_lib implicit none BEGIN_DOC ! Global buffer for the OpenMP selection @@ -11,7 +11,7 @@ BEGIN_PROVIDER [ integer(omp_lock_kind), global_selection_buffer_lock ] END_PROVIDER BEGIN_PROVIDER [ type(selection_buffer), global_selection_buffer ] - use omp_lib + use omp_lib implicit none BEGIN_DOC ! Global buffer for the OpenMP selection @@ -61,7 +61,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy) type(selection_buffer) :: b logical :: done, buffer_ready - double precision,allocatable :: pt2(:,:), variance(:,:), norm(:,:) + type(pt2_type), allocatable :: pt2_data(:) integer :: n_tasks, k, N integer, allocatable :: i_generator(:), subset(:) @@ -70,10 +70,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy) ! logical :: sending allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) - allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) - allocate(variance(N_states,pt2_n_tasks_max)) - allocate(norm(N_states,pt2_n_tasks_max)) - + allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() integer, external :: connect_to_taskserver @@ -120,13 +117,11 @@ subroutine run_pt2_slave_small(thread,iproc,energy) double precision :: time0, time1 call wall_time(time0) do k=1,n_tasks - pt2(:,k) = 0.d0 - variance(:,k) = 0.d0 - norm(:,k) = 0.d0 - b%cur = 0 + call pt2_alloc(pt2_data(k),N_states) + b%cur = 0 !double precision :: time2 !call wall_time(time2) - call select_connected(i_generator(k),energy,pt2(1,k),variance(1,k),norm(1,k),b,subset(k),pt2_F(i_generator(k))) + call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k))) !call wall_time(time1) !print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1)) enddo @@ -138,11 +133,15 @@ subroutine run_pt2_slave_small(thread,iproc,energy) done = .true. endif call sort_selection_buffer(b) - call push_pt2_results(zmq_socket_push, i_generator, pt2, variance, norm, b, task_id, n_tasks) + call push_pt2_results(zmq_socket_push, i_generator, pt2_data, b, task_id, n_tasks) + do k=1,n_tasks + call pt2_dealloc(pt2_data(k)) + enddo b%cur=0 ! ! Try to adjust n_tasks around nproc/2 seconds per job n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0))) + n_tasks = min(n_tasks, pt2_n_tasks_max) ! n_tasks = 1 end do @@ -158,6 +157,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy) if (buffer_ready) then call delete_selection_buffer(b) endif + deallocate(pt2_data) end subroutine @@ -171,8 +171,8 @@ subroutine run_pt2_slave_large(thread,iproc,energy) integer :: rc, i integer :: worker_id, ctask, ltask - character*(512), allocatable :: task(:) - integer, allocatable :: task_id(:) + character*(512) :: task + integer :: task_id(1) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -183,20 +183,15 @@ subroutine run_pt2_slave_large(thread,iproc,energy) type(selection_buffer) :: b logical :: done, buffer_ready - double precision,allocatable :: pt2(:,:), variance(:,:), norm(:,:) + type(pt2_type) :: pt2_data(1) integer :: n_tasks, k, N - integer, allocatable :: i_generator(:), subset(:) + integer :: i_generator(1), subset integer :: bsize ! Size of selection buffers logical :: sending PROVIDE global_selection_buffer global_selection_buffer_lock - allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) - allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) - allocate(variance(N_states,pt2_n_tasks_max)) - allocate(norm(N_states,pt2_n_tasks_max)) - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() integer, external :: connect_to_taskserver @@ -215,22 +210,17 @@ subroutine run_pt2_slave_large(thread,iproc,energy) done = .False. do while (.not.done) - n_tasks = max(1,n_tasks) - n_tasks = min(pt2_n_tasks_max,n_tasks) - integer, external :: get_tasks_from_taskserver if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then exit endif - done = task_id(n_tasks) == 0 + done = task_id(1) == 0 if (done) then n_tasks = n_tasks-1 endif if (n_tasks == 0) exit - do k=1,n_tasks - read (task(k),*) subset(k), i_generator(k), N - enddo + read (task,*) subset, i_generator(1), N if (b%N == 0) then ! Only first time bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2) @@ -242,17 +232,13 @@ subroutine run_pt2_slave_large(thread,iproc,energy) double precision :: time0, time1 call wall_time(time0) - do k=1,n_tasks - pt2(:,k) = 0.d0 - variance(:,k) = 0.d0 - norm(:,k) = 0.d0 - b%cur = 0 + call pt2_alloc(pt2_data(1),N_states) + b%cur = 0 !double precision :: time2 !call wall_time(time2) - call select_connected(i_generator(k),energy,pt2(1,k),variance(1,k),norm(1,k),b,subset(k),pt2_F(i_generator(k))) + call select_connected(i_generator(1),energy,pt2_data(1),b,subset,pt2_F(i_generator(1))) !call wall_time(time1) !print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1)) - enddo call wall_time(time1) !print *, '-->', i_generator(1), time1-time0, n_tasks @@ -269,16 +255,14 @@ subroutine run_pt2_slave_large(thread,iproc,energy) call omp_unset_lock(global_selection_buffer_lock) if ( iproc == 1 ) then call omp_set_lock(global_selection_buffer_lock) - call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2, variance, norm, global_selection_buffer, task_id, n_tasks,sending) + call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2_data, global_selection_buffer, task_id, n_tasks,sending) global_selection_buffer%cur = 0 call omp_unset_lock(global_selection_buffer_lock) else - call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2, variance, norm, b, task_id, n_tasks,sending) + call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2_data, b, task_id, n_tasks,sending) endif -! ! Try to adjust n_tasks around nproc/2 seconds per job -! n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0))) - n_tasks = 1 + call pt2_dealloc(pt2_data(1)) end do call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending) @@ -298,39 +282,36 @@ subroutine run_pt2_slave_large(thread,iproc,energy) end subroutine -subroutine push_pt2_results(zmq_socket_push, index, pt2, variance, norm, b, task_id, n_tasks) +subroutine push_pt2_results(zmq_socket_push, index, pt2_data, b, task_id, n_tasks) use selection_types use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_push - double precision, intent(in) :: pt2(N_states,n_tasks) - double precision, intent(in) :: variance(N_states,n_tasks) - double precision, intent(in) :: norm(N_states,n_tasks) + type(pt2_type), intent(in) :: pt2_data(n_tasks) integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks) type(selection_buffer), intent(inout) :: b logical :: sending sending = .False. - call push_pt2_results_async_send(zmq_socket_push, index, pt2, variance, norm, b, task_id, n_tasks, sending) + call push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending) call push_pt2_results_async_recv(zmq_socket_push, b%mini, sending) end subroutine -subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2, variance, norm, b, task_id, n_tasks, sending) +subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending) use selection_types use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_push - double precision, intent(in) :: pt2(N_states,n_tasks) - double precision, intent(in) :: variance(N_states,n_tasks) - double precision, intent(in) :: norm(N_states,n_tasks) + type(pt2_type), intent(in) :: pt2_data(n_tasks) integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks) type(selection_buffer), intent(inout) :: b logical, intent(inout) :: sending - integer :: rc + integer :: rc, i integer*8 :: rc8 + double precision, allocatable :: pt2_serialized(:,:) if (sending) then print *, irp_here, ': sending is true' @@ -358,32 +339,18 @@ subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2, variance, no endif - rc = f77_zmq_send( zmq_socket_push, pt2, 8*N_states*n_tasks, ZMQ_SNDMORE) + allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) ) + do i=1,n_tasks + call pt2_serialize(pt2_data(i),N_states,pt2_serialized(1,i)) + enddo + + rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE) + deallocate(pt2_serialized) if (rc == -1) then print *, irp_here, ': error sending result' stop 3 return - else if(rc /= 8*N_states*n_tasks) then - stop 'push' - endif - - - rc = f77_zmq_send( zmq_socket_push, variance, 8*N_states*n_tasks, ZMQ_SNDMORE) - if (rc == -1) then - print *, irp_here, ': error sending result' - stop 4 - return - else if(rc /= 8*N_states*n_tasks) then - stop 'push' - endif - - - rc = f77_zmq_send( zmq_socket_push, norm, 8*N_states*n_tasks, ZMQ_SNDMORE) - if (rc == -1) then - print *, irp_here, ': error sending result' - stop 5 - return - else if(rc /= 8*N_states*n_tasks) then + else if(rc /= size(pt2_serialized)*8) then stop 'push' endif @@ -475,7 +442,7 @@ IRP_ELSE stop 11 return else if (rc /= 8) then - print *, irp_here//': error in receiving mini' + print *, irp_here//': error in receiving mini' stop 12 endif IRP_ENDIF @@ -484,19 +451,18 @@ end subroutine -subroutine pull_pt2_results(zmq_socket_pull, index, pt2, variance, norm, task_id, n_tasks, b) +subroutine pull_pt2_results(zmq_socket_pull, index, pt2_data, task_id, n_tasks, b) use selection_types use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - double precision, intent(inout) :: pt2(N_states,*) - double precision, intent(inout) :: variance(N_states,*) - double precision, intent(inout) :: norm(N_states,*) + type(pt2_type), intent(inout) :: pt2_data(*) type(selection_buffer), intent(inout) :: b integer, intent(out) :: index(*) integer, intent(out) :: n_tasks, task_id(*) integer :: rc, rn, i integer*8 :: rc8 + double precision, allocatable :: pt2_serialized(:,:) rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) if (rc == -1) then @@ -514,29 +480,19 @@ subroutine pull_pt2_results(zmq_socket_pull, index, pt2, variance, norm, task_id stop 'pull' endif - rc = f77_zmq_recv( zmq_socket_pull, pt2, N_states*8*n_tasks, 0) + allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) ) + rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, 8*size(pt2_serialized)*n_tasks, 0) if (rc == -1) then n_tasks = 1 task_id(1) = 0 - else if(rc /= 8*N_states*n_tasks) then + else if(rc /= 8*size(pt2_serialized)) then stop 'pull' endif - rc = f77_zmq_recv( zmq_socket_pull, variance, N_states*8*n_tasks, 0) - if (rc == -1) then - n_tasks = 1 - task_id(1) = 0 - else if(rc /= 8*N_states*n_tasks) then - stop 'pull' - endif - - rc = f77_zmq_recv( zmq_socket_pull, norm, N_states*8*n_tasks, 0) - if (rc == -1) then - n_tasks = 1 - task_id(1) = 0 - else if(rc /= 8*N_states*n_tasks) then - stop 'pull' - endif + do i=1,n_tasks + call pt2_deserialize(pt2_data(i),N_states,pt2_serialized(1,i)) + enddo + deallocate(pt2_serialized) rc = f77_zmq_recv( zmq_socket_pull, task_id, n_tasks*4, 0) if (rc == -1) then diff --git a/src/cipsi/run_selection_slave.irp.f b/src/cipsi/run_selection_slave.irp.f index feaa2440..c2ba2379 100644 --- a/src/cipsi/run_selection_slave.irp.f +++ b/src/cipsi/run_selection_slave.irp.f @@ -18,9 +18,7 @@ subroutine run_selection_slave(thread,iproc,energy) type(selection_buffer) :: buf, buf2 logical :: done, buffer_ready - double precision :: pt2(N_states) - double precision :: variance(N_states) - double precision :: norm(N_states) + type(pt2_type) :: pt2_data PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order @@ -28,6 +26,7 @@ subroutine run_selection_slave(thread,iproc,energy) PROVIDE psi_bilinear_matrix_transp_order N_int pt2_F pseudo_sym PROVIDE psi_selectors_coef_transp psi_det_sorted weight_selection + call pt2_alloc(pt2_data,N_states) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() @@ -42,9 +41,6 @@ subroutine run_selection_slave(thread,iproc,energy) buf%N = 0 buffer_ready = .False. ctask = 1 - pt2(:) = 0d0 - variance(:) = 0d0 - norm(:) = 0.d0 do integer, external :: get_task_from_taskserver @@ -59,13 +55,17 @@ subroutine run_selection_slave(thread,iproc,energy) read(task,*) subset, i_generator, N if(buf%N == 0) then ! Only first time - bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2) - call create_selection_buffer(bsize, bsize*2, buf) + call create_selection_buffer(N, N*2, buf) buffer_ready = .True. else - ASSERT (N == buf%N) + if (N /= buf%N) then + print *, 'N=', N + print *, 'buf%N=', buf%N + print *, 'bug in ', irp_here + stop '-1' + end if end if - call select_connected(i_generator,energy,pt2,variance,norm,buf,subset,pt2_F(i_generator)) + call select_connected(i_generator,energy,pt2_data,buf,subset,pt2_F(i_generator)) endif integer, external :: task_done_to_taskserver @@ -84,12 +84,10 @@ subroutine run_selection_slave(thread,iproc,energy) if(ctask > 0) then call sort_selection_buffer(buf) ! call merge_selection_buffers(buf,buf2) -!print *, task_id(1), pt2(1), buf%cur, ctask - call push_selection_results(zmq_socket_push, pt2, variance, norm, buf, task_id(1), ctask) + call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask) + call pt2_dealloc(pt2_data) + call pt2_alloc(pt2_data,N_states) ! buf%mini = buf2%mini - pt2(:) = 0d0 - variance(:) = 0d0 - norm(:) = 0d0 buf%cur = 0 end if ctask = 0 @@ -102,14 +100,12 @@ subroutine run_selection_slave(thread,iproc,energy) if(ctask > 0) then call sort_selection_buffer(buf) ! call merge_selection_buffers(buf,buf2) - call push_selection_results(zmq_socket_push, pt2, variance, norm, buf, task_id(1), ctask) + call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask) ! buf%mini = buf2%mini - pt2(:) = 0d0 - variance(:) = 0d0 - norm(:) = 0d0 buf%cur = 0 end if ctask = 0 + call pt2_dealloc(pt2_data) integer, external :: disconnect_from_taskserver if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then @@ -125,18 +121,17 @@ subroutine run_selection_slave(thread,iproc,energy) end subroutine -subroutine push_selection_results(zmq_socket_push, pt2, variance, norm, b, task_id, ntask) +subroutine push_selection_results(zmq_socket_push, pt2_data, b, task_id, ntasks) use f77_zmq use selection_types implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_push - double precision, intent(in) :: pt2(N_states) - double precision, intent(in) :: variance(N_states) - double precision, intent(in) :: norm(N_states) + type(pt2_type), intent(in) :: pt2_data type(selection_buffer), intent(inout) :: b - integer, intent(in) :: ntask, task_id(*) + integer, intent(in) :: ntasks, task_id(*) integer :: rc + double precision, allocatable :: pt2_serialized(:) rc = f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE) if(rc /= 4) then @@ -144,20 +139,18 @@ subroutine push_selection_results(zmq_socket_push, pt2, variance, norm, b, task_ endif - rc = f77_zmq_send( zmq_socket_push, pt2, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) then - print *, 'f77_zmq_send( zmq_socket_push, pt2, 8*N_states, ZMQ_SNDMORE)' - endif + allocate(pt2_serialized (pt2_type_size(N_states)) ) + call pt2_serialize(pt2_data,N_states,pt2_serialized) - rc = f77_zmq_send( zmq_socket_push, variance, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) then - print *, 'f77_zmq_send( zmq_socket_push, variance, 8*N_states, ZMQ_SNDMORE)' - endif - - rc = f77_zmq_send( zmq_socket_push, norm, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) then - print *, 'f77_zmq_send( zmq_socket_push, norm, 8*N_states, ZMQ_SNDMORE)' + rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE) + if (rc == -1) then + print *, irp_here, ': error sending result' + stop 3 + return + else if(rc /= size(pt2_serialized)*8) then + stop 'push' endif + deallocate(pt2_serialized) if (b%cur > 0) then @@ -173,14 +166,14 @@ subroutine push_selection_results(zmq_socket_push, pt2, variance, norm, b, task_ endif - rc = f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE) + rc = f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE) if(rc /= 4) then - print *, 'f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE)' + print *, 'f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE)' endif - rc = f77_zmq_send( zmq_socket_push, task_id(1), ntask*4, 0) - if(rc /= 4*ntask) then - print *, 'f77_zmq_send( zmq_socket_push, task_id(1), ntask*4, 0)' + rc = f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0) + if(rc /= 4*ntasks) then + print *, 'f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0)' endif ! Activate is zmq_socket_push is a REQ @@ -197,42 +190,34 @@ IRP_ENDIF end subroutine -subroutine pull_selection_results(zmq_socket_pull, pt2, variance, norm, val, det, N, task_id, ntask) +subroutine pull_selection_results(zmq_socket_pull, pt2_data, val, det, N, task_id, ntasks) use f77_zmq use selection_types implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - double precision, intent(inout) :: pt2(N_states) - double precision, intent(inout) :: variance(N_states) - double precision, intent(inout) :: norm(N_states) + type(pt2_type), intent(inout) :: pt2_data double precision, intent(out) :: val(*) integer(bit_kind), intent(out) :: det(N_int, 2, *) - integer, intent(out) :: N, ntask, task_id(*) + integer, intent(out) :: N, ntasks, task_id(*) integer :: rc, rn, i + double precision, allocatable :: pt2_serialized(:) rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) if(rc /= 4) then print *, 'f77_zmq_recv( zmq_socket_pull, N, 4, 0)' endif - pt2(:) = 0.d0 - variance(:) = 0.d0 - norm(:) = 0.d0 - - rc = f77_zmq_recv( zmq_socket_pull, pt2, N_states*8, 0) - if(rc /= 8*N_states) then - print *, 'f77_zmq_recv( zmq_socket_pull, pt2, N_states*8, 0)' + allocate(pt2_serialized (pt2_type_size(N_states)) ) + rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, 8*size(pt2_serialized), 0) + if (rc == -1) then + ntasks = 1 + task_id(1) = 0 + else if(rc /= 8*size(pt2_serialized)) then + stop 'pull' endif - rc = f77_zmq_recv( zmq_socket_pull, variance, N_states*8, 0) - if(rc /= 8*N_states) then - print *, 'f77_zmq_recv( zmq_socket_pull, variance, N_states*8, 0)' - endif - - rc = f77_zmq_recv( zmq_socket_pull, norm, N_states*8, 0) - if(rc /= 8*N_states) then - print *, 'f77_zmq_recv( zmq_socket_pull, norm, N_states*8, 0)' - endif + call pt2_deserialize(pt2_data,N_states,pt2_serialized) + deallocate(pt2_serialized) if (N>0) then rc = f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0) @@ -246,14 +231,14 @@ subroutine pull_selection_results(zmq_socket_pull, pt2, variance, norm, val, det endif endif - rc = f77_zmq_recv( zmq_socket_pull, ntask, 4, 0) + rc = f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0) if(rc /= 4) then - print *, 'f77_zmq_recv( zmq_socket_pull, ntask, 4, 0)' + print *, 'f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0)' endif - rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntask*4, 0) - if(rc /= 4*ntask) then - print *, 'f77_zmq_recv( zmq_socket_pull, task_id(1), ntask*4, 0)' + rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0) + if(rc /= 4*ntasks) then + print *, 'f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0)' endif ! Activate is zmq_socket_pull is a REP diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index f6350a66..e599737c 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -19,22 +19,26 @@ BEGIN_PROVIDER [ double precision, variance_match_weight, (N_states) ] variance_match_weight(:) = 1.d0 END_PROVIDER -subroutine update_pt2_and_variance_weights(pt2, variance, norm, N_st) +subroutine update_pt2_and_variance_weights(pt2_data, N_st) implicit none + use selection_types BEGIN_DOC ! Updates the PT2- and Variance- matching weights. END_DOC integer, intent(in) :: N_st - double precision, intent(in) :: pt2(N_st) - double precision, intent(in) :: variance(N_st) - double precision, intent(in) :: norm(N_st) + type(pt2_type), intent(in) :: pt2_data + double precision :: pt2(N_st) + double precision :: variance(N_st) - double precision :: avg, rpt2(N_st), element, dt, x + double precision :: avg, element, dt, x integer :: k integer, save :: i_iter=0 integer, parameter :: i_itermax = 1 double precision, allocatable, save :: memo_variance(:,:), memo_pt2(:,:) + pt2(:) = pt2_data % pt2(:) + variance(:) = pt2_data % variance(:) + if (i_iter == 0) then allocate(memo_variance(N_st,i_itermax), memo_pt2(N_st,i_itermax)) memo_pt2(:,:) = 1.d0 @@ -48,11 +52,6 @@ subroutine update_pt2_and_variance_weights(pt2, variance, norm, N_st) dt = 2.0d0 - do k=1,N_st - ! rPT2 - rpt2(k) = pt2(k)/(1.d0 + norm(k)) - enddo - avg = sum(pt2(1:N_st)) / dble(N_st) - 1.d-32 ! Avoid future division by zero do k=1,N_st element = exp(dt*(pt2(k)/avg -1.d0)) @@ -179,15 +178,13 @@ subroutine get_mask_phase(det1, pm, Nint) end subroutine -subroutine select_connected(i_generator,E0,pt2,variance,norm,b,subset,csubset) +subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset) use bitmasks use selection_types implicit none integer, intent(in) :: i_generator, subset, csubset type(selection_buffer), intent(inout) :: b - double precision, intent(inout) :: pt2(N_states) - double precision, intent(inout) :: variance(N_states) - double precision, intent(inout) :: norm(N_states) + type(pt2_type), intent(inout) :: pt2_data integer :: k,l double precision, intent(in) :: E0(N_states) @@ -205,7 +202,7 @@ subroutine select_connected(i_generator,E0,pt2,variance,norm,b,subset,csubset) particle_mask(k,1) = iand(generators_bitmask(k,1,s_part), not(psi_det_generators(k,1,i_generator)) ) particle_mask(k,2) = iand(generators_bitmask(k,2,s_part), not(psi_det_generators(k,2,i_generator)) ) enddo - call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,variance,norm,b,subset,csubset) + call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,b,subset,csubset) deallocate(fock_diag_tmp) end subroutine @@ -254,7 +251,7 @@ double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2, Nint) end -subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,variance,norm,buf,subset,csubset) +subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,buf,subset,csubset) use bitmasks use selection_types implicit none @@ -266,9 +263,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) double precision, intent(in) :: fock_diag_tmp(mo_num) double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - double precision, intent(inout) :: variance(N_states) - double precision, intent(inout) :: norm(N_states) + type(pt2_type), intent(inout) :: pt2_data type(selection_buffer), intent(inout) :: buf integer :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i,j,nt,ii,sze @@ -644,9 +639,9 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) if(.not.pert_2rdm)then - call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm, mat, buf) + call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) else - call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0)) + call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0)) endif end if enddo @@ -664,7 +659,7 @@ end subroutine -subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm, mat, buf) +subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) use bitmasks use selection_types implicit none @@ -672,16 +667,15 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d integer, intent(in) :: i_generator, sp, h1, h2 double precision, intent(in) :: mat(N_states, mo_num, mo_num) logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num) - double precision, intent(in) :: fock_diag_tmp(mo_num) + double precision, intent(in) :: fock_diag_tmp(mo_num) double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - double precision, intent(inout) :: variance(N_states) - double precision, intent(inout) :: norm(N_states) + type(pt2_type), intent(inout) :: pt2_data type(selection_buffer), intent(inout) :: buf logical :: ok - integer :: s1, s2, p1, p2, ib, j, istate + integer :: s1, s2, p1, p2, ib, j, istate, jstate integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) - double precision :: e_pert, delta_E, val, Hii, w, tmp, alpha_h_psi, coef + double precision :: e_pert(N_states), coef(N_states), X(N_states) + double precision :: delta_E, val, Hii, w, tmp, alpha_h_psi double precision, external :: diag_H_mat_elem_fock double precision :: E_shift @@ -689,7 +683,12 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d double precision, allocatable :: values(:) integer, allocatable :: keys(:,:) integer :: nkeys - + double precision :: s_weight(N_states,N_states) + do jstate=1,N_states + do istate=1,N_states + s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate)) + enddo + enddo if(sp == 3) then s1 = 1 @@ -779,18 +778,42 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if (delta_E < 0.d0) then tmp = -tmp endif - e_pert = 0.5d0 * (tmp - delta_E) + e_pert(istate) = 0.5d0 * (tmp - delta_E) if (dabs(alpha_h_psi) > 1.d-4) then - coef = e_pert / alpha_h_psi + coef(istate) = e_pert(istate) / alpha_h_psi else - coef = alpha_h_psi / delta_E + coef(istate) = alpha_h_psi / delta_E endif - pt2(istate) = pt2(istate) + e_pert - variance(istate) = variance(istate) + alpha_h_psi * alpha_h_psi - norm(istate) = norm(istate) + coef * coef + if (e_pert(istate) < 0.d0) then + X(istate) = -dsqrt(-e_pert(istate)) + else + X(istate) = dsqrt(e_pert(istate)) + endif + enddo + +! ! Gram-Schmidt using input overlap matrix +! do istate=1,N_states +! do jstate=1,istate-1 +! if ( (pt2_overlap(jstate,istate) == 0.d0).or.(pt2_overlap(jstate,jstate) == 0.d0) ) cycle +! coef(istate) = coef(istate) - pt2_overlap(jstate,istate)/pt2_overlap(jstate,jstate) * coef(jstate) +! enddo +! enddo + + do istate=1, N_states + do jstate=1,N_states + pt2_data % overlap(jstate,istate) += coef(jstate) * coef(istate) + enddo + enddo + + do istate=1,N_states + alpha_h_psi = mat(istate, p1, p2) + + pt2_data % variance(istate) += alpha_h_psi * alpha_h_psi + pt2_data % pt2(istate) += e_pert(istate) !!!DEBUG -! pt2(istate) = pt2(istate) - e_pert + alpha_h_psi**2/delta_E +! delta_E = E0(istate) - Hii + E_shift +! pt2_data % pt2(istate) = pt2_data % pt2(istate) + alpha_h_psi**2/delta_E ! ! integer :: k ! double precision :: alpha_h_psi_2,hij @@ -811,14 +834,26 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d case(5) ! Variance selection - w = w - alpha_h_psi * alpha_h_psi * selection_weight(istate) + w = w - alpha_h_psi * alpha_h_psi * s_weight(istate,istate) + do jstate=1,N_states + if (istate == jstate) cycle + w = w + alpha_h_psi*mat(jstate,p1,p2) * s_weight(istate,jstate) + enddo case(6) - w = w - coef * coef * selection_weight(istate) + w = w - coef(istate) * coef(istate) * s_weight(istate,istate) + do jstate=1,N_states + if (istate == jstate) cycle + w = w + coef(istate)*coef(jstate) * s_weight(istate,jstate) + enddo case default ! Energy selection - w = w + e_pert * selection_weight(istate) + w = w + e_pert(istate) * s_weight(istate,istate) + do jstate=1,N_states + if (istate == jstate) cycle + w = w - dabs(X(istate))*X(jstate) * s_weight(istate,jstate) + enddo end select end do diff --git a/src/cipsi/selection_types.f90 b/src/cipsi/selection_types.f90 index 29e48524..58ce0e03 100644 --- a/src/cipsi/selection_types.f90 +++ b/src/cipsi/selection_types.f90 @@ -5,5 +5,21 @@ module selection_types double precision, pointer :: val(:) double precision :: mini endtype + + type pt2_type + double precision, allocatable :: pt2(:) + double precision, allocatable :: rpt2(:) + double precision, allocatable :: variance(:) + double precision, allocatable :: overlap(:,:) + endtype + + contains + + integer function pt2_type_size(N) + implicit none + integer, intent(in) :: N + pt2_type_size = (3*n + n*n) + end function + end module diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f index b8bf6a1d..c529795e 100644 --- a/src/cipsi/stochastic_cipsi.irp.f +++ b/src/cipsi/stochastic_cipsi.irp.f @@ -1,16 +1,19 @@ subroutine run_stochastic_cipsi + use selection_types implicit none BEGIN_DOC ! Selected Full Configuration Interaction with Stochastic selection and PT2. END_DOC integer :: i,j,k - double precision, allocatable :: pt2(:), variance(:), norm(:), rpt2(:), zeros(:) + double precision, allocatable :: zeros(:) integer :: to_select - logical, external :: qp_stop + type(pt2_type) :: pt2_data, pt2_data_err + logical, external :: qp_stop + double precision :: rss double precision, external :: memory_of_double - PROVIDE H_apply_buffer_allocated + PROVIDE H_apply_buffer_allocated N_iter = 1 threshold_generators = 1.d0 @@ -19,7 +22,9 @@ subroutine run_stochastic_cipsi rss = memory_of_double(N_states)*4.d0 call check_mem(rss,irp_here) - allocate (pt2(N_states), zeros(N_states), rpt2(N_states), norm(N_states), variance(N_states)) + allocate (zeros(N_states)) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) double precision :: hf_energy_ref logical :: has @@ -28,10 +33,10 @@ subroutine run_stochastic_cipsi relative_error=PT2_relative_error zeros = 0.d0 - pt2 = -huge(1.e0) - rpt2 = -huge(1.e0) - norm = 0.d0 - variance = huge(1.e0) + pt2_data % pt2 = -huge(1.e0) + pt2_data % rpt2 = -huge(1.e0) + pt2_data % overlap= 0.d0 + pt2_data % variance = huge(1.e0) if (s2_eig) then call make_s2_eigenfunction @@ -59,14 +64,13 @@ subroutine run_stochastic_cipsi endif double precision :: correlation_energy_ratio - double precision :: error(N_states) correlation_energy_ratio = 0.d0 do while ( & (N_det < N_det_max) .and. & - (maxval(abs(rpt2(1:N_states))) > pt2_max) .and. & - (maxval(abs(variance(1:N_states))) > variance_max) .and. & + (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and. & + (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and. & (correlation_energy_ratio <= correlation_energy_ratio_max) & ) write(*,'(A)') '--------------------------------------------------------------------------------' @@ -75,30 +79,28 @@ subroutine run_stochastic_cipsi to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) to_select = max(N_states_diag, to_select) - pt2 = 0.d0 - variance = 0.d0 - norm = 0.d0 - call ZMQ_pt2(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, & - norm, to_select) ! Stochastic PT2 and selection - do k=1,N_states - rpt2(k) = pt2(k)/(1.d0 + norm(k)) - enddo + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) + call ZMQ_pt2(psi_energy_with_nucl_rep,pt2_data,pt2_data_err,relative_error,to_select) ! Stochastic PT2 and selection correlation_energy_ratio = (psi_energy_with_nucl_rep(1) - hf_energy_ref) / & - (psi_energy_with_nucl_rep(1) + rpt2(1) - hf_energy_ref) + (psi_energy_with_nucl_rep(1) + pt2_data % rpt2(1) - hf_energy_ref) correlation_energy_ratio = min(1.d0,correlation_energy_ratio) call write_double(6,correlation_energy_ratio, 'Correlation ratio') - call print_summary(psi_energy_with_nucl_rep,pt2,error,variance,norm,N_det,N_occ_pattern,N_states,psi_s2) + call print_summary(psi_energy_with_nucl_rep, & + pt2_data, pt2_data_err, N_det,N_occ_pattern,N_states,psi_s2) - call save_energy(psi_energy_with_nucl_rep, rpt2) + call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det) + call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) call print_extrapolated_energy() N_iter += 1 - if (qp_stop()) exit + if (qp_stop()) exit ! Add selected determinants call copy_H_apply_buffer_to_wf() @@ -111,7 +113,7 @@ subroutine run_stochastic_cipsi call diagonalize_CI call save_wavefunction call save_energy(psi_energy_with_nucl_rep, zeros) - if (qp_stop()) exit + if (qp_stop()) exit enddo if (.not.qp_stop()) then @@ -121,20 +123,19 @@ subroutine run_stochastic_cipsi call save_energy(psi_energy_with_nucl_rep, zeros) endif - pt2(:) = 0.d0 - variance(:) = 0.d0 - norm(:) = 0.d0 - call ZMQ_pt2(psi_energy_with_nucl_rep, pt2,relative_error,error,variance, & - norm,0) ! Stochastic PT2 + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) + call ZMQ_pt2(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, relative_error, 0) ! Stochastic PT2 - do k=1,N_states - rpt2(k) = pt2(k)/(1.d0 + norm(k)) - enddo - - call save_energy(psi_energy_with_nucl_rep, rpt2) - call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm,N_det,N_occ_pattern,N_states,psi_s2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det) + call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) + call print_summary(psi_energy_with_nucl_rep, & + pt2_data , pt2_data_err, N_det, N_occ_pattern, N_states, psi_s2) + call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) call print_extrapolated_energy() endif + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) end diff --git a/src/cipsi/zmq_selection.irp.f b/src/cipsi/zmq_selection.irp.f index 081d998f..448b409e 100644 --- a/src/cipsi/zmq_selection.irp.f +++ b/src/cipsi/zmq_selection.irp.f @@ -1,4 +1,4 @@ -subroutine ZMQ_selection(N_in, pt2, variance, norm) +subroutine ZMQ_selection(N_in, pt2_data) use f77_zmq use selection_types @@ -7,15 +7,14 @@ subroutine ZMQ_selection(N_in, pt2, variance, norm) integer(ZMQ_PTR) :: zmq_to_qp_run_socket , zmq_socket_pull integer, intent(in) :: N_in type(selection_buffer) :: b - integer :: i, N + integer :: i, l, N integer, external :: omp_get_thread_num - double precision, intent(out) :: pt2(N_states) - double precision, intent(out) :: variance(N_states) - double precision, intent(out) :: norm(N_states) + type(pt2_type), intent(inout) :: pt2_data ! PROVIDE psi_det psi_coef N_det qp_max_mem N_states pt2_F s2_eig N_det_generators N = max(N_in,1) + N = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2) if (.True.) then PROVIDE pt2_e0_denominator nproc PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique @@ -78,6 +77,7 @@ subroutine ZMQ_selection(N_in, pt2, variance, norm) stop 'Unable to add task to task server' endif endif + N = max(N_in,1) ASSERT (associated(b%det)) @@ -104,42 +104,51 @@ subroutine ZMQ_selection(N_in, pt2, variance, norm) f(:) = 1.d0 if (.not.do_pt2) then - double precision :: f(N_states), u_dot_u + double precision :: f(N_states), u_dot_u do k=1,min(N_det,N_states) f(k) = 1.d0 / u_dot_u(psi_selectors_coef(1,k), N_det_selectors) enddo endif - !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2, variance, norm) PRIVATE(i) NUM_THREADS(nproc_target+1) + !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2_data) PRIVATE(i) NUM_THREADS(nproc_target+1) i = omp_get_thread_num() if (i==0) then - call selection_collector(zmq_socket_pull, b, N, pt2, variance, norm) + call selection_collector(zmq_socket_pull, b, N, pt2_data) else call selection_slave_inproc(i) endif !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'selection') - do i=N_det+1,N_states - pt2(i) = 0.d0 - variance(i) = 0.d0 - norm(i) = 0.d0 - enddo if (N_in > 0) then if (s2_eig) then call make_selection_buffer_s2(b) endif call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) - call copy_H_apply_buffer_to_wf() - call save_wavefunction endif call delete_selection_buffer(b) + do k=1,N_states - pt2(k) = pt2(k) * f(k) - variance(k) = variance(k) * f(k) - norm(k) = norm(k) * f(k) + pt2_data % pt2(k) = pt2_data % pt2(k) * f(k) + pt2_data % variance(k) = pt2_data % variance(k) * f(k) + do l=1,N_states + pt2_data % overlap(k,l) = pt2_data % overlap(k,l) * dsqrt(f(k)*f(l)) + pt2_data % overlap(l,k) = pt2_data % overlap(l,k) * dsqrt(f(k)*f(l)) + enddo + + pt2_data % rpt2(k) = & + pt2_data % pt2(k)/(1.d0 + pt2_data % overlap(k,k)) enddo - call update_pt2_and_variance_weights(pt2, variance, norm, N_states) + pt2_overlap(:,:) = pt2_data % overlap(:,:) + + print *, 'Overlap of perturbed states:' + do l=1,N_states + print *, pt2_overlap(l,:) + enddo + print *, '-------' + SOFT_TOUCH pt2_overlap + call update_pt2_and_variance_weights(pt2_data, N_states) end subroutine @@ -151,7 +160,7 @@ subroutine selection_slave_inproc(i) call run_selection_slave(1,i,pt2_e0_denominator) end -subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm) +subroutine selection_collector(zmq_socket_pull, b, N, pt2_data) use f77_zmq use selection_types use bitmasks @@ -161,12 +170,12 @@ subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm) integer(ZMQ_PTR), intent(in) :: zmq_socket_pull type(selection_buffer), intent(inout) :: b integer, intent(in) :: N - double precision, intent(out) :: pt2(N_states) - double precision, intent(out) :: variance(N_states) - double precision, intent(out) :: norm(N_states) - double precision :: pt2_mwen(N_states) - double precision :: variance_mwen(N_states) - double precision :: norm_mwen(N_states) + type(pt2_type), intent(inout) :: pt2_data + type(pt2_type) :: pt2_data_tmp + + double precision :: pt2_mwen(N_states) + double precision :: variance_mwen(N_states) + double precision :: norm2_mwen(N_states) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -184,24 +193,21 @@ subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() call create_selection_buffer(N, N*2, b2) + integer :: k double precision :: rss double precision, external :: memory_of_int rss = memory_of_int(N_det_generators) call check_mem(rss,irp_here) allocate(task_id(N_det_generators)) more = 1 - pt2(:) = 0d0 - variance(:) = 0.d0 - norm(:) = 0.d0 - pt2_mwen(:) = 0.d0 - variance_mwen(:) = 0.d0 - norm_mwen(:) = 0.d0 + pt2_data % pt2(:) = 0d0 + pt2_data % variance(:) = 0.d0 + pt2_data % overlap(:,:) = 0.d0 + call pt2_alloc(pt2_data_tmp,N_states) do while (more == 1) - call pull_selection_results(zmq_socket_pull, pt2_mwen, variance_mwen, norm_mwen, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask) + call pull_selection_results(zmq_socket_pull, pt2_data_tmp, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask) - pt2(:) += pt2_mwen(:) - variance(:) += variance_mwen(:) - norm(:) += norm_mwen(:) + call pt2_add(pt2_data, 1.d0, pt2_data_tmp) do i=1, b2%cur call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i)) if (b2%val(i) > b%mini) exit @@ -217,6 +223,7 @@ subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm) endif end do end do + call pt2_dealloc(pt2_data_tmp) call delete_selection_buffer(b2) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index a5e85777..aa748628 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -450,7 +450,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ if (s2_eig) then h_p = s_ do k=1,shift2 - h_p(k,k) = h_p(k,k) + S_z2_Sz - expected_s2 + h_p(k,k) = h_p(k,k) - expected_s2 enddo if (only_expected_s2) then alpha = 0.1d0 @@ -496,7 +496,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ 0.d0, s_, size(s_,1)) do k=1,shift2 - s2(k) = s_(k,k) + S_z2_Sz + s2(k) = s_(k,k) enddo if (only_expected_s2) then diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f index 996011cd..33e9478a 100644 --- a/src/davidson/diagonalize_ci.irp.f +++ b/src/davidson/diagonalize_ci.irp.f @@ -107,7 +107,7 @@ END_PROVIDER H_prime(1:N_det,1:N_det) = H_matrix_all_dets(1:N_det,1:N_det) + & alpha * S2_matrix_all_dets(1:N_det,1:N_det) do j=1,N_det - H_prime(j,j) = H_prime(j,j) + alpha*(S_z2_Sz - expected_s2) + H_prime(j,j) = H_prime(j,j) - alpha*expected_s2 enddo call lapack_diag(eigenvalues,eigenvectors,H_prime,size(H_prime,1),N_det) CI_electronic_energy(:) = 0.d0 diff --git a/src/determinants/EZFIO.cfg b/src/determinants/EZFIO.cfg index ef00080b..662c6fbb 100644 --- a/src/determinants/EZFIO.cfg +++ b/src/determinants/EZFIO.cfg @@ -44,7 +44,7 @@ default: 2 type: integer doc: Weight used in the selection. 0: input state-average weight, 1: 1./(c_0^2), 2: rPT2 matching, 3: variance matching, 4: variance and rPT2 matching, 5: variance minimization and matching, 6: CI coefficients 7: input state-average multiplied by variance and rPT2 matching 8: input state-average multiplied by rPT2 matching 9: input state-average multiplied by variance matching interface: ezfio,provider,ocaml -default: 2 +default: 1 [threshold_generators] type: Threshold diff --git a/src/determinants/s2.irp.f b/src/determinants/s2.irp.f index 391d0073..d73b2dbf 100644 --- a/src/determinants/s2.irp.f +++ b/src/determinants/s2.irp.f @@ -8,24 +8,35 @@ double precision function diag_S_mat_elem(key_i,Nint) BEGIN_DOC ! Returns END_DOC - integer :: nup, i - integer(bit_kind) :: xorvec(N_int_max) + integer :: nup, ntot, i + integer(bit_kind) :: xorvec(N_int_max), upvec(N_int_max) do i=1,Nint xorvec(i) = xor(key_i(i,1),key_i(i,2)) enddo do i=1,Nint - xorvec(i) = iand(xorvec(i),key_i(i,1)) + upvec(i) = iand(xorvec(i),key_i(i,1)) enddo + ! nup is number of alpha unpaired + ! ntot is total number of unpaired nup = 0 + ntot = 0 do i=1,Nint if (xorvec(i) /= 0_bit_kind) then - nup += popcnt(xorvec(i)) + ntot += popcnt(xorvec(i)) + if (upvec(i) /= 0_bit_kind) then + nup += popcnt(upvec(i)) + endif endif enddo - diag_S_mat_elem = dble(nup) + + double precision :: sz + sz = nup - 0.5d0*ntot + + ! = + Sz(Sz-1) + diag_S_mat_elem = nup + sz*(sz-1) end @@ -125,7 +136,7 @@ subroutine u_0_S2_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze_8) call S2_u_0_nstates(v_0,u_0,n,keys_tmp,Nint,N_st,sze_8) do i=1,N_st - e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/u_dot_u(u_0(1,i),n) + S_z2_Sz + e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/u_dot_u(u_0(1,i),n) enddo end diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f index 6b164816..ea8e0284 100644 --- a/src/determinants/slater_rules.irp.f +++ b/src/determinants/slater_rules.irp.f @@ -1776,12 +1776,12 @@ subroutine ac_operator(iorb,ispin,key,hjj,Nint,na,nb) integer :: k,l,i if (iorb < 1) then - print *, irp_here, 'iorb < 1' + print *, irp_here, ': iorb < 1' print *, iorb, mo_num stop -1 endif if (iorb > mo_num) then - print *, irp_here, 'iorb > mo_num' + print *, irp_here, ': iorb > mo_num' print *, iorb, mo_num stop -1 endif diff --git a/src/determinants/spindeterminants.irp.f b/src/determinants/spindeterminants.irp.f index 716c81ee..232c9e2b 100644 --- a/src/determinants/spindeterminants.irp.f +++ b/src/determinants/spindeterminants.irp.f @@ -354,6 +354,31 @@ subroutine write_spindeterminants call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows) call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns) +end + +subroutine read_spindeterminants + use bitmasks + implicit none + integer :: k + + call ezfio_get_spindeterminants_n_det(N_det) + call ezfio_get_spindeterminants_n_states(N_states) + TOUCH N_det N_states + + call ezfio_get_spindeterminants_n_det_alpha(N_det_alpha_unique) + call ezfio_get_spindeterminants_n_det_beta(N_det_beta_unique) + call ezfio_get_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values) + call ezfio_get_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows) + call ezfio_get_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns) + call ezfio_get_spindeterminants_psi_det_alpha(psi_det_alpha_unique) + call ezfio_get_spindeterminants_psi_det_beta(psi_det_beta_unique) + do k=1,N_det + psi_bilinear_matrix_order(k) = k + enddo + TOUCH psi_bilinear_matrix_values psi_bilinear_matrix_rows psi_bilinear_matrix_columns N_det_alpha_unique N_det_beta_unique psi_det_alpha_unique psi_det_beta_unique psi_bilinear_matrix_order + + call wf_of_psi_bilinear_matrix(.True.) + end BEGIN_PROVIDER [ double precision, det_alpha_norm, (N_det_alpha_unique) ] diff --git a/src/fci/pt2.irp.f b/src/fci/pt2.irp.f index c916e0ef..eb11e28c 100644 --- a/src/fci/pt2.irp.f +++ b/src/fci/pt2.irp.f @@ -28,35 +28,38 @@ end subroutine run implicit none + use selection_types integer :: i,j,k logical, external :: detEq - double precision :: pt2(N_states) + type(pt2_type) :: pt2_data, pt2_data_err integer :: degree integer :: n_det_before, to_select double precision :: threshold_davidson_in - double precision :: E_CI_before(N_states), relative_error, error(N_states), variance(N_states), norm(N_states), rpt2(N_states) + double precision :: relative_error + double precision, allocatable :: E_CI_before(:) - pt2(:) = 0.d0 + allocate ( E_CI_before(N_states)) + call pt2_alloc(pt2_data, N_states) + call pt2_alloc(pt2_data_err, N_states) E_CI_before(:) = psi_energy(:) + nuclear_repulsion relative_error=PT2_relative_error if (do_pt2) then - call ZMQ_pt2(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, & - norm,0) ! Stochastic PT2 + call ZMQ_pt2(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, relative_error, 0) ! Stochastic PT2 else - call ZMQ_selection(0, pt2, variance, norm) + call ZMQ_selection(0, pt2_data) endif - do k=1,N_states - rpt2(k) = pt2(k)/(1.d0 + norm(k)) - enddo + call print_summary(psi_energy_with_nucl_rep(1:N_states), & + pt2_data, pt2_data_err, N_det,N_occ_pattern,N_states,psi_s2) - call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm,N_det,N_occ_pattern,N_states,psi_s2) - - call save_energy(E_CI_before,pt2) + call save_energy(E_CI_before, pt2_data % pt2) + call pt2_dealloc(pt2_data) + call pt2_dealloc(pt2_data_err) + deallocate(E_CI_before) end diff --git a/src/hartree_fock/10.hf.bats b/src/hartree_fock/10.hf.bats index 6f13e595..54845685 100644 --- a/src/hartree_fock/10.hf.bats +++ b/src/hartree_fock/10.hf.bats @@ -11,6 +11,7 @@ function run() { qp edit --check qp reset --mos qp set scf_utils n_it_scf_max 50 + qp set ao_one_e_ints lin_dep_cutoff 1.e-50 qp run scf # qp set_frozen_core energy="$(ezfio get hartree_fock energy)" diff --git a/src/iterations/print_summary.irp.f b/src/iterations/print_summary.irp.f index ad87bc8e..d04d8a93 100644 --- a/src/iterations/print_summary.irp.f +++ b/src/iterations/print_summary.irp.f @@ -1,16 +1,17 @@ -subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_st,s2_) +subroutine print_summary(e_,pt2_data,pt2_data_err,n_det_,n_occ_pattern_,n_st,s2_) + use selection_types implicit none BEGIN_DOC ! Print the extrapolated energy in the output END_DOC integer, intent(in) :: n_det_, n_occ_pattern_, n_st - double precision, intent(in) :: e_(n_st), pt2_(n_st), variance_(n_st), norm_(n_st), error_(n_st), s2_(n_st) + double precision, intent(in) :: e_(n_st), s2_(n_st) + type(pt2_type) , intent(in) :: pt2_data, pt2_data_err integer :: i, k integer :: N_states_p character*(9) :: pt2_string character*(512) :: fmt - double precision :: f(n_st) if (do_pt2) then pt2_string = ' ' @@ -20,10 +21,6 @@ subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_ N_states_p = min(N_det_,n_st) - do i=1,N_states_p - f(i) = 1.d0/(1.d0+norm_(i)) - enddo - print *, '' print '(A,I12)', 'Summary at N_det = ', N_det_ print '(A)', '-----------------------------------' @@ -42,16 +39,16 @@ subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_ write(*,fmt) '# Excit. (eV)', (e_(1:N_states_p)-e_(1))*27.211396641308d0 endif write(fmt,*) '(A13,', 2*N_states_p, '(1X,F14.8))' - write(*,fmt) '# PT2 '//pt2_string, (pt2_(k), error_(k), k=1,N_states_p) - write(*,fmt) '# rPT2'//pt2_string, (pt2_(k)*f(k), error_(k)*f(k), k=1,N_states_p) + write(*,fmt) '# PT2 '//pt2_string, (pt2_data % pt2(k), pt2_data_err % pt2(k), k=1,N_states_p) + write(*,fmt) '# rPT2'//pt2_string, (pt2_data % rpt2(k), pt2_data_err % rpt2(k), k=1,N_states_p) write(*,'(A)') '#' - write(*,fmt) '# E+PT2 ', (e_(k)+pt2_(k),error_(k), k=1,N_states_p) - write(*,fmt) '# E+rPT2 ', (e_(k)+pt2_(k)*f(k),error_(k)*f(k), k=1,N_states_p) + write(*,fmt) '# E+PT2 ', (e_(k)+pt2_data % pt2(k),pt2_data_err % pt2(k), k=1,N_states_p) + write(*,fmt) '# E+rPT2 ', (e_(k)+pt2_data % rpt2(k),pt2_data_err % rpt2(k), k=1,N_states_p) if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', ( (e_(k)+pt2_(k)-e_(1)-pt2_(1)), & - dsqrt(error_(k)*error_(k)+error_(1)*error_(1)), k=1,N_states_p) - write(*,fmt) '# Excit. (eV)', ( (e_(k)+pt2_(k)-e_(1)-pt2_(1))*27.211396641308d0, & - dsqrt(error_(k)*error_(k)+error_(1)*error_(1))*27.211396641308d0, k=1,N_states_p) + write(*,fmt) '# Excit. (au)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1)), & + dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1)), k=1,N_states_p) + write(*,fmt) '# Excit. (eV)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1))*27.211396641308d0, & + dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1))*27.211396641308d0, k=1,N_states_p) endif write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' write(*,fmt) @@ -68,12 +65,12 @@ subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_ print*,'* State ',k print *, '< S^2 > = ', s2_(k) print *, 'E = ', e_(k) - print *, 'Variance = ', variance_(k) - print *, 'PT norm = ', dsqrt(norm_(k)) - print *, 'PT2 = ', pt2_(k) - print *, 'rPT2 = ', pt2_(k)*f(k) - print *, 'E+PT2 '//pt2_string//' = ', e_(k)+pt2_(k), ' +/- ', error_(k) - print *, 'E+rPT2'//pt2_string//' = ', e_(k)+pt2_(k)*f(k), ' +/- ', error_(k)*f(k) + print *, 'Variance = ', pt2_data % variance(k), ' +/- ', pt2_data_err % variance(k) + print *, 'PT norm = ', dsqrt(pt2_data % overlap(k,k)), ' +/- ', 0.5d0*dsqrt(pt2_data % overlap(k,k)) * pt2_data_err % overlap(k,k) / (pt2_data % overlap(k,k)) + print *, 'PT2 = ', pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k) + print *, 'rPT2 = ', pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k) + print *, 'E+PT2 '//pt2_string//' = ', e_(k)+pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k) + print *, 'E+rPT2'//pt2_string//' = ', e_(k)+pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k) print *, '' enddo @@ -87,14 +84,14 @@ subroutine print_summary(e_,pt2_,error_,variance_,norm_,n_det_,n_occ_pattern_,n_ print *, '-----' print*, 'Variational + perturbative Energy difference (au | eV)' do i=2, N_states_p - print*,'Delta E = ', (e_(i)+ pt2_(i) - (e_(1) + pt2_(1))), & - (e_(i)+ pt2_(i) - (e_(1) + pt2_(1))) * 27.211396641308d0 + print*,'Delta E = ', (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))), & + (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))) * 27.211396641308d0 enddo print *, '-----' print*, 'Variational + renormalized perturbative Energy difference (au | eV)' do i=2, N_states_p - print*,'Delta E = ', (e_(i)+ pt2_(i)*f(i) - (e_(1) + pt2_(1)*f(1))), & - (e_(i)+ pt2_(i)*f(i) - (e_(1) + pt2_(1)*f(1))) * 27.211396641308d0 + print*,'Delta E = ', (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))), & + (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))) * 27.211396641308d0 enddo endif diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index d1236ce7..dc44e262 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -186,10 +186,10 @@ END_DOC implicit none - double precision,intent(in) :: Fock_matrix_DIIS(ao_num,ao_num,*),error_matrix_DIIS(ao_num,ao_num,*) + integer,intent(inout) :: dim_DIIS + double precision,intent(in) :: Fock_matrix_DIIS(ao_num,ao_num,dim_DIIS),error_matrix_DIIS(ao_num,ao_num,dim_DIIS) integer,intent(in) :: iteration_SCF, size_Fock_matrix_AO double precision,intent(inout):: Fock_matrix_AO_(size_Fock_matrix_AO,ao_num) - integer,intent(inout) :: dim_DIIS double precision,allocatable :: B_matrix_DIIS(:,:),X_vector_DIIS(:) double precision,allocatable :: C_vector_DIIS(:) @@ -212,11 +212,12 @@ END_DOC ) ! Compute the matrices B and X + B_matrix_DIIS(:,:) = 0.d0 do j=1,dim_DIIS + j_DIIS = min(dim_DIIS,mod(iteration_SCF-j,max_dim_DIIS)+1) do i=1,dim_DIIS - j_DIIS = mod(iteration_SCF-j,max_dim_DIIS)+1 - i_DIIS = mod(iteration_SCF-i,max_dim_DIIS)+1 + i_DIIS = min(dim_DIIS,mod(iteration_SCF-i,max_dim_DIIS)+1) ! Compute product of two errors vectors @@ -229,7 +230,6 @@ END_DOC ! Compute Trace - B_matrix_DIIS(i,j) = 0.d0 do k=1,ao_num B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + scratch(k,k) enddo @@ -238,12 +238,11 @@ END_DOC ! Pad B matrix and build the X matrix + C_vector_DIIS(:) = 0.d0 do i=1,dim_DIIS B_matrix_DIIS(i,dim_DIIS+1) = -1.d0 B_matrix_DIIS(dim_DIIS+1,i) = -1.d0 - C_vector_DIIS(i) = 0.d0 enddo - B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1) = 0.d0 C_vector_DIIS(dim_DIIS+1) = -1.d0 deallocate(scratch) @@ -259,9 +258,10 @@ END_DOC allocate(AF(dim_DIIS+1,dim_DIIS+1)) allocate(ipiv(2*(dim_DIIS+1)), iwork(2*(dim_DIIS+1)) ) allocate(scratch(lwork,1)) + scratch(:,1) = 0.d0 anorm = dlange('1', dim_DIIS+1, dim_DIIS+1, B_matrix_DIIS, & - size(B_matrix_DIIS,1), scratch) + size(B_matrix_DIIS,1), scratch(1,1)) AF(:,:) = B_matrix_DIIS(:,:) call dgetrf(dim_DIIS+1,dim_DIIS+1,AF,size(AF,1),ipiv,info) diff --git a/src/tools/molden.irp.f b/src/tools/molden.irp.f index a7d5b978..417b25ad 100644 --- a/src/tools/molden.irp.f +++ b/src/tools/molden.irp.f @@ -17,7 +17,7 @@ program molden write(i_unit_output,'(A)') '[Molden Format]' - write(i_unit_output,'(A)') '[Atoms] ANGSTROM' + write(i_unit_output,'(A)') '[Atoms] Angs' do i = 1, nucl_num write(i_unit_output,'(A2,2X,I4,2X,I4,3(2X,F15.10))') & trim(element_name(int(nucl_charge(i)))), & diff --git a/src/tools/print_energy.irp.f b/src/tools/print_energy.irp.f index 4703e7d4..f78dffc8 100644 --- a/src/tools/print_energy.irp.f +++ b/src/tools/print_energy.irp.f @@ -8,23 +8,26 @@ program print_energy ! psi_coef_sorted are the wave function stored in the |EZFIO| directory. read_wf = .True. touch read_wf + PROVIDE N_states call run end subroutine run implicit none - integer :: i + integer :: i,j double precision :: i_H_psi_array(N_states) double precision :: E(N_states) double precision :: norm(N_states) - E(:) = nuclear_repulsion - norm(:) = 0.d0 + E(1:N_states) = nuclear_repulsion + norm(1:N_states) = 0.d0 do i=1,N_det call i_H_psi(psi_det(1,1,i), psi_det, psi_coef, N_int, N_det, & size(psi_coef,1), N_states, i_H_psi_array) - norm(:) += psi_coef(i,:)**2 - E(:) += i_H_psi_array(:) * psi_coef(i,:) + do j=1,N_states + norm(j) += psi_coef(i,j)*psi_coef(i,j) + E(j) += i_H_psi_array(j) * psi_coef(i,j) + enddo enddo print *, 'Energy:' diff --git a/src/zmq/utils.irp.f b/src/zmq/utils.irp.f index 503dedd2..07e3a88f 100644 --- a/src/zmq/utils.irp.f +++ b/src/zmq/utils.irp.f @@ -585,6 +585,7 @@ subroutine end_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,name_in) stop 'Wrong end of job' endif + message = repeat(' ',512) do i=360,1,-1 rc = f77_zmq_send(zmq_to_qp_run_socket, 'end_job '//trim(zmq_state),8+len(trim(zmq_state)),0) rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 512, 0) @@ -645,6 +646,7 @@ integer function connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) endif endif + message = repeat(' ',512) rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0) message = trim(message(1:rc)) if(message(1:5) == "error") then diff --git a/travis/compilation.sh b/travis/compilation.sh new file mode 100755 index 00000000..1aa26dda --- /dev/null +++ b/travis/compilation.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Stage 2 + +# Extract cache from config stage +cd ../ +tar -zxf $HOME/cache/config.tgz + +# Configure QP2 +cd qp2 +source ./quantum_package.rc +ninja -j 1 -v + +# Create cache +cd .. +tar -zcf $HOME/cache/compil.tgz qp2 && rm $HOME/cache/config.tgz + diff --git a/travis/configuration.sh b/travis/configuration.sh new file mode 100755 index 00000000..fa52e793 --- /dev/null +++ b/travis/configuration.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# Stage 1 + +# Configure QP2 +./configure --install all --config ./config/travis.cfg + +# Create cache +cd ../ +tar -zcf $HOME/cache/config.tgz qp2 + diff --git a/travis/testing.sh b/travis/testing.sh new file mode 100755 index 00000000..b2122f5c --- /dev/null +++ b/travis/testing.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Stage 3 + +# Extract cache from compile stage +cd ../ +tar -zxf $HOME/cache/compil.tgz + +# Configure QP2 +cd qp2 +source ./quantum_package.rc +qp_test -a && rm $HOME/cache/compil.tgz + + + + +