From 4098b0520222ba9fb527d1c4ade331520d5f8a69 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Sep 2018 15:19:51 +0200 Subject: [PATCH 01/28] Squashed commit of the following: commit 96715abd7bc0645b994fc4fff1c764e7bec3042e Author: Anthony Scemama Date: Thu Sep 6 12:08:34 2018 +0200 Tasks commit e43b1e2faff5ad1ec4ecd2eb99e8d86e0000a9a2 Author: Anthony Scemama Date: Thu Sep 6 11:47:02 2018 +0200 Fixed print commit c498c8944b5695b953909711a2e9a542e1bc6015 Author: Anthony Scemama Date: Thu Sep 6 11:11:51 2018 +0200 PT2 and shiftedBk fixed commit 965cf0361d54df096c9bfca93f9d50ecd946c198 Author: Anthony Scemama Date: Wed Sep 5 18:48:59 2018 +0200 Shifted Bk multistate broken commit 87ef641b65a122fa1256605cbe098c1a0de04bc0 Author: Anthony Scemama Date: Wed Sep 5 17:23:38 2018 +0200 PT2 fixed commit a2adb533bcaf96191a24ff5fcef86cd14ac00697 Author: Anthony Scemama Date: Wed Sep 5 16:55:05 2018 +0200 Working on PT2 (broken) commit 33f52991b65ac42ced5521ef0e713df765268860 Author: Anthony Scemama Date: Wed Sep 5 12:13:23 2018 +0200 Fixed missing argument commit 712bf75f76421880299dc65b30acfda3d531f709 Author: Anthony Scemama Date: Wed Sep 5 11:42:31 2018 +0200 Fixed floating invalid in PT2 commit cf2412ebd99f9acf573a5180603611f1c3e35155 Author: Anthony Scemama Date: Wed Sep 5 11:34:37 2018 +0200 n_states_diag >= n_states commit bb415435e4d9be72a285ec789c3d63046a9173e4 Author: Anthony Scemama Date: Wed Sep 5 11:23:41 2018 +0200 Fixed final print commit f2d339cf7b3535359bfc9d3057225b5957cf9e16 Merge: cfa8e1dc 52ca18c1 Author: Yann Garniron Date: Tue Sep 4 20:51:14 2018 +0200 Merge branch 'thesis' of ssh://github.com/garniron/quantum_package into thesis commit cfa8e1dc34c78f666aad677d2362a0b23e738e55 Author: Yann Garniron Date: Tue Sep 4 20:50:09 2018 +0200 restored relative_error commit bccad69c7785d79eb079bd15cd8d7143503ba9b5 Author: Yann Garniron Date: Tue Sep 4 20:47:11 2018 +0200 uninitialized variable commit 52ca18c1525cbe51f290813a4706928f227af142 Author: Yann Garniron Date: Tue Sep 4 20:47:11 2018 +0200 uninitialized variable commit ba0094f5f88153da295f479568bea9c6f494098c Merge: 093e3fd0 68458296 Author: Yann Garniron Date: Tue Sep 4 20:07:15 2018 +0200 merge with garniroy commit 68458296dcc86c57bab25582e138ae057a66f019 Author: Anthony Scemama Date: Tue Sep 4 18:43:39 2018 +0200 Almost working but still broken commit 9ebb88cbf32eed90ed2c1ff985128f271bcc1c15 Author: Anthony Scemama Date: Tue Sep 4 18:05:00 2018 +0200 Cleaning commit 873035e01635a1a0576cc0e8bf834beb3c60fc80 Author: Anthony Scemama Date: Tue Sep 4 17:31:45 2018 +0200 Squashed commit of the following: commit 4b9c435dce0f3b3078d573e66fd32b40fca26497 Merge: 74e559c8 093e3fd0 Author: Anthony Scemama Date: Tue Sep 4 16:58:51 2018 +0200 Merge branch 'thesis' of git://github.com/garniron/quantum_package into garniron-thesis commit 093e3fd021ca5ed73fe3be530fa18e79ceb0085e Author: Yann Garniron Date: Tue Sep 4 16:13:00 2018 +0200 removed ungodly hack commit 8529a0f3f66d2f1ba9d717b0de60b268a4a821a7 Author: Yann Garniron Date: Tue Sep 4 14:57:19 2018 +0200 reduced prints in pt2_stoch commit 03b8f353bd94b837c02e570a8d3fb68689003135 Author: Yann Garniron Date: Tue Sep 4 14:41:46 2018 +0200 teeth building check for pt2_stoch commit 0d91b9310a7d580e8bc97a6a0af7a62a8506f8c0 Author: Yann Garniron Date: Tue Sep 4 14:35:04 2018 +0200 timestamp of first pull commit 34d9fa01657ed5d159a1c62f8f24cdb266d48153 Author: Yann Garniron Date: Tue Sep 4 14:27:10 2018 +0200 potential numerical precision bug commit 9a0f900d8c57bcbca36a09c11616e234528e1643 Author: Yann Garniron Date: Tue Sep 4 14:09:51 2018 +0200 tests if teeth can be built commit dda0dc34df82a59fac3c3abb2b14943fdb23c0f8 Author: Yann Garniron Date: Mon Sep 3 17:48:04 2018 +0200 corrected pt2_find_sample commit a521f0cb82bd333ce9879b4dfd87ae1abcf2baaa Author: Yann Garniron Date: Mon Sep 3 16:08:02 2018 +0200 tasks get by batches of Nproc commit 997a5a1265951394753d3154a6136ca40177ba8d Author: Yann Garniron Date: Mon Sep 3 14:18:04 2018 +0200 buffered task_id send commit 99ea7948e0d58023f7ce02386014c8c3d493deb4 Author: Yann Garniron Date: Mon Sep 3 12:29:12 2018 +0200 unbalanced fragmentation commit abb3b7e08bebdcaf84208ebbe404441d3571b1f9 Author: Yann Garniron Date: Sun Sep 2 17:18:44 2018 +0200 overflow of pt2_J commit 8df49f394b588ca304ce2ee7d8f8095455573784 Author: Yann Garniron Date: Sun Sep 2 15:58:48 2018 +0200 removed useless computation of intermediate checkpoints commit 4ba5b79eb391819a4966c7cd19b2e2fdf2ed5129 Author: Yann Garniron Date: Sun Sep 2 15:50:14 2018 +0200 dressing only sent for chosen checkpoint commit a4a6a69459321b1b210c2f51ffbef4f792943919 Author: Yann Garniron Date: Sat Sep 1 17:01:56 2018 +0200 cumulative dot_F commit 6a7f04cb79ec921d5829a011aa8d8b2ea224760d Author: Yann Garniron Date: Sat Sep 1 16:58:07 2018 +0200 simpler purge commit 168ca2f2e29b902ab37afc58672b678998f0212a Author: Yann Garniron Date: Fri Aug 31 21:07:01 2018 +0200 task list optimized commit de4a0d0caf0e1d441e0244a9ba7c65ae535d58b1 Author: Yann Garniron Date: Fri Aug 31 18:57:03 2018 +0200 removed print commit fee31d4e3e70c2edf1aeda202d3f067eddc17532 Author: Yann Garniron Date: Fri Aug 31 18:56:23 2018 +0200 dress fragmentation commit 02893a419de07c922708fe8ee2b828d9ebfd2e9b Author: Yann Garniron Date: Fri Aug 31 15:52:16 2018 +0200 bug in blocked search - replaced with thesis version commit bb6e073cf10039f8a333d120b88dda109169528b Author: Yann Garniron Date: Thu Aug 30 21:24:45 2018 +0200 ungodly hack to prevent double providing commit 0609e8c627ea7bf88d617e45dffeb64c9aea37b6 Author: Yann Garniron Date: Thu Aug 30 20:52:05 2018 +0200 debugging commit a254fdd7cffd8348846c910f240965104c969a09 Author: Yann Garniron Date: Thu Aug 30 15:24:07 2018 +0200 parallel bug commit 2a6c1941d45be78083ec8c1dccb453f51a12c338 Author: Yann Garniron Date: Thu Aug 30 11:43:11 2018 +0200 corrected when relative_error=0d0 commit bac039bdf1bd00673c0795b1cb23adb593e9d426 Author: Yann Garniron Date: Thu Aug 30 10:58:17 2018 +0200 relative error 1d-5 commit aae9d203ecbf8d53accc6bc35c2ab56aeeae78a6 Author: Yann Garniron Date: Thu Aug 30 10:07:02 2018 +0200 potential fragmentation bug commit ad69f39f99d0b0dd73f556fb13d1d55337c5b066 Author: Yann Garniron Date: Wed Aug 29 20:54:58 2018 +0200 dress_zmq re-implemented commit d78f64732a5493d7f10c7c80b564005e63a133fc Author: Yann Garniron Date: Wed Aug 29 11:30:19 2018 +0200 pt2_stoch re-implemented commit 4b9b54e19ac7459589681e5ff7aa358dde9f5fd5 Author: Yann Garniron Date: Tue Aug 28 10:24:38 2018 +0200 removed test for phase_mask_bit commit 3abccca5e35948e54a659cacccea42fbfcf4c296 Author: Yann Garniron Date: Fri Aug 3 23:44:05 2018 +0200 phasemask_bit commit 093e3fd021ca5ed73fe3be530fa18e79ceb0085e Author: Yann Garniron Date: Tue Sep 4 16:13:00 2018 +0200 removed ungodly hack commit 8529a0f3f66d2f1ba9d717b0de60b268a4a821a7 Author: Yann Garniron Date: Tue Sep 4 14:57:19 2018 +0200 reduced prints in pt2_stoch commit 03b8f353bd94b837c02e570a8d3fb68689003135 Author: Yann Garniron Date: Tue Sep 4 14:41:46 2018 +0200 teeth building check for pt2_stoch commit 0d91b9310a7d580e8bc97a6a0af7a62a8506f8c0 Author: Yann Garniron Date: Tue Sep 4 14:35:04 2018 +0200 timestamp of first pull commit 34d9fa01657ed5d159a1c62f8f24cdb266d48153 Author: Yann Garniron Date: Tue Sep 4 14:27:10 2018 +0200 potential numerical precision bug commit 9a0f900d8c57bcbca36a09c11616e234528e1643 Author: Yann Garniron Date: Tue Sep 4 14:09:51 2018 +0200 tests if teeth can be built commit dda0dc34df82a59fac3c3abb2b14943fdb23c0f8 Author: Yann Garniron Date: Mon Sep 3 17:48:04 2018 +0200 corrected pt2_find_sample commit a521f0cb82bd333ce9879b4dfd87ae1abcf2baaa Author: Yann Garniron Date: Mon Sep 3 16:08:02 2018 +0200 tasks get by batches of Nproc commit 997a5a1265951394753d3154a6136ca40177ba8d Author: Yann Garniron Date: Mon Sep 3 14:18:04 2018 +0200 buffered task_id send commit 99ea7948e0d58023f7ce02386014c8c3d493deb4 Author: Yann Garniron Date: Mon Sep 3 12:29:12 2018 +0200 unbalanced fragmentation commit abb3b7e08bebdcaf84208ebbe404441d3571b1f9 Author: Yann Garniron Date: Sun Sep 2 17:18:44 2018 +0200 overflow of pt2_J commit 8df49f394b588ca304ce2ee7d8f8095455573784 Author: Yann Garniron Date: Sun Sep 2 15:58:48 2018 +0200 removed useless computation of intermediate checkpoints commit 4ba5b79eb391819a4966c7cd19b2e2fdf2ed5129 Author: Yann Garniron Date: Sun Sep 2 15:50:14 2018 +0200 dressing only sent for chosen checkpoint commit a4a6a69459321b1b210c2f51ffbef4f792943919 Author: Yann Garniron Date: Sat Sep 1 17:01:56 2018 +0200 cumulative dot_F commit 6a7f04cb79ec921d5829a011aa8d8b2ea224760d Author: Yann Garniron Date: Sat Sep 1 16:58:07 2018 +0200 simpler purge commit 168ca2f2e29b902ab37afc58672b678998f0212a Author: Yann Garniron Date: Fri Aug 31 21:07:01 2018 +0200 task list optimized commit de4a0d0caf0e1d441e0244a9ba7c65ae535d58b1 Author: Yann Garniron Date: Fri Aug 31 18:57:03 2018 +0200 removed print commit fee31d4e3e70c2edf1aeda202d3f067eddc17532 Author: Yann Garniron Date: Fri Aug 31 18:56:23 2018 +0200 dress fragmentation commit 02893a419de07c922708fe8ee2b828d9ebfd2e9b Author: Yann Garniron Date: Fri Aug 31 15:52:16 2018 +0200 bug in blocked search - replaced with thesis version commit bb6e073cf10039f8a333d120b88dda109169528b Author: Yann Garniron Date: Thu Aug 30 21:24:45 2018 +0200 ungodly hack to prevent double providing commit 0609e8c627ea7bf88d617e45dffeb64c9aea37b6 Author: Yann Garniron Date: Thu Aug 30 20:52:05 2018 +0200 debugging commit a254fdd7cffd8348846c910f240965104c969a09 Author: Yann Garniron Date: Thu Aug 30 15:24:07 2018 +0200 parallel bug commit 2a6c1941d45be78083ec8c1dccb453f51a12c338 Author: Yann Garniron Date: Thu Aug 30 11:43:11 2018 +0200 corrected when relative_error=0d0 commit bac039bdf1bd00673c0795b1cb23adb593e9d426 Author: Yann Garniron Date: Thu Aug 30 10:58:17 2018 +0200 relative error 1d-5 commit aae9d203ecbf8d53accc6bc35c2ab56aeeae78a6 Author: Yann Garniron Date: Thu Aug 30 10:07:02 2018 +0200 potential fragmentation bug commit ad69f39f99d0b0dd73f556fb13d1d55337c5b066 Author: Yann Garniron Date: Wed Aug 29 20:54:58 2018 +0200 dress_zmq re-implemented commit d78f64732a5493d7f10c7c80b564005e63a133fc Author: Yann Garniron Date: Wed Aug 29 11:30:19 2018 +0200 pt2_stoch re-implemented commit 4b9b54e19ac7459589681e5ff7aa358dde9f5fd5 Author: Yann Garniron Date: Tue Aug 28 10:24:38 2018 +0200 removed test for phase_mask_bit commit 3abccca5e35948e54a659cacccea42fbfcf4c296 Author: Yann Garniron Date: Fri Aug 3 23:44:05 2018 +0200 phasemask_bit --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 7 +- plugins/Full_CI_ZMQ/fci_zmq_nos.irp.f | 246 ---- plugins/Full_CI_ZMQ/pt2_stoch.irp.f | 34 +- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 763 +++++------- plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 21 +- plugins/Full_CI_ZMQ/run_selection_slave.irp.f | 120 +- plugins/Full_CI_ZMQ/selection.irp.f | 23 +- .../selection_davidson_slave.irp.f | 2 +- plugins/Full_CI_ZMQ/selection_slave.irp.f | 2 +- plugins/Full_CI_ZMQ/zmq_selection.irp.f | 22 +- plugins/Generators_full/generators.irp.f | 4 +- plugins/Perturbation/EZFIO.cfg | 6 - plugins/dress_zmq/EZFIO.cfg.example | 6 + plugins/dress_zmq/alpha_factory.irp.f | 25 +- plugins/dress_zmq/dress_general.irp.f | 10 +- plugins/dress_zmq/dress_slave.irp.f | 4 +- plugins/dress_zmq/dress_stoch_routines.irp.f | 1064 ++++++++--------- plugins/dress_zmq/dressing.irp.f | 103 +- plugins/dress_zmq/run_dress_slave.irp.f | 533 ++++----- plugins/shiftedbk/EZFIO.cfg | 6 + plugins/shiftedbk/shifted_bk_iter.irp.f | 159 --- plugins/shiftedbk/shifted_bk_routines.irp.f | 3 +- src/Davidson/EZFIO.cfg | 2 +- src/Davidson/ezfio.irp.f | 35 + src/Davidson/u0Hu0.irp.f | 1 + src/Determinants/slater_rules.irp.f | 134 ++- 26 files changed, 1330 insertions(+), 2005 deletions(-) delete mode 100644 plugins/Full_CI_ZMQ/fci_zmq_nos.irp.f delete mode 100644 plugins/shiftedbk/shifted_bk_iter.irp.f create mode 100644 src/Davidson/ezfio.irp.f diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 933056d4..0ec775a9 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -10,12 +10,11 @@ program fci_zmq double precision :: hf_energy_ref logical :: has - double precision :: relative_error, absolute_error + double precision :: relative_error integer :: N_states_p character*(512) :: fmt relative_error=PT2_relative_error - absolute_error=PT2_absolute_error pt2 = -huge(1.e0) threshold_davidson_in = threshold_davidson @@ -72,7 +71,7 @@ program fci_zmq threshold_selectors = 1.d0 threshold_generators = 1.d0 SOFT_TOUCH threshold_selectors threshold_generators - call ZMQ_pt2(CI_energy, pt2,relative_error,absolute_error,error) ! Stochastic PT2 + call ZMQ_pt2(CI_energy, pt2,relative_error,error) ! Stochastic PT2 threshold_selectors = threshold_selectors_save threshold_generators = threshold_generators_save SOFT_TOUCH threshold_selectors threshold_generators @@ -184,7 +183,7 @@ program fci_zmq threshold_selectors = 1.d0 threshold_generators = 1d0 SOFT_TOUCH threshold_selectors threshold_generators - call ZMQ_pt2(CI_energy, pt2,relative_error,absolute_error,error) ! Stochastic PT2 + call ZMQ_pt2(CI_energy, pt2,relative_error,error) ! Stochastic PT2 threshold_selectors = threshold_selectors_save threshold_generators = threshold_generators_save SOFT_TOUCH threshold_selectors threshold_generators diff --git a/plugins/Full_CI_ZMQ/fci_zmq_nos.irp.f b/plugins/Full_CI_ZMQ/fci_zmq_nos.irp.f deleted file mode 100644 index 01405ef8..00000000 --- a/plugins/Full_CI_ZMQ/fci_zmq_nos.irp.f +++ /dev/null @@ -1,246 +0,0 @@ -program fci_zmq - implicit none - integer :: i,j,k - double precision, allocatable :: pt2(:) - integer :: degree - integer :: n_det_before, to_select - double precision :: threshold_davidson_in - - allocate (pt2(N_states)) - - double precision :: hf_energy_ref - logical :: has - double precision :: relative_error, absolute_error - integer :: N_states_p - character*(512) :: fmt - - relative_error=PT2_relative_error - absolute_error=PT2_absolute_error - - pt2 = -huge(1.e0) - threshold_davidson_in = threshold_davidson - threshold_davidson = threshold_davidson_in * 100.d0 - SOFT_TOUCH threshold_davidson - - call diagonalize_CI - call save_wavefunction - - call ezfio_has_hartree_fock_energy(has) - if (has) then - call ezfio_get_hartree_fock_energy(hf_energy_ref) - else - hf_energy_ref = ref_bitmask_energy - endif - - if (N_det > N_det_max) then - psi_det = psi_det_sorted - psi_coef = psi_coef_sorted - N_det = N_det_max - soft_touch N_det psi_det psi_coef - call diagonalize_CI - call save_wavefunction - N_states_p = min(N_det,N_states) - endif - - n_det_before = 0 - - character*(8) :: pt2_string - double precision :: correlation_energy_ratio - double precision :: threshold_selectors_save, threshold_generators_save - threshold_selectors_save = threshold_selectors - threshold_generators_save = threshold_generators - double precision :: error(N_states) - - correlation_energy_ratio = 0.d0 - - if (.True.) then ! Avoid pre-calculation of CI_energy - do while ( & - (N_det < N_det_max) .and. & - (maxval(abs(pt2(1:N_states))) > pt2_max) .and. & - (correlation_energy_ratio <= correlation_energy_ratio_max) & - ) - write(*,'(A)') '--------------------------------------------------------------------------------' - - - if (do_pt2) then - pt2_string = ' ' - pt2 = 0.d0 - threshold_selectors = 1.d0 - threshold_generators = 1d0 - SOFT_TOUCH threshold_selectors threshold_generators - call ZMQ_pt2(CI_energy, pt2,relative_error,absolute_error,error) ! Stochastic PT2 - threshold_selectors = threshold_selectors_save - threshold_generators = threshold_generators_save - SOFT_TOUCH threshold_selectors threshold_generators - else - pt2_string = '(approx)' - endif - - - correlation_energy_ratio = (CI_energy(1) - hf_energy_ref) / & - (CI_energy(1) + pt2(1) - hf_energy_ref) - correlation_energy_ratio = min(1.d0,correlation_energy_ratio) - - N_states_p = min(N_det,N_states) - - print *, '' - print '(A,I12)', 'Summary at N_det = ', N_det - print '(A)', '-----------------------------------' - print *, '' - call write_double(6,correlation_energy_ratio, 'Correlation ratio') - print *, '' - - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - write(fmt,*) '(12X,', N_states_p, '(6X,A7,1X,I6,10X))' - write(*,fmt) ('State',k, k=1,N_states_p) - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - write(fmt,*) '(A12,', N_states_p, '(1X,F14.8,15X))' - write(*,fmt) '# E ', CI_energy(1:N_states_p) - if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', CI_energy(1:N_states_p)-CI_energy(1) - write(*,fmt) '# Excit. (eV)', (CI_energy(1:N_states_p)-CI_energy(1))*27.211396641308d0 - endif - write(fmt,*) '(A12,', 2*N_states_p, '(1X,F14.8))' - write(*,fmt) '# PT2'//pt2_string, (pt2(k), error(k), k=1,N_states_p) - write(*,'(A)') '#' - write(*,fmt) '# E+PT2 ', (CI_energy(k)+pt2(k),error(k), k=1,N_states_p) - if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', ( (CI_energy(k)+pt2(k)-CI_energy(1)-pt2(1)), & - dsqrt(error(k)*error(k)+error(1)*error(1)), k=1,N_states_p) - write(*,fmt) '# Excit. (eV)', ( (CI_energy(k)+pt2(k)-CI_energy(1)-pt2(1))*27.211396641308d0, & - dsqrt(error(k)*error(k)+error(1)*error(1))*27.211396641308d0, k=1,N_states_p) - endif - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - print *, '' - - print *, 'N_det = ', N_det - print *, 'N_states = ', N_states - print*, 'correlation_ratio = ', correlation_energy_ratio - - do k=1, N_states_p - print*,'State ',k - print *, 'PT2 = ', pt2(k) - print *, 'E = ', CI_energy(k) - print *, 'E+PT2'//pt2_string//' = ', CI_energy(k)+pt2(k), ' +/- ', error(k) - enddo - - print *, '-----' - if(N_states.gt.1)then - print *, 'Variational Energy difference (au | eV)' - do i=2, N_states_p - print*,'Delta E = ', (CI_energy(i) - CI_energy(1)), & - (CI_energy(i) - CI_energy(1)) * 27.211396641308d0 - enddo - print *, '-----' - print*, 'Variational + perturbative Energy difference (au | eV)' - do i=2, N_states_p - print*,'Delta E = ', (CI_energy(i)+ pt2(i) - (CI_energy(1) + pt2(1))), & - (CI_energy(i)+ pt2(i) - (CI_energy(1) + pt2(1))) * 27.211396641308d0 - enddo - endif - call ezfio_set_full_ci_zmq_energy_pt2(CI_energy(1)+pt2(1)) - call dump_fci_iterations_value(N_det,CI_energy,pt2) - - n_det_before = N_det - if (s2_eig) then - to_select = N_det/2+1 - to_select = max(N_det/2+1, to_select) - to_select = min(to_select, N_det_max-n_det_before) - else - to_select = N_det - to_select = max(N_det, to_select) - to_select = min(to_select, N_det_max-n_det_before) - endif - call save_natural_mos - call map_deinit(mo_integrals_map) - FREE mo_integrals_map - PROVIDE mo_integrals_map - call four_index_transform_block(ao_integrals_map,mo_integrals_map, & - mo_coef, size(mo_coef,1), & - 1, 1, 1, 1, ao_num, ao_num, ao_num, ao_num, & - 1, 1, 1, 1, mo_num, mo_num, mo_num, mo_num) - - call ZMQ_selection(to_select, pt2) - - PROVIDE psi_coef - PROVIDE psi_det - PROVIDE psi_det_sorted - - if (N_det >= N_det_max) then - threshold_davidson = threshold_davidson_in - end if - call diagonalize_CI - call save_wavefunction - call ezfio_set_full_ci_zmq_energy(CI_energy(1)) - enddo - endif - - if (N_det < N_det_max) then - threshold_davidson = threshold_davidson_in - call diagonalize_CI - call save_wavefunction - call ezfio_set_full_ci_zmq_energy(CI_energy(1)) - call ezfio_set_full_ci_zmq_energy_pt2(CI_energy(1)+pt2(1)) - endif - - if (do_pt2) then - pt2 = 0.d0 - threshold_selectors = 1.d0 - threshold_generators = 1d0 - SOFT_TOUCH threshold_selectors threshold_generators - call ZMQ_pt2(CI_energy, pt2,relative_error,absolute_error,error) ! Stochastic PT2 - threshold_selectors = threshold_selectors_save - threshold_generators = threshold_generators_save - SOFT_TOUCH threshold_selectors threshold_generators - call ezfio_set_full_ci_zmq_energy(CI_energy(1)) - call ezfio_set_full_ci_zmq_energy_pt2(CI_energy(1)+pt2(1)) - endif - print *, 'N_det = ', N_det - print *, 'N_states = ', N_states - print*, 'correlation_ratio = ', correlation_energy_ratio - - - call dump_fci_iterations_value(N_det,CI_energy,pt2) - - print *, '' - print '(A,I12)', 'Summary at N_det = ', N_det - print '(A)', '-----------------------------------' - print *, '' - call write_double(6,correlation_energy_ratio, 'Correlation ratio') - print *, '' - - - N_states_p = min(N_det,N_states) - print *, '' - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - write(fmt,*) '(12X,', N_states_p, '(6X,A7,1X,I6,10X))' - write(*,fmt) ('State',k, k=1,N_states_p) - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - write(fmt,*) '(A12,', N_states_p, '(1X,F14.8,15X))' - write(*,fmt) '# E ', CI_energy(1:N_states_p) - if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', CI_energy(1:N_states_p)-CI_energy(1) - write(*,fmt) '# Excit. (eV)', (CI_energy(1:N_states_p)-CI_energy(1))*27.211396641308d0 - endif - write(fmt,*) '(A12,', 2*N_states_p, '(1X,F14.8))' - write(*,fmt) '# PT2'//pt2_string, (pt2(k), error(k), k=1,N_states_p) - write(*,'(A)') '#' - write(*,fmt) '# E+PT2 ', (CI_energy(k)+pt2(k),error(k), k=1,N_states_p) - if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', ( (CI_energy(k)+pt2(k)-CI_energy(1)-pt2(1)), & - dsqrt(error(k)*error(k)+error(1)*error(1)), k=1,N_states_p) - write(*,fmt) '# Excit. (eV)', ( (CI_energy(k)+pt2(k)-CI_energy(1)-pt2(1))*27.211396641308d0, & - dsqrt(error(k)*error(k)+error(1)*error(1))*27.211396641308d0, k=1,N_states_p) - endif - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - print *, '' - - - -end diff --git a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f index 83146fb0..e29fe3dc 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f @@ -12,30 +12,30 @@ subroutine run integer :: i,j,k logical, external :: detEq - double precision, allocatable :: pt2(:) + double precision :: pt2(N_states) integer :: degree integer :: n_det_before, to_select double precision :: threshold_davidson_in - - double precision :: E_CI_before, relative_error, absolute_error, eqt - - allocate (pt2(N_states)) + + double precision :: E_CI_before(N_states), relative_error, error(N_states) + pt2(:) = 0.d0 - E_CI_before = psi_energy(1) + nuclear_repulsion + E_CI_before(:) = psi_energy(:) + nuclear_repulsion threshold_selectors = 1.d0 - threshold_generators = 1.d0 + threshold_generators = 1.d0 relative_error=PT2_relative_error - absolute_error=PT2_absolute_error - - call ZMQ_pt2(E_CI_before, pt2, relative_error, absolute_error, eqt) - print *, 'Final step' - print *, 'N_det = ', N_det - print *, 'PT2 = ', pt2 - print *, 'E = ', E_CI_before - print *, 'E+PT2 = ', E_CI_before+pt2, ' +/- ', eqt - print *, '-----' - call ezfio_set_full_ci_zmq_energy_pt2(E_CI_before+pt2(1)) + + call ZMQ_pt2(E_CI_before, pt2, relative_error, error) + do k=1,N_states + print *, 'State ', k + print *, 'N_det = ', N_det + print *, 'PT2 = ', pt2 + print *, 'E = ', E_CI_before(k) + print *, 'E+PT2 = ', E_CI_before(k)+pt2(k), ' +/- ', error(k) + print *, '-----' + enddo + call ezfio_set_full_ci_zmq_energy_pt2(E_CI_before(1)+pt2(1)) end diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index e6e2418f..3c8e797b 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -1,32 +1,108 @@ -BEGIN_PROVIDER [ integer, fragment_first ] - implicit none - fragment_first = first_det_of_teeth(1) +BEGIN_PROVIDER [ integer, pt2_stoch_istate ] + implicit none + BEGIN_DOC + ! State for stochatsic PT2 + END_DOC + pt2_stoch_istate = 1 END_PROVIDER -subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) + + BEGIN_PROVIDER [ integer, pt2_N_teeth ] +&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ] +&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ] +&BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ] + implicit none + logical, external :: testTeethBuilding + integer :: i + pt2_F(:) = 1 + pt2_n_tasks_max = N_det_generators/100 + 1 + do i=1,N_det_generators + if (maxval(dabs(psi_coef_sorted_gen(i,:))) > 0.005d0) then + pt2_F(i) = max(1,min( ((elec_alpha_num-n_core_orb)**2)/4, pt2_n_tasks_max)) + endif + enddo + + if(N_det_generators < 1024) then + pt2_minDetInFirstTeeth = 1 + pt2_N_teeth = 1 + else + pt2_minDetInFirstTeeth = min(5, N_det_generators) + do pt2_N_teeth=100,2,-1 + if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit + end do + end if + call write_int(6,pt2_N_teeth,'Number of comb teeth') +END_PROVIDER + + +logical function testTeethBuilding(minF, N) + implicit none + integer, intent(in) :: minF, N + integer :: n0, i + double precision :: u0, Wt, r + + double precision, allocatable :: tilde_w(:), tilde_cW(:) + integer, external :: dress_find_sample + + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + do i=1,N_det_generators + tilde_w(i) = psi_coef_sorted_gen(i,pt2_stoch_istate)**2 + 1.d-20 + enddo + + double precision :: norm + norm = 0.d0 + do i=N_det_generators,1,-1 + norm += tilde_w(i) + enddo + + tilde_w(:) = tilde_w(:) / norm + + tilde_cW(0) = -1.d0 + do i=1,N_det_generators + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + tilde_cW(:) = tilde_cW(:) + 1.d0 + + n0 = 0 + testTeethBuilding = .false. + do + u0 = tilde_cW(n0) + r = tilde_cW(n0 + minF) + Wt = (1d0 - u0) / dble(N) + if (dabs(Wt) <= 1.d-3) then + return + endif + if(Wt >= r - u0) then + testTeethBuilding = .true. + return + end if + n0 += 1 + if(N_det_generators - n0 < minF * N) then + return + end if + end do + stop "exited testTeethBuilding" +end function + + + +subroutine ZMQ_pt2(E, pt2,relative_error, error) use f77_zmq use selection_types implicit none - character(len=64000) :: task integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull - type(selection_buffer) :: b integer, external :: omp_get_thread_num - double precision, intent(in) :: relative_error, absolute_error, E(N_states) + double precision, intent(in) :: relative_error, E(N_states) double precision, intent(out) :: pt2(N_states),error(N_states) - double precision, allocatable :: pt2_detail(:,:), comb(:) - logical, allocatable :: computed(:) - integer, allocatable :: tbc(:) - integer :: i, j, k, Ncomb, i_generator_end - integer, external :: pt2_find + integer :: i - double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) double precision, external :: omp_get_wtime double precision :: state_average_weight_save(N_states), w(N_states) - double precision :: time integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket if (N_det < max(10,N_states)) then @@ -40,26 +116,9 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) state_average_weight(:) = 0.d0 state_average_weight(pt2_stoch_istate) = 1.d0 TOUCH state_average_weight pt2_stoch_istate - - allocate(pt2_detail(N_states,N_det_generators+1), comb(N_det_generators), computed(N_det_generators), tbc(0:size_tbc)) - sumabove = 0d0 - sum2above = 0d0 - Nabove = 0d0 - - provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral pt2_weight psi_selectors - - computed = .false. - - tbc(0) = first_det_of_comb - 1 - do i=1, tbc(0) - tbc(i) = i - computed(i) = .true. - end do - - Ncomb=size(comb) - call get_carlo_workbatch(computed, comb, Ncomb, tbc) - pt2_detail = 0d0 + provide nproc pt2_F mo_bielec_integrals_in_map mo_mono_elec_integral pt2_w psi_selectors + print *, '========== ================= ================= =================' print *, ' Samples Energy Stat. Error Seconds ' print *, '========== ================= ================= =================' @@ -97,16 +156,15 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) endif - call create_selection_buffer(1, 1*2, b) - - integer :: ipos - ipos=1 - integer, external :: add_task_to_taskserver - - do i=1,tbc(0) - if(tbc(i) > fragment_first) then - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, tbc(i) + character(len=64000) :: task + integer :: j,k,ipos + ipos=1 + task = ' ' + + do i= 1, N_det_generators + do j=1,pt2_F(pt2_J(i)) + write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, pt2_J(i) ipos += 20 if (ipos > 63980) then if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then @@ -114,19 +172,8 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) endif ipos=1 endif - else - do j=1,fragment_count - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, tbc(i) - ipos += 20 - if (ipos > 63980) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - ipos=1 - endif - end do - end if - end do + end do + enddo if (ipos > 1) then if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then stop 'Unable to add task to task server' @@ -149,24 +196,26 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) nproc_target = min(nproc_target,nproc) endif + call omp_set_nested(.true.) + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1) & !$OMP PRIVATE(i) i = omp_get_thread_num() if (i==0) then - call pt2_collector(zmq_socket_pull,E(pt2_stoch_istate), b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, absolute_error, w, error) + call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, w, error) pt2(pt2_stoch_istate) = w(pt2_stoch_istate) else call pt2_slave_inproc(i) endif !$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2') - call delete_selection_buffer(b) print *, '========== ================= ================= =================' - deallocate(pt2_detail, comb, computed, tbc) enddo - FREE pt2_stoch_istate +! call omp_set_nested(.false.) + + FREE pt2_stoch_istate state_average_weight(:) = state_average_weight_save(:) TOUCH state_average_weight endif @@ -177,34 +226,6 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) end subroutine -subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, Nabove) - integer, intent(in) :: tbc(0:size_tbc), Ncomb - logical, intent(in) :: computed(N_det_generators) - double precision, intent(in) :: comb(Ncomb), pt2_detail(N_states,N_det_generators) - double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) - integer :: i, dets(comb_teeth) - double precision :: myVal, myVal2 - - mainLoop : do i=1,Ncomb - call get_comb(comb(i), dets, comb_teeth) - do j=1,comb_teeth - if(.not.(computed(dets(j)))) then - exit mainLoop - end if - end do - - myVal = 0d0 - myVal2 = 0d0 - do j=comb_teeth,1,-1 - myVal += pt2_detail(pt2_stoch_istate,dets(j)) * pt2_weight_inv(dets(j)) * comb_step - sumabove(j) += myVal - sum2above(j) += myVal*myVal - Nabove(j) += 1 - end do - end do mainLoop -end subroutine - - subroutine pt2_slave_inproc(i) implicit none integer, intent(in) :: i @@ -212,411 +233,289 @@ subroutine pt2_slave_inproc(i) call run_pt2_slave(1,i,pt2_e0_denominator) end -subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, absolute_error, pt2,error) + +subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error) use f77_zmq use selection_types use bitmasks implicit none - integer, intent(in) :: Ncomb integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - double precision, intent(inout) :: pt2_detail(N_states, N_det_generators) - double precision, intent(in) :: comb(Ncomb), relative_error, absolute_error, E - logical, intent(inout) :: computed(N_det_generators) - integer, intent(in) :: tbc(0:size_tbc) - double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) - double precision, intent(out) :: pt2(N_states),error(N_states) + double precision, intent(in) :: relative_error, E + double precision, intent(out) :: pt2(N_states), error(N_states) - type(selection_buffer), intent(inout) :: b - double precision, allocatable :: pt2_mwen(:,:) + double precision, allocatable :: eI(:,:), eI_task(:,:), S(:), S2(:) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket + integer, external :: zmq_delete_tasks + integer, external :: zmq_abort + integer, external :: pt2_find_sample - - integer :: msg_size, rc, more - integer :: acc, i, j, robin, N, n_tasks - double precision, allocatable :: val(:) - integer(bit_kind), allocatable :: det(:,:,:) + integer :: more, n, i, p, c, t, n_tasks, U integer, allocatable :: task_id(:) integer, allocatable :: index(:) - double precision :: time0 - double precision :: time, timeLast, Nabove_old + double precision, external :: omp_get_wtime - integer :: tooth, firstTBDcomb, orgTBDcomb, n_tasks_max - integer, allocatable :: parts_to_get(:) - logical, allocatable :: actually_computed(:) - double precision :: eqt - character*(512) :: task - Nabove_old = -1.d0 - n_tasks_max = N_det_generators/100+1 + double precision :: v, x, avg, eqt, E0 + double precision :: time, time0 - allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators), & - pt2_mwen(N_states, n_tasks_max) ) - - pt2_mwen(1:N_states, 1:n_tasks_max) = 0.d0 - do i=1,N_det_generators - actually_computed(i) = computed(i) - enddo - - parts_to_get(:) = 1 - if(fragment_first > 0) then - do i=1,fragment_first - parts_to_get(i) = fragment_count - enddo - endif - - do i=1,tbc(0) - actually_computed(tbc(i)) = .false. - end do - - orgTBDcomb = int(Nabove(1)) - firstTBDcomb = 1 + integer, allocatable :: f(:) + logical, allocatable :: d(:) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - allocate(val(b%N), det(N_int, 2, b%N), task_id(n_tasks_max), index(n_tasks_max)) + allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators)) + allocate(d(N_det_generators+1)) + allocate(eI(N_states, N_det_generators), eI_task(N_states, pt2_n_tasks_max)) + allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1)) + + pt2(:) = -huge(1.) + S(:) = 0d0 + S2(:) = 0d0 + n = 1 + t = 0 + U = 0 + eI(:,:) = 0d0 + f(:) = pt2_F(:) + d(:) = .false. + n_tasks = 0 + E0 = E more = 1 - call wall_time(time0) - timeLast = time0 + time0 = omp_get_wtime() - call get_first_tooth(actually_computed, tooth) - Nabove_old = Nabove(tooth) - - logical :: loop - loop = .True. - pullLoop : do while (loop) + do while (n <= N_det_generators) + if(f(pt2_J(n)) == 0) then + d(pt2_J(n)) = .true. + do while(d(U+1)) + U += 1 + end do - call pull_pt2_results(zmq_socket_pull, index, pt2_mwen, task_id, n_tasks) - do i=1,n_tasks - pt2_detail(1:N_states, index(i)) += pt2_mwen(1:N_states,i) - parts_to_get(index(i)) -= 1 - if(parts_to_get(index(i)) < 0) then - print *, i, index(i), parts_to_get(index(i)) - print *, parts_to_get - stop "PARTS ??" - end if - if(parts_to_get(index(i)) == 0) actually_computed(index(i)) = .true. - enddo - - integer, external :: zmq_delete_tasks - if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then - cycle - endif - if (more == 0) then - loop = .False. - endif - - call wall_time(time) - - - if(time - timeLast > 5d0 .or. (.not.loop)) then - timeLast = time - do i=1, first_det_of_teeth(1)-1 - if(.not.(actually_computed(i))) then - cycle pullLoop + ! Deterministic part + do while(t <= pt2_N_teeth) + if(U >= pt2_n_0(t+1)) then + t=t+1 + E0 = 0.d0 + do i=pt2_n_0(t),1,-1 + E0 += eI(pt2_stoch_istate, i) + end do + else + exit end if end do - - integer, external :: zmq_abort - double precision :: E0, avg, prop - - call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove) - firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1 - call get_first_tooth(actually_computed, tooth) - - if (firstTBDcomb > Ncomb) then - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Error in sending abort signal (1)' - endif - endif -! exit pullLoop - endif - - E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) - if (tooth <= comb_teeth) then - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) - prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) - E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop - avg = E0 + (sumabove(tooth) / Nabove(tooth)) - eqt = sqrt(1d0 / (Nabove(tooth)-1.d0) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) - else - eqt = 0.d0 - tooth=comb_teeth - endif - call wall_time(time) - if ( ((dabs(eqt/avg) < relative_error) .or. (dabs(eqt) < absolute_error)) .and. Nabove(tooth) >= 10.d0) then - ! Termination + + ! Add Stochastic part + c = pt2_R(n) + if(c > 0) then + x = 0d0 + do p=pt2_N_teeth, 1, -1 + v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1)) + i = pt2_find_sample(v, pt2_cW) + x += eI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) + S(p) += x + S2(p) += x**2 + end do + avg = E0 + S(t) / dble(c) pt2(pt2_stoch_istate) = avg - error(pt2_stoch_istate) = eqt - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Error in sending abort signal (2)' + ! 1/(N-1.5) : see Brugger, The American Statistician (23) 4 p. 32 (1969) + if(c > 2) then + eqt = dabs((S2(t) / c) - (S(t)/c)**2) ! dabs for numerical stability + eqt = sqrt(eqt / (dble(c) - 1.5d0)) + error(pt2_stoch_istate) = eqt + if(mod(c,10)==0 .or. n==N_det_generators) then + print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E, eqt, time-time0, '' + if( dabs(error(pt2_stoch_istate) / pt2(pt2_stoch_istate)) < relative_error) then + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + call sleep(1) + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Error in sending abort signal (2)' + endif + endif + endif endif endif - else - if ( (Nabove(tooth) > 2.d0) .and. (Nabove(tooth) > Nabove_old) ) then - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' - Nabove_old = Nabove(tooth) - endif + time = omp_get_wtime() + end if + n += 1 + else if(more == 0) then + exit + else + call pull_pt2_results(zmq_socket_pull, index, eI_task, task_id, n_tasks) + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then + stop 'Unable to delete tasks' endif + do i=1,n_tasks + eI(:, index(i)) += eI_task(:, i) + f(index(i)) -= 1 + end do end if - end do pullLoop - - if(tooth == comb_teeth+1) then - pt2(pt2_stoch_istate) = sum(pt2_detail(pt2_stoch_istate,:)) - error(pt2_stoch_istate) = 0d0 - else - E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) - prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) - E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop - pt2(pt2_stoch_istate) = E0 + (sumabove(tooth) / Nabove(tooth)) - error(pt2_stoch_istate) = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) - end if - + end do call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - call sort_selection_buffer(b) end subroutine -integer function pt2_find(v, w, sze, imin, imax) + +integer function pt2_find_sample(v, w) implicit none - integer, intent(in) :: sze, imin, imax - double precision, intent(in) :: v, w(sze) - integer :: i,l,h - integer, parameter :: block=64 + double precision, intent(in) :: v, w(0:N_det_generators) + integer :: i,l,r - l = imin - h = imax-1 + l = 0 + r = N_det_generators - do while(h-l >= block) - i = ishft(h+l,-1) - if(w(i+1) > v) then - h = i-1 + do while(r-l > 1) + i = (r+l) / 2 + if(w(i) < v) then + l = i else - l = i+1 + r = i end if end do - !DIR$ LOOP COUNT (64) - do pt2_find=l,h - if(w(pt2_find) >= v) then + i = r + do r=i+1,N_det_generators + if (w(r) /= w(i)) then exit - end if - end do + endif + enddo + pt2_find_sample = r-1 end function -BEGIN_PROVIDER [ integer, comb_teeth ] + BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)] +&BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)] +&BEGIN_PROVIDER[ integer, pt2_R, (N_det_generators)] implicit none - BEGIN_DOC -! Number of teeth in the comb - END_DOC - comb_teeth = min(1+N_det/10,100) + integer :: N_c, N_j, U, t, i + double precision :: v + logical, allocatable :: d(:) + integer, external :: pt2_find_sample + + allocate(d(N_det_generators)) + + pt2_R(:) = 0 + N_c = 0 + N_j = pt2_n_0(1) + d(:) = .false. + do i=1,N_j + d(i) = .true. + pt2_J(i) = i + end do + call random_seed(put=(/3211,64,6566,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/)) + call RANDOM_NUMBER(pt2_u) + call RANDOM_NUMBER(pt2_u) + + + + U = 0 + + do while(N_j < N_det_generators) + !ADD_COMB + N_c += 1 + do t=0, pt2_N_teeth-1 + v = pt2_u_0 + pt2_W_T * (dble(t) + pt2_u(N_c)) + i = pt2_find_sample(v, pt2_cW) + if(.not. d(i)) then + N_j += 1 + pt2_J(N_j) = i + d(i) = .true. + end if + end do + + pt2_R(N_j) = N_c + + !FILL_TOOTH + do while(U < N_det_generators) + U += 1 + if(.not. d(U)) then + N_j += 1 + pt2_J(N_j) = U + d(U) = .true. + exit; + end if + end do + enddo + if(N_det_generators > 1) then + pt2_R(N_det_generators-1) = 0 + pt2_R(N_det_generators) = N_c + end if END_PROVIDER - -subroutine get_first_tooth(computed, first_teeth) + BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_W_T ] +&BEGIN_PROVIDER [ double precision, pt2_u_0 ] +&BEGIN_PROVIDER [ integer, pt2_n_0, (pt2_N_teeth+1) ] implicit none - logical, intent(in) :: computed(N_det_generators) - integer, intent(out) :: first_teeth - integer :: i, first_det + integer :: i, t + double precision, allocatable :: tilde_w(:), tilde_cW(:) + double precision :: r, tooth_width + integer, external :: pt2_find_sample - first_det = N_det_generators+1+1 - first_teeth = 1 - do i=first_det_of_comb, N_det_generators - if(.not.(computed(i))) then - first_det = i + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + tilde_cW(0) = 0d0 + + do i=1,N_det_generators + tilde_w(i) = psi_coef_sorted_gen(i,pt2_stoch_istate)**2 + 1.d-20 + enddo + + double precision :: norm + norm = 0.d0 + do i=N_det_generators,1,-1 + norm += tilde_w(i) + enddo + + tilde_w(:) = tilde_w(:) / norm + + tilde_cW(0) = -1.d0 + do i=1,N_det_generators + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + tilde_cW(:) = tilde_cW(:) + 1.d0 + + pt2_n_0(1) = 0 + do + pt2_u_0 = tilde_cW(pt2_n_0(1)) + r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth) + pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth) + if(pt2_W_T >= r - pt2_u_0) then exit end if - end do - - do i=comb_teeth+1, 1, -1 - if(first_det_of_teeth(i) < first_det) then - first_teeth = i - exit + pt2_n_0(1) += 1 + if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then + stop "teeth building failed" end if end do - -end subroutine - - -BEGIN_PROVIDER [ integer*8, size_tbc ] - implicit none - BEGIN_DOC -! Size of the tbc array - END_DOC - size_tbc = int((comb_teeth+1),8)*int(N_det_generators,8) + fragment_count*fragment_first -END_PROVIDER - -subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) - implicit none - integer, intent(inout) :: Ncomb - double precision, intent(out) :: comb(Ncomb) - integer, intent(inout) :: tbc(0:size_tbc) - logical, intent(inout) :: computed(N_det_generators) - integer :: i, j, last_full, dets(comb_teeth) - integer :: icount, n - integer :: k, l - l=first_det_of_comb - call RANDOM_NUMBER(comb) - do i=1,size(comb) - comb(i) = comb(i) * comb_step - !DIR$ FORCEINLINE - call add_comb(comb(i), computed, tbc, size_tbc, comb_teeth) - Ncomb = i - if (tbc(0) == N_det_generators) return - do while (computed(l)) - l=l+1 - enddo - k=tbc(0)+1 - tbc(k) = l - computed(l) = .True. - tbc(0) = k - enddo - -end subroutine - - - -subroutine get_comb(stato, dets, ct) - implicit none - integer, intent(in) :: ct - double precision, intent(in) :: stato - integer, intent(out) :: dets(ct) - double precision :: curs - integer :: j - integer, external :: pt2_find - - curs = 1d0 - stato - do j = comb_teeth, 1, -1 - !DIR$ FORCEINLINE - dets(j) = pt2_find(curs, pt2_cweight,size(pt2_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) - curs -= comb_step + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + do t=2, pt2_N_teeth + r = pt2_u_0 + pt2_W_T * dble(t-1) + pt2_n_0(t) = pt2_find_sample(r, tilde_cW) end do -end subroutine - - -subroutine add_comb(comb, computed, tbc, stbc, ct) - implicit none - integer*8, intent(in) :: stbc - integer, intent(in) :: ct - double precision, intent(in) :: comb - logical, intent(inout) :: computed(N_det_generators) - integer, intent(inout) :: tbc(0:stbc) - integer :: i, k, l, dets(ct) - - !DIR$ FORCEINLINE - call get_comb(comb, dets, ct) - - k=tbc(0)+1 - do i = 1, ct - l = dets(i) - if(.not.(computed(l))) then - tbc(k) = l - k = k+1 - computed(l) = .true. - end if + pt2_n_0(pt2_N_teeth+1) = N_det_generators + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1)) + do t=1, pt2_N_teeth + tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t)) + if (tooth_width == 0.d0) then + tooth_width = sum(tilde_w(pt2_n_0(t):pt2_n_0(t+1))) + endif + ASSERT(tooth_width > 0.d0) + do i=pt2_n_0(t)+1, pt2_n_0(t+1) + pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width + end do end do - tbc(0) = k-1 -end subroutine - - -BEGIN_PROVIDER [ integer, pt2_stoch_istate ] - implicit none - BEGIN_DOC - ! State for stochatsic PT2 - END_DOC - pt2_stoch_istate = 1 -END_PROVIDER - - - BEGIN_PROVIDER [ double precision, pt2_weight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, pt2_cweight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, pt2_cweight_cache, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, comb_step ] -&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] -&BEGIN_PROVIDER [ integer, first_det_of_comb ] - implicit none - integer :: i - double precision :: norm_left, stato - integer, external :: pt2_find - - pt2_weight(1) = psi_coef_generators(1,pt2_stoch_istate)**2 - pt2_cweight(1) = psi_coef_generators(1,pt2_stoch_istate)**2 + pt2_cW(0) = 0d0 do i=1,N_det_generators - pt2_weight(i) = psi_coef_generators(i,pt2_stoch_istate)**2 - enddo - - ! Important to loop backwards for numerical precision - pt2_cweight(N_det_generators) = pt2_weight(N_det_generators) - do i=N_det_generators-1,1,-1 - pt2_cweight(i) = pt2_weight(i) + pt2_cweight(i+1) + pt2_cW(i) = pt2_cW(i-1) + pt2_w(i) end do - - do i=1,N_det_generators - pt2_weight(i) = pt2_weight(i) / pt2_cweight(1) - pt2_cweight(i) = pt2_cweight(i) / pt2_cweight(1) - enddo - - do i=1,N_det_generators-1 - pt2_cweight(i) = 1.d0 - pt2_cweight(i+1) - end do - pt2_cweight(N_det_generators) = 1.d0 - - norm_left = 1d0 - - comb_step = 1d0/dfloat(comb_teeth) - first_det_of_comb = 1 - do i=1,N_det_generators - if(pt2_weight(i)/norm_left < .5d0*comb_step) then - first_det_of_comb = i - exit - end if - norm_left -= pt2_weight(i) - end do - first_det_of_comb = max(2,first_det_of_comb) - call write_int(6, first_det_of_comb-1, 'Size of deterministic set') - - comb_step = (1d0 - pt2_cweight(first_det_of_comb-1)) * comb_step - - stato = 1d0 - comb_step - iloc = N_det_generators - do i=comb_teeth, 1, -1 - integer :: iloc - iloc = pt2_find(stato, pt2_cweight, N_det_generators, 1, iloc) - first_det_of_teeth(i) = iloc - stato -= comb_step - end do - first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 - first_det_of_teeth(1) = first_det_of_comb - if(first_det_of_teeth(1) /= first_det_of_comb) then - print *, 'Error in ', irp_here - stop "comb provider" - endif - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, pt2_weight_inv, (N_det_generators) ] - implicit none - BEGIN_DOC -! Inverse of pt2_weight array - END_DOC - integer :: i - do i=1,N_det_generators - pt2_weight_inv(i) = 1.d0/pt2_weight(i) - enddo - + pt2_n_0(pt2_N_teeth+1) = N_det_generators END_PROVIDER - diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 6be25846..732c8ca8 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -22,12 +22,11 @@ subroutine run_pt2_slave(thread,iproc,energy) logical :: done double precision,allocatable :: pt2(:,:) - integer :: n_tasks, k, n_tasks_max + integer :: n_tasks, k integer, allocatable :: i_generator(:), subset(:) - n_tasks_max = N_det_generators/100+1 - allocate(task_id(n_tasks_max), task(n_tasks_max)) - allocate(pt2(N_states,n_tasks_max), i_generator(n_tasks_max), subset(n_tasks_max)) + allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) + allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() @@ -47,7 +46,7 @@ subroutine run_pt2_slave(thread,iproc,energy) do while (.not.done) n_tasks = max(1,n_tasks) - n_tasks = min(n_tasks,n_tasks_max) + n_tasks = min(n_tasks,pt2_n_tasks_max) integer, external :: get_tasks_from_taskserver if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then @@ -66,7 +65,7 @@ subroutine run_pt2_slave(thread,iproc,energy) do k=1,n_tasks pt2(:,k) = 0.d0 buf%cur = 0 - call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k)) + call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k),pt2_F(i_generator(k))) enddo call wall_time(time1) @@ -78,7 +77,6 @@ subroutine run_pt2_slave(thread,iproc,energy) ! Try to adjust n_tasks around 1 second per job n_tasks = min(n_tasks,int( 1.d0*dble(n_tasks) / (time1 - time0 + 1.d-9)))+1 -! n_tasks = n_tasks+1 end do integer, external :: disconnect_from_taskserver @@ -201,12 +199,5 @@ IRP_ENDIF end subroutine - -BEGIN_PROVIDER [ double precision, pt2_workload, (N_det_generators) ] - integer :: i - do i=1,N_det_generators - pt2_workload(i) = dfloat(N_det_generators - i + 1)**2 - end do - pt2_workload = pt2_workload / sum(pt2_workload) -END_PROVIDER + diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index 39f6c01c..cdf83dff 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -1,122 +1,4 @@ subroutine run_selection_slave(thread,iproc,energy) - implicit none - integer, intent(in) :: thread, iproc - double precision, intent(in) :: energy(N_states_diag) - call run_selection_slave_new(thread,iproc,energy) -end - -subroutine run_selection_slave_new(thread,iproc,energy) - use f77_zmq - use selection_types - implicit none - - integer, intent(in) :: thread, iproc - double precision, intent(in) :: energy(N_states_diag) - integer :: rc, i, N - logical :: buffer_ready - - integer :: worker_id, ltask - character*(512), allocatable :: task(:) - integer, allocatable :: task_id(:) - - integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - - integer(ZMQ_PTR), external :: new_zmq_push_socket - integer(ZMQ_PTR) :: zmq_socket_push - - type(selection_buffer) :: buf, buf2 - logical :: done - - double precision,allocatable :: pt2(:,:) - integer :: n_tasks, k, n_tasks_max - integer, allocatable :: i_generator(:), subset(:) - - PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique - PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order - PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns - PROVIDE psi_bilinear_matrix_transp_order - - buffer_ready = .False. - n_tasks_max = N_det_generators/100+1 - allocate(task_id(n_tasks_max), task(n_tasks_max)) - allocate(pt2(N_states,n_tasks_max), i_generator(n_tasks_max), subset(n_tasks_max)) - - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - - integer, external :: connect_to_taskserver - if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - return - endif - - zmq_socket_push = new_zmq_push_socket(thread) - - buf%N = 0 - n_tasks = 1 - call create_selection_buffer(0, 0, buf) - done = .False. - do while (.not.done) - - n_tasks = max(1,n_tasks) - n_tasks = min(n_tasks,n_tasks_max) - - integer, external :: get_tasks_from_taskserver - if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then - exit - endif - done = task_id(n_tasks) == 0 - if (done) n_tasks = n_tasks-1 - if (n_tasks == 0) exit - - do k=1,n_tasks - read (task(k),*) subset(k), i_generator(k), N - enddo - - if(buf%N == 0) then - ! Only first time - call create_selection_buffer(N, N*2, buf) - call create_selection_buffer(N, N*2, buf2) - buffer_ready = .True. - endif - - double precision :: time0, time1 - call wall_time(time0) - do k=1,n_tasks - pt2(:,k) = 0.d0 - buf%cur = 0 - call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k)) - enddo - call wall_time(time1) - - integer, external :: tasks_done_to_taskserver - if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then - done = .true. - endif - call sort_selection_buffer(buf) - call merge_selection_buffers(buf,buf2) - call push_selection_results(zmq_socket_push, pt2, buf, task_id, n_tasks) - buf%mini = buf2%mini - pt2(:,:) = 0d0 - buf%cur = 0 - -! ! Try to adjust n_tasks around 5 second per job -! n_tasks = min(n_tasks,int( 5.d0 * dble(n_tasks) / (time1 - time0 + 1.d-9)))+1 - n_tasks = n_tasks+1 - end do - - integer, external :: disconnect_from_taskserver - if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then - continue - endif - - call end_zmq_push_socket(zmq_socket_push,thread) - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - call delete_selection_buffer(buf) - -end - -subroutine run_selection_slave_old(thread,iproc,energy) use f77_zmq use selection_types implicit none @@ -177,7 +59,7 @@ subroutine run_selection_slave_old(thread,iproc,energy) else ASSERT (N == buf%N) end if - call select_connected(i_generator,energy,pt2,buf,subset) + call select_connected(i_generator,energy,pt2,buf,subset,pt2_F(i_generator)) endif integer, external :: task_done_to_taskserver diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 277e6be5..047a0b26 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -1,14 +1,5 @@ use bitmasks -BEGIN_PROVIDER [ integer, fragment_count ] - implicit none - BEGIN_DOC - ! Number of fragments for the deterministic part - END_DOC - fragment_count = (elec_alpha_num-n_core_orb)**2 -END_PROVIDER - - subroutine assert(cond, msg) character(*), intent(in) :: msg logical, intent(in) :: cond @@ -46,11 +37,11 @@ subroutine get_mask_phase(det, phasemask) end subroutine -subroutine select_connected(i_generator,E0,pt2,b,subset) +subroutine select_connected(i_generator,E0,pt2,b,subset,csubset) use bitmasks use selection_types implicit none - integer, intent(in) :: i_generator, subset + integer, intent(in) :: i_generator, subset, csubset type(selection_buffer), intent(inout) :: b double precision, intent(inout) :: pt2(N_states) integer :: k,l @@ -71,7 +62,7 @@ subroutine select_connected(i_generator,E0,pt2,b,subset) particle_mask(k,1) = iand(generators_bitmask(k,1,s_part,l), not(psi_det_generators(k,1,i_generator)) ) particle_mask(k,2) = iand(generators_bitmask(k,2,s_part,l), not(psi_det_generators(k,2,i_generator)) ) enddo - call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b,subset) + call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b,subset,csubset) enddo deallocate(fock_diag_tmp) end subroutine @@ -266,7 +257,7 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) end -subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset) +subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset,csubset) use bitmasks use selection_types implicit none @@ -274,7 +265,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d ! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted END_DOC - integer, intent(in) :: i_generator, subset + integer, intent(in) :: i_generator, subset, csubset integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) double precision, intent(in) :: fock_diag_tmp(mo_tot_num) double precision, intent(in) :: E0(N_states) @@ -298,8 +289,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d integer(bit_kind), allocatable:: preinteresting_det(:,:,:) allocate (preinteresting_det(N_int,2,N_det)) - PROVIDE fragment_count - monoAdo = .true. monoBdo = .true. @@ -571,7 +560,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d end if maskInd += 1 - if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then + if(mod(maskInd, csubset) == (subset-1)) then call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index 88b30172..415270f1 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -14,7 +14,7 @@ end subroutine provide_everything PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context n_states_diag - PROVIDE pt2_e0_denominator mo_tot_num N_int fragment_count ci_energy mpi_master zmq_state zmq_context + PROVIDE pt2_e0_denominator mo_tot_num N_int ci_energy mpi_master zmq_state zmq_context PROVIDE psi_det psi_coef end diff --git a/plugins/Full_CI_ZMQ/selection_slave.irp.f b/plugins/Full_CI_ZMQ/selection_slave.irp.f index 8036985a..bfb1480b 100644 --- a/plugins/Full_CI_ZMQ/selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_slave.irp.f @@ -14,7 +14,7 @@ end subroutine provide_everything PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context n_states_diag - PROVIDE pt2_e0_denominator mo_tot_num N_int fragment_count ci_energy mpi_master zmq_state zmq_context + PROVIDE pt2_e0_denominator mo_tot_num N_int ci_energy mpi_master zmq_state zmq_context PROVIDE psi_det psi_coef end diff --git a/plugins/Full_CI_ZMQ/zmq_selection.irp.f b/plugins/Full_CI_ZMQ/zmq_selection.irp.f index b3a87e95..397bee82 100644 --- a/plugins/Full_CI_ZMQ/zmq_selection.irp.f +++ b/plugins/Full_CI_ZMQ/zmq_selection.irp.f @@ -12,15 +12,13 @@ subroutine ZMQ_selection(N_in, pt2) double precision, intent(out) :: pt2(N_states) - PROVIDE fragment_count - N = max(N_in,1) if (.True.) then PROVIDE pt2_e0_denominator nproc PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns - PROVIDE psi_bilinear_matrix_transp_order fragment_count + PROVIDE psi_bilinear_matrix_transp_order call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'selection') @@ -60,9 +58,8 @@ subroutine ZMQ_selection(N_in, pt2) task = ' ' do i= 1, N_det_generators -! /!\ Fragments don't work -! if (i>-ishft(N_det_generators,-2)) then - write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') 0, i, N + do j=1,pt2_F(pt2_J(i)) + write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, pt2_J(i), N ipos += 30 if (ipos > 63970) then if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then @@ -70,18 +67,7 @@ subroutine ZMQ_selection(N_in, pt2) endif ipos=1 endif -! else -! do j=1,fragment_count -! write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, i, N -! ipos += 30 -! if (ipos > 63970) then -! if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then -! stop 'Unable to add task to task server' -! endif -! ipos=1 -! endif -! end do -! endif + end do enddo if (ipos > 1) then if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then diff --git a/plugins/Generators_full/generators.irp.f b/plugins/Generators_full/generators.irp.f index c40ba2d4..2ce6f854 100644 --- a/plugins/Generators_full/generators.irp.f +++ b/plugins/Generators_full/generators.irp.f @@ -13,7 +13,7 @@ BEGIN_PROVIDER [ integer, N_det_generators ] N_det_generators = N_det do i=1,N_det norm = norm + psi_average_norm_contrib_sorted(i) - if (norm >= threshold_generators) then + if (norm > threshold_generators+1d-10) then N_det_generators = i exit endif @@ -29,7 +29,6 @@ END_PROVIDER ! For Single reference wave functions, the generator is the ! Hartree-Fock determinant END_DOC - integer :: i, k psi_det_generators(1:N_int,1:2,1:N_det) = psi_det_sorted(1:N_int,1:2,1:N_det) psi_coef_generators(1:N_det,1:N_states) = psi_coef_sorted(1:N_det,1:N_states) @@ -44,7 +43,6 @@ END_PROVIDER ! For Single reference wave functions, the generator is the ! Hartree-Fock determinant END_DOC - integer :: i, k psi_det_sorted_gen = psi_det_sorted psi_coef_sorted_gen = psi_coef_sorted psi_det_sorted_gen_order = psi_det_sorted_order diff --git a/plugins/Perturbation/EZFIO.cfg b/plugins/Perturbation/EZFIO.cfg index 8c56b03a..485e15cd 100644 --- a/plugins/Perturbation/EZFIO.cfg +++ b/plugins/Perturbation/EZFIO.cfg @@ -17,12 +17,6 @@ doc: Stop stochastic PT2 when the relative error is smaller than PT2_relative_er interface: ezfio,provider,ocaml default: 0.001 -[PT2_absolute_error] -type: Threshold -doc: Stop stochastic PT2 when the statistical error is smaller than PT2_absolute_error -interface: ezfio,provider,ocaml -default: 0.00001 - [correlation_energy_ratio_max] type: Normalized_float doc: The selection process stops at a fixed correlation ratio (useful for getting same accuracy between molecules) diff --git a/plugins/dress_zmq/EZFIO.cfg.example b/plugins/dress_zmq/EZFIO.cfg.example index 49400b2f..9d9ddb2b 100644 --- a/plugins/dress_zmq/EZFIO.cfg.example +++ b/plugins/dress_zmq/EZFIO.cfg.example @@ -10,3 +10,9 @@ doc: Maximum number of dressed CI iterations interface: ezfio,provider,ocaml default: 10 +[dress_relative_error] +type: Normalized_float +doc: Stop stochastic PT2 when the relative error is smaller than PT2_relative_error +interface: ezfio,provider,ocaml +default: 0.001 + diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index f590f5d1..d59ab032 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -2,10 +2,10 @@ use bitmasks -subroutine alpha_callback(delta_ij_loc, i_generator, subset,iproc) +subroutine alpha_callback(delta_ij_loc, i_generator, subset, csubset, iproc) use bitmasks implicit none - integer, intent(in) :: i_generator, subset + integer, intent(in) :: i_generator, subset, csubset double precision,intent(inout) :: delta_ij_loc(N_states,N_det,2) integer, intent(in) :: iproc @@ -15,7 +15,7 @@ subroutine alpha_callback(delta_ij_loc, i_generator, subset,iproc) do l=1,N_generators_bitmask - call generate_singles_and_doubles(delta_ij_loc, i_generator,l,subset,iproc) + call generate_singles_and_doubles(delta_ij_loc,i_generator,l,subset,csubset,iproc) enddo end subroutine @@ -34,7 +34,7 @@ BEGIN_PROVIDER [ integer, psi_from_sorted_gen, (N_det) ] END_PROVIDER -subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index, subset, iproc) +subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index, subset, csubset, iproc) use bitmasks implicit none BEGIN_DOC @@ -42,7 +42,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index END_DOC double precision,intent(inout) :: delta_ij_loc(N_states,N_det,2) - integer, intent(in) :: i_generator, subset, bitmask_index + integer, intent(in) :: i_generator, subset, csubset, bitmask_index integer, intent(in) :: iproc @@ -66,11 +66,11 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index integer(bit_kind), allocatable:: preinteresting_det(:,:,:) integer ,allocatable :: abuf(:), labuf(:) - allocate(abuf(0:N_det*6), labuf(0:N_det)) + allocate(abuf(N_det*6), labuf(N_det)) allocate(preinteresting_det(N_int,2,N_det)) - PROVIDE fragment_count + maskInd = -1 monoAdo = .true. monoBdo = .true. @@ -193,7 +193,6 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index allocate(counted(mo_tot_num, mo_tot_num), countedOrb(mo_tot_num, 2)) allocate (indexes(0:mo_tot_num, 0:mo_tot_num)) allocate (indexes_end(0:mo_tot_num, 0:mo_tot_num)) - maskInd = -1 integer :: nb_count do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first @@ -345,7 +344,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index end if maskInd += 1 - if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then + if(mod(maskInd, csubset) == (subset-1)) then call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle @@ -387,7 +386,7 @@ subroutine alpha_callback_mask(delta_ij_loc, i_gen, sp, mask, bannedOrb, banned, integer(bit_kind), allocatable :: det_minilist(:,:,:) - allocate(abuf(0:siz), labuf(0:N_det), putten(N_det), det_minilist(N_int, 2, N_det)) + allocate(abuf(siz), labuf(N_det), putten(N_det), det_minilist(N_int, 2, N_det)) do i=1,siz abuf(i) = psi_from_sorted_gen(rabuf(i)) @@ -638,7 +637,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, indexes, ab integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) logical, intent(inout) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num, 2) integer, intent(inout) :: indexes(0:mo_tot_num, 0:mo_tot_num) - integer, intent(inout) :: abuf(0:*) + integer, intent(inout) :: abuf(*) integer :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt, s integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) integer :: phasemask(2,N_int*bit_kind_size) @@ -704,7 +703,7 @@ subroutine get_d2(i_gen, gen, banned, bannedOrb, indexes, abuf, mask, h, p, sp) implicit none integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer, intent(inout) :: abuf(0:*) + integer, intent(inout) :: abuf(*) integer, intent(in) :: i_gen logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) integer, intent(inout) :: indexes(0:mo_tot_num, 0:mo_tot_num) @@ -832,7 +831,7 @@ subroutine get_d1(i_gen, gen, banned, bannedOrb, indexes, abuf, mask, h, p, sp) implicit none integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer, intent(inout) :: abuf(0:*) + integer, intent(inout) :: abuf(*) integer,intent(in) :: i_gen logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) integer(bit_kind) :: det(N_int, 2) diff --git a/plugins/dress_zmq/dress_general.irp.f b/plugins/dress_zmq/dress_general.irp.f index b99eb1d7..01163007 100644 --- a/plugins/dress_zmq/dress_general.irp.f +++ b/plugins/dress_zmq/dress_general.irp.f @@ -29,8 +29,6 @@ subroutine run_dressing(N_st,energy) delta_E = 1.d0 iteration = 0 do iteration=1,n_it_dress_max - N_det_delta_ij = N_det - touch N_det_delta_ij print *, '===============================================' print *, 'Iteration', iteration, '/', n_it_dress_max print *, '===============================================' @@ -40,13 +38,11 @@ subroutine run_dressing(N_st,energy) do i=1,N_st print *, i, psi_energy(i)+nuclear_repulsion enddo - !print *, "DELTA IJ", delta_ij(1,1,1) - PROVIDE delta_ij_tmp - if(.true.) call delta_ij_done() print *, 'Dressed energy ' do i=1,N_st print *, i, ci_energy_dressed(i) enddo + energy(1:N_st) = ci_energy_dressed(1:N_st) call diagonalize_ci_dressed E_new = sum(psi_energy(:)) @@ -56,7 +52,6 @@ subroutine run_dressing(N_st,energy) call write_double(6,delta_E,"delta_E (undressed)") delta_E = dabs(delta_E) call save_wavefunction -! call ezfio_set_dress_zmq_energy(ci_energy_dressed(1)) if (delta_E < thresh_dress) then exit endif @@ -67,10 +62,9 @@ subroutine run_dressing(N_st,energy) enddo print *, 'Dressed energy ' do i=1,N_st - print *, i, ci_energy_dressed(i)+nuclear_repulsion + print *, i, ci_energy_dressed(i) enddo endif - if(.true.) energy(1:N_st) = 0d0 ! ci_energy_dressed(1:N_st) end diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index 6de3e2da..33238df2 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -50,9 +50,7 @@ subroutine run_wf else if (zmq_state(:5) == 'dress') then ! Dress ! --------- - !call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle - !TOUCH psi_det if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle @@ -60,7 +58,7 @@ subroutine run_wf if (zmq_get_dvector(zmq_to_qp_run_socket,1,'dress_stoch_istate',tmp,1) == -1) cycle dress_stoch_istate = int(tmp) psi_energy(1:N_states) = energy(1:N_states) - TOUCH psi_energy dress_stoch_istate state_average_weight + TOUCH psi_energy dress_stoch_istate state_average_weight PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 6b7bf396..3b9d128d 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -1,17 +1,225 @@ -BEGIN_PROVIDER [ integer, fragment_first ] +BEGIN_PROVIDER [ integer, dress_stoch_istate ] + implicit none + BEGIN_DOC + ! State for stochatsic dressing + END_DOC + dress_stoch_istate = 1 +END_PROVIDER + + BEGIN_PROVIDER [ integer, pt2_N_teeth ] +&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ] +&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ] +&BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ] implicit none - fragment_first = first_det_of_teeth(1) + logical, external :: testTeethBuilding + integer :: i + pt2_F(:) = 1 + pt2_n_tasks_max = 20 +! do i=1,N_det_generators +! if (maxval(dabs(psi_coef_sorted_gen(i,:))) > 0.001d0) then +! pt2_F(i) = max(1,min( (elec_alpha_num-n_core_orb)**2, pt2_n_tasks_max)) +! endif +! enddo + + if(N_det_generators < 1024) then + pt2_minDetInFirstTeeth = 1 + pt2_N_teeth = 1 + else + pt2_minDetInFirstTeeth = min(5, N_det_generators) + do pt2_N_teeth=100,2,-1 + if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit + end do + end if + call write_int(6,pt2_N_teeth,'Number of comb teeth') END_PROVIDER -subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) +logical function testTeethBuilding(minF, N) + implicit none + integer, intent(in) :: minF, N + integer :: n0, i + double precision :: u0, Wt, r + + double precision, allocatable :: tilde_w(:), tilde_cW(:) + integer, external :: dress_find_sample + + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + do i=1,N_det_generators + tilde_w(i) = psi_coef_sorted_gen(i,dress_stoch_istate)**2 + 1.d-20 + enddo + + double precision :: norm + norm = 0.d0 + do i=N_det_generators,1,-1 + norm += tilde_w(i) + enddo + + tilde_w(:) = tilde_w(:) / norm + + tilde_cW(0) = -1.d0 + do i=1,N_det_generators + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + tilde_cW(:) = tilde_cW(:) + 1.d0 + + n0 = 0 + testTeethBuilding = .false. + do + u0 = tilde_cW(n0) + r = tilde_cW(n0 + minF) + Wt = (1d0 - u0) / dble(N) + if (dabs(Wt) <= 1.d-3) then + return + endif + if(Wt >= r - u0) then + testTeethBuilding = .true. + return + end if + n0 += 1 + if(N_det_generators - n0 < minF * N) then + return + end if + end do + stop "exited testTeethBuilding" +end function + +BEGIN_PROVIDER[ integer, dress_N_cp_max ] + dress_N_cp_max = 64 +END_PROVIDER + + BEGIN_PROVIDER[integer, pt2_J, (N_det_generators)] +&BEGIN_PROVIDER [integer, dress_R1, (0:N_det_generators) ] + implicit none + integer :: m,j + integer :: l,nmov + integer, allocatable :: iorder(:) + allocate(iorder(N_det_generators)) + + pt2_J = pt2_J_ + dress_R1 = dress_R1_ + + do m=1,dress_N_cp + nmov = 0 + l=dress_R1(m-1)+1 + do j=l, dress_R1(m) + if(dress_M_mi(m, pt2_J(j)) == 0 .and. pt2_J(j) > dress_dot_n_0(m)) then + pt2_J(j) += N_det_generators + nmov += 1 + end if + end do + if(dress_R1(m)-dress_R1(m-1) > 0) then + call isort(pt2_J(l), iorder, dress_R1(m)-dress_R1(m-1)) + end if + dress_R1(m) -= nmov + do j=dress_R1(m)+1, dress_R1(m) + nmov + pt2_J(j) -= N_det_generators + end do + end do +END_PROVIDER + + BEGIN_PROVIDER[ integer, dress_M_m, (dress_N_cp_max)] +&BEGIN_PROVIDER[ integer, pt2_J_, (N_det_generators)] +&BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)] +&BEGIN_PROVIDER[ integer, dress_R1_, (0:N_det_generators)] +&BEGIN_PROVIDER[ double precision, dress_M_mi, (dress_N_cp_max, N_det_generators+1)] +&BEGIN_PROVIDER [ integer, dress_T, (N_det_generators) ] +&BEGIN_PROVIDER [ integer, dress_N_cp ] + implicit none + integer :: N_c, N_j, U, t, i, m + double precision :: v + double precision, allocatable :: tilde_M(:) + logical, allocatable :: d(:) + integer, external :: dress_find_sample + + allocate(d(N_det_generators), tilde_M(N_det_generators)) + + dress_M_mi = 0d0 + tilde_M = 0d0 + dress_R1_(:) = 0 + N_c = 0 + N_j = pt2_n_0(1) + d(:) = .false. + + ! Set here the positions of the checkpoints +! U = N_det_generators/((dress_N_cp_max**2+dress_N_cp_max)/2)+1 +! do i=1, dress_N_cp_max-1 +! dress_M_m(i) = U * (((i*i)+i)/2) + 10 +! end do +! dress_M_m(dress_N_cp_max) = N_det_generators+1 + do i=1, dress_N_cp_max-1 + dress_M_m(i) = ishft(1,i+3) + end do + dress_M_m(dress_N_cp_max) = N_det_generators+1 + + do i=1,N_j + d(i) = .true. + pt2_J_(i) = i + end do + call random_seed(put=(/3211,64,6566,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/)) + call RANDOM_NUMBER(pt2_u) + call RANDOM_NUMBER(pt2_u) + + U = 0 + + m = 1 + do while(N_j < N_det_generators) + !ADD_COMB + N_c += 1 + do t=0, pt2_N_teeth-1 + v = pt2_u_0 + pt2_W_T * (dble(t) + pt2_u(N_c)) + i = dress_find_sample(v, pt2_cW) + tilde_M(i) += 1d0 + if(.not. d(i)) then + N_j += 1 + pt2_J_(N_j) = i + d(i) = .true. + end if + end do + + !FILL_TOOTH + do while(U < N_det_generators) + U += 1 + if(.not. d(U)) then + N_j += 1 + pt2_J_(N_j) = U + d(U) = .true. + exit; + end if + end do + + if(N_c == dress_M_m(m)) then + dress_R1_(m) = N_j + dress_M_mi(m, :N_det_generators) = tilde_M(:) + m += 1 + end if + enddo + + dress_N_cp = m-1 + dress_R1_(dress_N_cp) = N_j + dress_M_m(dress_N_cp) = N_c + !!!!!!!!!!!!!! + + do i=1, pt2_n_0(1) + dress_T(i) = 0 + end do + + do t=2,pt2_N_teeth+1 + do i=pt2_n_0(t-1)+1, pt2_n_0(t) + dress_T(i) = t-1 + end do + end do + !!!!!!!!!!!!! +END_PROVIDER + + +subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) use f77_zmq + use selection_types implicit none - integer, intent(in) :: lndet character(len=64000) :: task - character(len=3200) :: temp integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull integer, external :: omp_get_thread_num double precision, intent(in) :: E(N_states), relative_error @@ -24,23 +232,17 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) integer :: i, j, k, Ncp - double precision, external :: omp_get_wtime - double precision :: time integer, external :: add_task_to_taskserver double precision :: state_average_weight_save(N_states) task(:) = CHAR(0) - temp(:) = CHAR(0) allocate(delta(N_states,N_det), delta_s2(N_states, N_det)) state_average_weight_save(:) = state_average_weight(:) do dress_stoch_istate=1,N_states - SOFT_TOUCH dress_stoch_istate state_average_weight(:) = 0.d0 state_average_weight(dress_stoch_istate) = 1.d0 - TOUCH state_average_weight + TOUCH state_average_weight dress_stoch_istate - !provide psi_coef_generators - provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral dress_weight psi_selectors - !print *, dress_e0_denominator + provide nproc mo_bielec_integrals_in_map mo_mono_elec_integral psi_selectors pt2_F print *, '========== ================= ================= =================' print *, ' Samples Energy Stat. Error Seconds ' @@ -75,65 +277,32 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket - integer :: ipos, sz - integer :: block(1), block_i, cur_tooth_reduce, ntas - logical :: flushme - block = 0 - block_i = 0 - cur_tooth_reduce = 0 - ipos=1 - ntas = 0 - do i=1,N_dress_jobs+1 - flushme = (i==N_dress_jobs+1 .or. block_i == size(block) .or. block_i >=cur_tooth_reduce ) - if(.not. flushme) flushme = (tooth_reduce(dress_jobs(i)) == 0 .or. tooth_reduce(dress_jobs(i)) /= cur_tooth_reduce) - - if(flushme .and. block_i > 0) then - if(block(1) > fragment_first) then - ntas += 1 - write(temp, '(I9,1X,60(I9,1X))') 0, block(:block_i) - sz = len(trim(temp))+1 - temp(sz:sz) = '|' - !write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, dress_jobs(i) - write(task(ipos:ipos+sz), *) temp(:sz) - !ipos += 20 - ipos += sz+1 - if (ipos > 63000 .or. i==N_dress_jobs+1) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - - ipos=1 - endif - else - if(block_i /= 1) stop "reduced fragmented dets" - do j=1,fragment_count - ntas += 1 - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, block(1) - ipos += 20 - if (ipos > 63000 .or. i==N_dress_jobs+1) then - ntas += 1 - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - ipos=1 - endif - end do - end if - block_i = 0 - block = 0 - end if - - if(i /= N_dress_jobs+1) then - cur_tooth_reduce = tooth_reduce(dress_jobs(i)) - block_i += 1 - block(block_i) = dress_jobs(i) - end if + + + do i=1,N_det_generators + do j=1,pt2_F(pt2_J(i)) + write(task(1:20),'(I9,1X,I9''|'')') j, pt2_J(i) + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:20))) == -1) then + stop 'Unable to add task to task server' + endif + end do end do if (zmq_set_running(zmq_to_qp_run_socket) == -1) then print *, irp_here, ': Failed in zmq_set_running' endif + integer :: nproc_target + nproc_target = nproc + double precision :: mem + mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 + 3.d0 + 5.d0) / (1024.d0**3) + call write_double(6,mem,'Estimated memory/thread (Gb)') + if (qp_max_mem > 0) then + nproc_target = max(1,int(dble(qp_max_mem)/mem)) + nproc_target = min(nproc_target,nproc) + endif + call omp_set_nested(.true.) + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) & !$OMP PRIVATE(i) i = omp_get_thread_num() @@ -144,15 +313,17 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) call dress_slave_inproc(i) endif !$OMP END PARALLEL - call omp_set_nested(.false.) + delta_out(dress_stoch_istate,1:N_det) = delta(dress_stoch_istate,1:N_det) delta_s2_out(dress_stoch_istate,1:N_det) = delta_s2(dress_stoch_istate,1:N_det) + call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'dress') print *, '========== ================= ================= =================' enddo FREE dress_stoch_istate state_average_weight(:) = state_average_weight_save(:) +! call omp_set_nested(.false.) TOUCH state_average_weight deallocate(delta,delta_s2) @@ -166,6 +337,73 @@ subroutine dress_slave_inproc(i) call run_dress_slave(1,i,dress_e0_denominator) end + BEGIN_PROVIDER [integer, dress_dot_F, (dress_N_cp)] +&BEGIN_PROVIDER [ integer, dress_P, (N_det_generators) ] + implicit none + integer :: m,i + + do m=1,dress_N_cp + do i=dress_R1(m-1)+1, dress_R1(m) + dress_P(pt2_J(i)) = m + end do + end do + + dress_dot_F = 0 + do m=1,dress_N_cp + do i=dress_R1(m-1)+1,dress_R1(m) + dress_dot_F(m) += pt2_F(pt2_J(i)) + end do + end do + do m=2,dress_N_cp + dress_dot_F(m) += dress_dot_F(m-1) + end do +END_PROVIDER + +BEGIN_PROVIDER [double precision, dress_e, (N_det_generators, dress_N_cp)] +&BEGIN_PROVIDER [integer, dress_dot_t, (0:dress_N_cp)] +&BEGIN_PROVIDER [integer, dress_dot_n_0, (0:dress_N_cp)] + implicit none + + logical, allocatable :: d(:) + integer :: U, m, t, i + + allocate(d(N_det_generators+1)) + + dress_e(:,:) = 0d0 + dress_dot_t(:) = 0 + dress_dot_n_0(:) = 0 + d(:) = .false. + U=0 + + do m=1,dress_N_cp + do i=dress_R1_(m-1)+1,dress_R1_(m) + !dress_dot_F(m) += pt2_F(pt2_J_(i)) + d(pt2_J_(i)) = .true. + end do + + do while(d(U+1)) + U += 1 + end do + + dress_dot_t(m) = pt2_N_teeth + 1 + dress_dot_n_0(m) = N_det_generators + + do t = 2, pt2_N_teeth+1 + if(U < pt2_n_0(t)) then + dress_dot_t(m) = t-1 + dress_dot_n_0(m) = pt2_n_0(t-1) + exit + end if + end do + do i=dress_dot_n_0(m)+1, N_det_generators !pt2_n_0(t+1) + dress_e(i,m) = pt2_W_T * dress_M_mi(m,i) / pt2_w(i) + end do + end do + + do m=dress_N_cp, 2, -1 + dress_e(:,m) -= dress_e(:,m-1) + end do +END_PROVIDER subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, dress, istate) @@ -179,553 +417,257 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, intent(in) :: relative_error, E(N_states) double precision, intent(out) :: dress(N_states) - double precision, allocatable :: cp(:,:,:,:) double precision, intent(out) :: delta(N_states, N_det) double precision, intent(out) :: delta_s2(N_states, N_det) - double precision, allocatable :: delta_loc(:,:,:) - double precision, allocatable :: dress_detail(:,:) - double precision :: dress_mwen(N_states) + double precision, allocatable :: breve_delta_m(:,:,:), S(:), S2(:) + double precision, allocatable :: edI(:), edI_task(:) + integer, allocatable :: edI_index(:) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket - integer(ZMQ_PTR), external :: new_zmq_pull_socket - - integer :: more - integer :: i, j, k, i_state, N - integer :: task_id, ind - double precision, save :: time0 = -1.d0 - double precision :: time + integer(ZMQ_PTR), external :: new_zmq_pull_socket, zmq_abort + integer, allocatable :: task_id(:) + integer :: i, c, j, k, f, t, m, p, m_task + integer :: more, n_tasks + double precision :: E0, error, x, v, time, time0 + double precision :: avg, eqt double precision, external :: omp_get_wtime - integer :: cur_cp, last_cp - integer :: delta_loc_cur, is, N_buf(3) - integer, allocatable :: int_buf(:), agreg_for_cp(:) - double precision, allocatable :: double_buf(:) - integer(bit_kind), allocatable :: det_buf(:,:,:) - integer, external :: zmq_delete_tasks - last_cp = 10000000 - allocate(agreg_for_cp(N_cp)) - agreg_for_cp = 0 - allocate(int_buf(N_dress_int_buffer), double_buf(N_dress_double_buffer), det_buf(N_int,2,N_dress_det_buffer)) - delta_loc_cur = 1 + integer, allocatable :: dot_f(:) + integer, external :: zmq_delete_tasks, dress_find_sample + logical :: found + integer :: worker_id + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,1) + + found = .false. delta = 0d0 delta_s2 = 0d0 - allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det)) - allocate(delta_loc(N_states, N_det, 2)) - dress_detail = -1000d0 - cp = 0d0 - character*(512) :: task - - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + allocate(task_id(pt2_n_tasks_max)) + allocate(edI(N_det_generators)) + allocate(edI_task(N_det_generators), edI_index(N_det_generators)) + allocate(breve_delta_m(N_states, N_det, 2)) + allocate(dot_f(dress_N_cp+1)) + allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1)) + edI = 0d0 + + dot_f(:dress_N_cp) = dress_dot_F(:) + dot_f(dress_N_cp+1) = 1 + more = 1 + m = 1 + c = 0 + S(:) = 0d0 + S2(:) = 0d0 + time = omp_get_wtime() + time0 = -1d0 ! omp_get_wtime() more = 1 - if (time0 < 0.d0) then - call wall_time(time0) - endif - logical :: loop, floop - floop = .true. - loop = .true. - - pullLoop : do while (loop) - call pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, dress_mwen) - if(floop) then - call wall_time(time) - time0 = time - floop = .false. - end if - if(cur_cp == -1 .and. ind == N_det_generators) then - call wall_time(time) - end if - - if(cur_cp == -1) then - call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) - if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then - stop 'Unable to delete tasks' + do while (.not. found) + if(dot_f(m) == 0) then + E0 = 0 + do i=dress_dot_n_0(m),1,-1 + E0 += edI(i) + end do + do while(c < dress_M_m(m)) + c = c+1 + x = 0d0 + do p=pt2_N_teeth, 1, -1 + v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1)) + i = dress_find_sample(v, pt2_cW) + x += edI(i) * pt2_W_T / pt2_w(i) + S(p) += x + S2(p) += x**2 + end do + end do + t = dress_dot_t(m) + avg = E0 + S(t) / dble(c) + if (c > 2) then + eqt = dabs((S2(t) / c) - (S(t)/c)**2) + eqt = sqrt(eqt / (dble(c)-1.5d0)) + error = eqt + time = omp_get_wtime() + print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E(istate), eqt, time-time0, '' + else + eqt = 1.d0 + error = eqt endif - if(more == 0) loop = .false. !stop 'loop = .false.' !!!!!!!!!!!!!!!! - dress_detail(:, ind) = dress_mwen(:) - !print *, "DETAIL", ind, dress_mwen - else if(cur_cp > 0) then - if(ind == 0) cycle - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) - do i=1,N_det - cp(:,i,cur_cp,1) += delta_loc(:,i,1) - end do - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) - do i=1,N_det - cp(:,i,cur_cp,2) += delta_loc(:,i,2) - end do - !$OMP END PARALLEL DO - agreg_for_cp(cur_cp) += ind - !print *, agreg_for_cp(cur_cp), ind, needed_by_cp(cur_cp), cur_cp - if(agreg_for_cp(cur_cp) > needed_by_cp(cur_cp)) then - stop "too much results..." + m += 1 + if(dabs(error / avg) <= relative_error) then + integer, external :: zmq_put_dvector + i= zmq_put_dvector(zmq_to_qp_run_socket, worker_id, "ending", dble(m-1), 1) + found = .true. end if - if(agreg_for_cp(cur_cp) /= needed_by_cp(cur_cp)) cycle - - call wall_time(time) - - last_cp = cur_cp - double precision :: su, su2, eqt, avg, E0, val - integer, external :: zmq_abort - - su = 0d0 - su2 = 0d0 - !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i, val) SHARED(comb, dress_detail, & - !$OMP cur_cp,istate,cps_N) REDUCTION(+:su) REDUCTION(+:su2) - do i=1, int(cps_N(cur_cp)) - call get_comb_val(comb(i), dress_detail, cur_cp, val, istate) - su += val - su2 += val*val - end do - !$OMP END PARALLEL DO - - avg = su / cps_N(cur_cp) - eqt = dsqrt( ((su2 / cps_N(cur_cp)) - avg*avg) / cps_N(cur_cp) ) - E0 = sum(dress_detail(istate, :first_det_of_teeth(cp_first_tooth(cur_cp))-1)) - if(cp_first_tooth(cur_cp) <= comb_teeth) then - E0 = E0 + dress_detail(istate, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp))) - end if - - !print '(I2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', cps_N(cur_cp), avg+E0+E(istate), eqt, time-time0, '' - if ((dabs(eqt/(avg+E0+E(istate))) < relative_error .and. cps_N(cur_cp) >= 10)) then - ! Termination - print *, "TERMINATE" - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Error in sending abort signal (2)' + else + do + call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) + if(time0 == -1d0) then + print *, "first pull", omp_get_wtime()-time + time0 = omp_get_wtime() + end if + if(m_task == 0) then + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then + stop 'Unable to delete tasks' endif - endif - endif + else + !if(task_id(1) /= 0) stop "TASKID" + !i= zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) + exit + end if + end do + do i=1,n_tasks + if(edI(edI_index(i)) /= 0d0) stop "NIN M" + edI(edI_index(i)) += edI_task(i) + end do + dot_f(m_task) -= f end if - end do pullLoop + end do + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + call sleep(1) + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Error in sending abort signal (2)' + endif + endif - delta(:,:) = cp(:,:,last_cp,1) - delta_s2(:,:) = cp(:,:,last_cp,2) + integer :: ff + ff = dress_dot_F(m-1) + delta= 0d0 + delta_s2 = 0d0 + do while(more /= 0) + call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) + + !if(task_id(0) == 0) cycle + if(m_task == 0) then + i = zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) + else if(m_task < 0) then + i = zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) + end if + + + if(m_task >= 0) cycle + ff = ff - f + delta(:,:) += breve_delta_m(:,:,1) + delta_s2(:,:) += breve_delta_m(:,:,2) + end do dress(istate) = E(istate)+E0+avg + if(ff /= 0) stop "WRONG NUMBER OF FRAGMENTS COLLECTED" + !double precision :: tmp + + !tmp = 0d0 + + !do i=1,N_det + ! if(edi(i) == 0d0) stop "EMPTY" + ! tmp += psi_coef(i, 1) * delta(1, i) + !end do + !print *, "SUM", E(1)+sum(edi(:)) + !print *, "DOT", E(1)+tmp + call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) end subroutine -integer function dress_find(v, w, sze, imin, imax) +integer function dress_find_sample(v, w) implicit none - integer, intent(in) :: sze, imin, imax - double precision, intent(in) :: v, w(sze) - integer :: i,l,h - integer, parameter :: block=64 + double precision, intent(in) :: v, w(0:N_det_generators) + integer :: i,l,r - l = imin - h = imax-1 + l = 0 + r = N_det_generators - do while(h-l >= block) - i = ishft(h+l,-1) - if(w(i+1) > v) then - h = i-1 + do while(r-l > 1) + i = (r+l) / 2 + if(w(i) < v) then + l = i else - l = i+1 + r = i end if end do - !DIR$ LOOP COUNT (64) - do dress_find=l,h - if(w(dress_find) >= v) then + i = r + do r=i+1,N_det_generators + if (w(r) /= w(i)) then exit - end if - end do + endif + enddo + dress_find_sample = r-1 end function - BEGIN_PROVIDER [ integer, gen_per_cp ] -&BEGIN_PROVIDER [ integer, comb_teeth ] -&BEGIN_PROVIDER [ integer, N_cps_max ] + + BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_W_T ] +&BEGIN_PROVIDER [ double precision, pt2_u_0 ] +&BEGIN_PROVIDER [ integer, pt2_n_0, (pt2_N_teeth+1) ] implicit none - BEGIN_DOC -! N_cps_max : max number of checkpoints -! -! gen_per_cp : number of generators per checkpoint - END_DOC - comb_teeth = min(1+N_det/10,10) - N_cps_max = 16 - gen_per_cp = (N_det_generators / N_cps_max) + 1 -END_PROVIDER + integer :: i, t + double precision, allocatable :: tilde_w(:), tilde_cW(:) + double precision :: r, tooth_width + integer, external :: dress_find_sample - - BEGIN_PROVIDER [ integer, N_cp ] -&BEGIN_PROVIDER [ double precision, cps_N, (N_cps_max) ] -&BEGIN_PROVIDER [ integer, cp_first_tooth, (N_cps_max) ] -&BEGIN_PROVIDER [ integer, done_cp_at, (N_det_generators) ] -&BEGIN_PROVIDER [ integer, done_cp_at_det, (N_det_generators) ] -&BEGIN_PROVIDER [ integer, needed_by_cp, (0:N_cps_max) ] -&BEGIN_PROVIDER [ double precision, cps, (N_det_generators, N_cps_max) ] -&BEGIN_PROVIDER [ integer, N_dress_jobs ] -&BEGIN_PROVIDER [ integer, dress_jobs, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, comb, (N_det_generators) ] -&BEGIN_PROVIDER [ integer, tooth_reduce, (N_det_generators) ] - implicit none - logical, allocatable :: computed(:), comp_filler(:) - integer :: i, j, last_full, dets(comb_teeth) + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) - integer :: k, l, cur_cp, under_det(comb_teeth+1) - integer :: cp_limit(N_cps_max) - integer, allocatable :: iorder(:), first_cp(:) - integer, allocatable :: filler(:) - integer :: nfiller, lfiller, cfiller - logical :: fracted - - integer :: first_suspect - provide psi_coef_generators - first_suspect = 1 - - allocate(filler(n_det_generators)) - allocate(iorder(N_det_generators), first_cp(N_cps_max+1)) - allocate(computed(N_det_generators)) - allocate(comp_filler(N_det_generators)) - first_cp = 1 - cps = 0d0 - cur_cp = 1 - done_cp_at = 0 - done_cp_at_det = 0 - needed_by_cp = 0 - comp_filler = .false. - computed = .false. - cps_N = 1d0 - tooth_reduce = 0 - - integer :: fragsize - fragsize = N_det_generators / ((N_cps_max-1+1)*(N_cps_max-1+2)/2) - - do i=1,N_cps_max - cp_limit(i) = fragsize * i * (i+1) / 2 - end do - cp_limit(N_cps_max) = N_det*2 - - N_dress_jobs = first_det_of_comb - 1 - do i=1, N_dress_jobs - dress_jobs(i) = i - computed(i) = .true. - end do - - l=first_det_of_comb - call random_seed(put=(/321,654,65,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/)) - call RANDOM_NUMBER(comb) - lfiller = 1 - nfiller = 1 do i=1,N_det_generators - !print *, i, N_dress_jobs - comb(i) = comb(i) * comb_step - !DIR$ FORCEINLINE - call add_comb(comb(i), computed, cps(1, cur_cp), N_dress_jobs, dress_jobs) - - !if(N_dress_jobs / gen_per_cp > (cur_cp-1) .or. N_dress_jobs == N_det_generators) then - if(N_dress_jobs > cp_limit(cur_cp) .or. N_dress_jobs == N_det_generators) then - first_cp(cur_cp+1) = N_dress_jobs - done_cp_at(N_dress_jobs) = cur_cp - cps_N(cur_cp) = dfloat(i) - if(N_dress_jobs /= N_det_generators) then - cps(:, cur_cp+1) = cps(:, cur_cp) - cur_cp += 1 - end if - - if (N_dress_jobs == N_det_generators) then - exit - end if - end if - - !!!!!!!!!!!!!!!!!!!!!!!! - if(.TRUE.) then - do l=first_suspect,N_det_generators - if((.not. computed(l))) then - N_dress_jobs+=1 - dress_jobs(N_dress_jobs) = l - computed(l) = .true. - first_suspect = l - exit - end if - end do - - if (N_dress_jobs == N_det_generators) exit - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ELSE - !!!!!!!!!!!!!!!!!!!!!!!!!!!! - do l=first_suspect,N_det_generators - if((.not. computed(l)) .and. (.not. comp_filler(l))) exit - end do - first_suspect = l - if(l > N_det_generators) cycle - - cfiller = tooth_of_det(l)-1 - if(cfiller > lfiller) then - do j=1,nfiller-1 - if(.not. computed(filler(j))) then - k=N_dress_jobs+1 - dress_jobs(k) = filler(j) - N_dress_jobs = k - end if - computed(filler(j)) = .true. - end do - nfiller = 2 - filler(1) = l - lfiller = cfiller - else - filler(nfiller) = l - nfiller += 1 - end if - comp_filler(l) = .True. - end if - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + tilde_w(i) = psi_coef_sorted_gen(i,dress_stoch_istate)**2 + 1.d-20 + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) enddo - - do j=1,nfiller-1 - if(.not. computed(filler(j)))then - k=N_dress_jobs+1 - dress_jobs(k) = filler(j) - N_dress_jobs = k - end if - computed(filler(j)) = .true. - end do - - - N_cp = cur_cp - - if(N_dress_jobs /= N_det_generators .or. N_cp > N_cps_max) then - print *, N_dress_jobs, N_det_generators, N_cp, N_cps_max - stop "error in jobs creation" - end if - - cur_cp = 0 - do i=1,N_dress_jobs - if(done_cp_at(i) /= 0) cur_cp = done_cp_at(i) - done_cp_at(i) = cur_cp - done_cp_at_det(dress_jobs(i)) = cur_cp - needed_by_cp(cur_cp) += 1 - end do - - - under_det = 0 - cp_first_tooth = 0 - do i=1,N_dress_jobs - do j=comb_teeth+1,1,-1 - if(dress_jobs(i) <= first_det_of_teeth(j)) then - under_det(j) = under_det(j) + 1 - if(under_det(j) == first_det_of_teeth(j))then - do l=done_cp_at(i)+1, N_cp - cps(:first_det_of_teeth(j)-1, l) = 0d0 - cp_first_tooth(l) = j - end do - cps(first_det_of_teeth(j), done_cp_at(i)+1) = & - cps(first_det_of_teeth(j), done_cp_at(i)+1) * fractage(j) - end if - else - exit - end if - end do - end do - cp_first_tooth(N_cp) = comb_teeth+1 - - do i=1,N_det_generators - do j=N_cp,2,-1 - cps(i,j) -= cps(i,j-1) - end do - end do - - iorder = -1 - - cps(:, N_cp) = 0d0 - - iloop : do i=fragment_first+1,N_det_generators - k = tooth_of_det(i) - if(k == 0) cycle - if (i == first_det_of_teeth(k)) cycle - - do j=1,N_cp - if(cps(i, j) /= 0d0) cycle iloop - end do - - tooth_reduce(i) = k - end do iloop - - do i=1,N_det_generators - if(tooth_reduce(dress_jobs(i)) == 0) dress_jobs(i) = dress_jobs(i)+N_det*2 - end do - - do i=1,N_cp-1 - call isort(dress_jobs(first_cp(i)+1),iorder,first_cp(i+1)-first_cp(i)-1) - end do - - do i=1,N_det_generators - if(dress_jobs(i) > N_det) dress_jobs(i) = dress_jobs(i) - N_det*2 - end do -END_PROVIDER - - -subroutine get_comb_val(stato, detail, cur_cp, val, istate) - implicit none - integer, intent(in) :: cur_cp, istate - integer :: first - double precision, intent(in) :: stato, detail(N_states, N_det_generators) - double precision, intent(out) :: val - double precision :: curs - integer :: j, k - integer, external :: dress_find - - curs = 1d0 - stato - val = 0d0 - first = cp_first_tooth(cur_cp) - - do j = comb_teeth, first, -1 - !DIR$ FORCEINLINE - k = dress_find(curs, dress_cweight,size(dress_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) - if(k == first_det_of_teeth(first)) then - val += detail(istate, k) * dress_weight_inv(k) * comb_step * fractage(first) - else - val += detail(istate, k) * dress_weight_inv(k) * comb_step - end if - - curs -= comb_step - end do -end subroutine - - -subroutine get_comb(stato, dets) - implicit none - double precision, intent(in) :: stato - integer, intent(out) :: dets(comb_teeth) - double precision :: curs - integer :: j - integer, external :: dress_find - - curs = 1d0 - stato - do j = comb_teeth, 1, -1 - !DIR$ FORCEINLINE - dets(j) = dress_find(curs, dress_cweight,size(dress_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) - curs -= comb_step - end do -end subroutine - - -subroutine add_comb(com, computed, cp, N, tbc) - implicit none - double precision, intent(in) :: com - integer, intent(inout) :: N - double precision, intent(inout) :: cp(N_det) - logical, intent(inout) :: computed(N_det_generators) - integer, intent(inout) :: tbc(N_det_generators) - integer :: i, k, l, dets(comb_teeth) - - !DIR$ FORCEINLINE - call get_comb(com, dets) - k=N+1 - do i = 1, comb_teeth - l = dets(i) - cp(l) += 1d0 - if(.not.(computed(l))) then - tbc(k) = l - k = k+1 - computed(l) = .true. - end if - end do - N = k-1 -end subroutine - - -BEGIN_PROVIDER [ integer, dress_stoch_istate ] - implicit none - dress_stoch_istate = 1 -END_PROVIDER - - - BEGIN_PROVIDER [ double precision, dress_weight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, dress_weight_inv, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, dress_cweight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, dress_cweight_cache, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, fractage, (comb_teeth) ] -&BEGIN_PROVIDER [ double precision, comb_step ] -&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] -&BEGIN_PROVIDER [ integer, first_det_of_comb ] -&BEGIN_PROVIDER [ integer, tooth_of_det, (N_det_generators) ] - implicit none - integer :: i - double precision :: norm_left, stato - integer, external :: dress_find - - dress_weight(1) = psi_coef_generators(1,dress_stoch_istate)**2 - dress_cweight(1) = psi_coef_generators(1,dress_stoch_istate)**2 - - do i=1,N_det_generators - dress_weight(i) = psi_coef_generators(i,dress_stoch_istate)**2 + double precision :: norm + norm = 0.d0 + do i=N_det_generators,1,-1 + norm += tilde_w(i) enddo - ! Important to loop backwards for numerical precision - dress_cweight(N_det_generators) = dress_weight(N_det_generators) - do i=N_det_generators-1,1,-1 - dress_cweight(i) = dress_weight(i) + dress_cweight(i+1) - end do - - do i=1,N_det_generators - dress_weight(i) = dress_weight(i) / dress_cweight(1) - dress_cweight(i) = dress_cweight(i) / dress_cweight(1) - enddo + tilde_w(:) = tilde_w(:) / norm - do i=1,N_det_generators-1 - dress_cweight(i) = 1.d0 - dress_cweight(i+1) - end do - dress_cweight(N_det_generators) = 1.d0 + tilde_cW(0) = -1.d0 + do i=1,N_det_generators + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + tilde_cW(:) = tilde_cW(:) + 1.d0 - norm_left = 1d0 - - comb_step = 1d0/dfloat(comb_teeth) - !print *, "comb_step", comb_step - first_det_of_comb = 1 - do i=1,N_det_generators ! min(100,N_det_generators) - first_det_of_comb = i - if(dress_weight(i)/norm_left < comb_step) then + pt2_n_0(1) = 0 + do + pt2_u_0 = tilde_cW(pt2_n_0(1)) + r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth) + pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth) + if(pt2_W_T >= r - pt2_u_0) then exit end if - norm_left -= dress_weight(i) + pt2_n_0(1) += 1 + if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then + stop "teeth building failed" + end if end do - first_det_of_comb = max(2,first_det_of_comb) - call write_int(6, first_det_of_comb-1, 'Size of deterministic set') - - - comb_step = (1d0 - dress_cweight(first_det_of_comb-1)) * comb_step - - stato = 1d0 - comb_step - iloc = N_det_generators - do i=comb_teeth, 1, -1 - integer :: iloc - iloc = dress_find(stato, dress_cweight, N_det_generators, 1, iloc) - first_det_of_teeth(i) = iloc - fractage(i) = (dress_cweight(iloc) - stato) / dress_weight(iloc) - stato -= comb_step + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + do t=2, pt2_N_teeth + r = pt2_u_0 + pt2_W_T * dble(t-1) + pt2_n_0(t) = dress_find_sample(r, tilde_cW) + end do + pt2_n_0(pt2_N_teeth+1) = N_det_generators + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1)) + do t=1, pt2_N_teeth + tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t)) + if (tooth_width == 0.d0) then + tooth_width = sum(tilde_w(pt2_n_0(t):pt2_n_0(t+1))) + endif + ASSERT(tooth_width > 0.d0) + do i=pt2_n_0(t)+1, pt2_n_0(t+1) + pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width + end do end do - first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 - first_det_of_teeth(1) = first_det_of_comb - - - if(first_det_of_teeth(1) /= first_det_of_comb) then - print *, 'Error in ', irp_here - stop "comb provider" - endif + pt2_cW(0) = 0d0 do i=1,N_det_generators - dress_weight_inv(i) = 1.d0/dress_weight(i) - enddo - - tooth_of_det(:first_det_of_teeth(1)-1) = 0 - do i=1,comb_teeth - tooth_of_det(first_det_of_teeth(i):first_det_of_teeth(i+1)-1) = i + pt2_cW(i) = pt2_cW(i-1) + pt2_w(i) end do + pt2_n_0(pt2_N_teeth+1) = N_det_generators END_PROVIDER - - diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index 590b27c6..36920940 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -1,71 +1,9 @@ use bitmasks -BEGIN_PROVIDER [ integer, N_dress_teeth ] - N_dress_teeth = 10 -END_PROVIDER - -BEGIN_PROVIDER [ double precision, dress_norm_acc, (0:N_det, N_states) ] -&BEGIN_PROVIDER [ double precision, dress_norm, (0:N_det, N_states) ] -&BEGIN_PROVIDER [ double precision, dress_teeth_size, (0:N_det, N_states) ] -&BEGIN_PROVIDER [ integer, dress_teeth, (0:N_dress_teeth+1, N_states) ] - implicit none - integer :: i, j, st, nt - double precision :: norm_sto, jump, norm_mwen, norm_loc - - if(N_states /= 1) stop "dress_sto may not work with N_states /= 1" - - do st=1,N_states - dress_teeth(0,st) = 1 - norm_sto = 1d0 - do i=1,N_det - dress_teeth(1,st) = i - jump = (1d0 / dfloat(N_dress_teeth)) * norm_sto - if(psi_coef_generators(i,1)**2 < jump / 2d0) exit - norm_sto -= psi_coef_generators(i,1)**2 - end do - - norm_loc = 0d0 - dress_norm_acc(0,st) = 0d0 - nt = 1 - - do i=1,dress_teeth(1,st)-1 - dress_norm_acc(i,st) = dress_norm_acc(i-1,st) + psi_coef_generators(i,st)**2 - end do - - do i=dress_teeth(1,st), N_det_generators!-dress_teeth(1,st)+1 - norm_mwen = psi_coef_generators(i,st)**2!-1+dress_teeth(1,st),st)**2 - dress_norm_acc(i,st) = dress_norm_acc(i-1,st) + norm_mwen - norm_loc += norm_mwen - if(norm_loc > (jump*dfloat(nt))) then - nt = nt + 1 - dress_teeth(nt,st) = i - end if - end do - if(nt > N_dress_teeth+1) then - print *, "foireouse dress_teeth", nt, dress_teeth(nt,st), N_det - stop - end if - - dress_teeth(N_dress_teeth+1,st) = N_det+1 - norm_loc = 0d0 - do i=N_dress_teeth, 0, -1 - dress_teeth_size(i,st) = dress_norm_acc(dress_teeth(i+1,st)-1,st) - dress_norm_acc(dress_teeth(i,st)-1, st) - dress_norm_acc(dress_teeth(i,st):dress_teeth(i+1,st)-1,st) -= dress_norm_acc(dress_teeth(i,st)-1, st) - dress_norm_acc(dress_teeth(i,st):dress_teeth(i+1,st)-1,st) = & - dress_norm_acc(dress_teeth(i,st):dress_teeth(i+1,st)-1,st) / dress_teeth_size(i,st) - dress_norm(dress_teeth(i,st), st) = dress_norm_acc(dress_teeth(i,st), st) - do j=dress_teeth(i,st)+1, dress_teeth(i+1,1)-1 - dress_norm(j,1) = dress_norm_acc(j, st) - dress_norm_acc(j-1, st) - end do - end do - end do -END_PROVIDER - - BEGIN_PROVIDER [ integer , N_det_delta_ij ] implicit none - N_det_delta_ij = 1 + N_det_delta_ij = N_det END_PROVIDER BEGIN_PROVIDER [ double precision, delta_ij, (N_states, N_det, 2) ] @@ -83,36 +21,23 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] integer :: i,j,k double precision, allocatable :: dress(:), del(:,:), del_s2(:,:) - double precision :: E_CI_before(N_states), relative_error + double precision :: E_CI_before(N_states) + integer :: cnt = 0 - ! prevents re-providing if delta_ij_tmp is - ! just being copied - if(N_det_delta_ij == N_det) then + allocate(dress(N_states), del(N_states, N_det_delta_ij), del_s2(N_states, N_det_delta_ij)) + + delta_ij_tmp = 0d0 + + E_CI_before(:) = dress_E0_denominator(:) + nuclear_repulsion + + call write_double(6,dress_relative_error,"Convergence of the stochastic algorithm") - allocate(dress(N_states), del(N_states, N_det_delta_ij), del_s2(N_states, N_det_delta_ij)) - - delta_ij_tmp = 0d0 - - E_CI_before(:) = psi_energy(:) + nuclear_repulsion - threshold_selectors = 1.d0 - threshold_generators = 1.d0 - SOFT_TOUCH threshold_selectors threshold_generators - ! if(errr /= 0d0) then - ! errr = errr / 2d0 - ! else - ! errr = 1d-4 - ! end if - relative_error = 1.d-3 - - call write_double(6,relative_error,"Relative error for the stochastic algorithm") - - call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error), N_det_delta_ij) - delta_ij_tmp(:,:,1) = del(:,:) - delta_ij_tmp(:,:,2) = del_s2(:,:) + call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(dress_relative_error), N_det_delta_ij) + delta_ij_tmp(:,:,1) = del(:,:) + delta_ij_tmp(:,:,2) = del_s2(:,:) - deallocate(dress, del, del_s2) - end if + deallocate(dress, del, del_s2) END_PROVIDER diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 8801cb3f..0d3201bc 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -1,13 +1,5 @@ use bitmasks -BEGIN_PROVIDER [ integer, fragment_count ] - implicit none - BEGIN_DOC - ! Number of fragments for the deterministic part - END_DOC - fragment_count = 1 -END_PROVIDER - subroutine run_dress_slave(thread,iproce,energy) use f77_zmq @@ -16,81 +8,75 @@ subroutine run_dress_slave(thread,iproce,energy) double precision, intent(in) :: energy(N_states_diag) integer, intent(in) :: thread, iproce - integer :: rc, i, subset, i_generator + integer :: rc, i, j, subset, i_generator - integer :: worker_id, task_id, ctask, ltask - character*(5120) :: task + integer :: worker_id, ctask, ltask + character*(512) :: task(Nproc) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer(ZMQ_PTR), external :: new_zmq_push_socket integer(ZMQ_PTR) :: zmq_socket_push - - logical :: done - - double precision,allocatable :: dress_detail(:) - integer :: ind - double precision,allocatable :: delta_ij_loc(:,:,:) - integer :: h,p,n,i_state - logical :: ok - - integer, allocatable :: int_buf(:) - double precision, allocatable :: double_buf(:) - integer(bit_kind), allocatable :: det_buf(:,:,:) - integer :: N_buf(3) - logical :: last + double precision,allocatable :: breve_delta_m(:,:,:) + integer :: i_state,m,l,t,p,sum_f !integer, external :: omp_get_thread_num - double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:) - integer :: toothMwen - logical :: fracted + double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:), edI(:) + double precision, allocatable :: edI_task(:) + integer, allocatable :: edI_index(:), edI_taskID(:) + integer :: n_tasks + + integer :: iproc + integer, allocatable :: f(:) + integer :: cp_sent, cp_done + integer :: cp_max(Nproc) + integer :: will_send, task_id, purge_task_id, ntask_buf + integer, allocatable :: task_buf(:) + integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1) + integer(kind=OMP_LOCK_KIND) :: lck_sto(0:dress_N_cp+1), sending, getting_task double precision :: fac - - + double precision :: ending(1) + integer, external :: zmq_get_dvector +! double precision, external :: omp_get_wtime + double precision :: time, time0 + integer :: ntask_tbd, task_tbd(Nproc), i_gen_tbd(Nproc), subset_tbd(Nproc) ! if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" - allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) - allocate(cp(N_states, N_det, N_cp, 2)) - delta_det = 0d9 + allocate(delta_det(N_states, N_det, 0:pt2_N_teeth+1, 2)) + allocate(cp(N_states, N_det, dress_N_cp, 2)) + allocate(edI(N_det_generators), f(N_det_generators)) + allocate(edI_index(N_det_generators), edI_task(N_det_generators)) + edI = 0d0 + f = 0 + delta_det = 0d0 cp = 0d0 - + task = CHAR(0) - task(:) = CHAR(0) - - - - integer :: iproc, cur_cp, done_for(0:N_cp) - integer, allocatable :: tasks(:) - integer :: lastCp(Nproc) - integer :: lastSent, lastSendable - logical :: send - integer(kind=OMP_LOCK_KIND) :: lck_det(0:comb_teeth+1) - integer(kind=OMP_LOCK_KIND) :: lck_sto(0:N_cp+1) - - do i=0,N_cp+1 + call omp_init_lock(sending) + call omp_init_lock(getting_task) + do i=0,dress_N_cp+1 call omp_init_lock(lck_sto(i)) end do - do i=0,comb_teeth+1 + do i=0,pt2_N_teeth+1 call omp_init_lock(lck_det(i)) end do - lastCp = 0 - lastSent = 0 - send = .false. - done_for = 0 - - double precision :: hij, sij - !call i_h_j_s2(psi_det(1,1,1),psi_det(1,1,2),N_int,hij, sij) - - hij = dress_E0_denominator(1) !PROVIDE BEFORE OMP PARALLEL + cp_done = 0 + cp_sent = 0 + will_send = 0 + double precision :: hij, sij, tmp + purge_task_id = 0 + provide psi_energy + ending(1) = dble(dress_N_cp+1) + ntask_tbd = 0 !$OMP PARALLEL DEFAULT(SHARED) & - !$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) & - !$OMP PRIVATE(lastSendable, toothMwen, fracted, fac) & - !$OMP PRIVATE(i, cur_cp, send, i_generator, subset, iproc, N_buf) & - !$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) - + !$OMP PRIVATE(breve_delta_m, task_id) & + !$OMP PRIVATE(tmp,fac,m,l,t,sum_f,n_tasks) & + !$OMP PRIVATE(i,p,will_send, i_generator, subset, iproc) & + !$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) & + !$OMP PRIVATE(task_buf, ntask_buf,time, time0) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) @@ -99,276 +85,247 @@ subroutine run_dress_slave(thread,iproce,energy) call end_zmq_push_socket(zmq_socket_push,thread) stop "WORKER -1" end if - - iproc = omp_get_thread_num()+1 - allocate(int_buf(N_dress_int_buffer)) - allocate(double_buf(N_dress_double_buffer)) - allocate(det_buf(N_int, 2, N_dress_det_buffer)) - allocate(delta_ij_loc(N_states,N_det,2)) - do - call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) - task = task//" 0" - if(task_id == 0) exit - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - if(task_id /= 0) then - read (task,*) subset, i_generator + allocate(breve_delta_m(N_states,N_det,2)) + allocate(task_buf(pt2_n_tasks_max)) + ntask_buf = 0 + + if(iproc==1) then + call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) + end if - !$OMP ATOMIC - done_for(done_cp_at_det(i_generator)) += 1 - ! print *, "IGEN", i_generator, done_cp_at_det(i_generator) - delta_ij_loc(:,:,:) = 0d0 - call generator_start(i_generator, iproc) - call alpha_callback(delta_ij_loc, i_generator, subset, iproc) - call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc) - - do i=1,N_cp - fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step - if(fac == 0d0) cycle - call omp_set_lock(lck_sto(i)) - cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac) - cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac) - call omp_unset_lock(lck_sto(i)) - end do - - - toothMwen = tooth_of_det(i_generator) - fracted = (toothMwen /= 0) - if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen)) - if(fracted) then - call omp_set_lock(lck_det(toothMwen)) - call omp_set_lock(lck_det(toothMwen-1)) - delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen)) - delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen)) - delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen)) - delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen)) - call omp_unset_lock(lck_det(toothMwen)) - call omp_unset_lock(lck_det(toothMwen-1)) - else - call omp_set_lock(lck_det(toothMwen)) - delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) - delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) - call omp_unset_lock(lck_det(toothMwen)) - end if - call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) - call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) - lastCp(iproc) = done_cp_at_det(i_generator) + do while(cp_done > cp_sent .or. m /= dress_N_cp+1) + call omp_set_lock(getting_task) + if(ntask_tbd == 0) then + ntask_tbd = size(task_tbd) + call get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_tbd, task, ntask_tbd) + !task = task//" 0" end if - + + task_id = task_tbd(1) + if(task_id /= 0) then + read (task(1),*) subset, i_generator + do i=1,size(task_tbd)-1 + task_tbd(i) = task_tbd(i+1) + task(i) = task(i+1) + end do + m = dress_P(i_generator) + ntask_tbd -= 1 + else + m = dress_N_cp + 1 + i= zmq_get_dvector(zmq_to_qp_run_socket, worker_id, "ending", ending, 1) + end if + call omp_unset_lock(getting_task) + will_send = 0 + !$OMP CRITICAL - send = .false. - lastSendable = N_cp*2 - do i=1,Nproc - lastSendable = min(lastCp(i), lastSendable) - end do - lastSendable -= 1 - if(lastSendable > lastSent .or. (lastSendable == N_cp-1 .and. lastSent /= N_cp-1)) then - lastSent = lastSendable - cur_cp = lastSent - send = .true. + cp_max(iproc) = m + cp_done = minval(cp_max)-1 + if(cp_done > cp_sent) then + will_send = cp_sent + 1 + cp_sent = will_send + end if + if(purge_task_id == 0) then + purge_task_id = task_id + task_id = 0 + else if(task_id /= 0) then + ntask_buf += 1 + task_buf(ntask_buf) = task_id end if !$OMP END CRITICAL - if(send) then - N_buf = (/0,1,0/) - - delta_ij_loc = 0d0 - if(cur_cp < 1) stop "cur_cp < 1" - do i=1,cur_cp - delta_ij_loc(:,:,1) += cp(:,:,i,1) - delta_ij_loc(:,:,2) += cp(:,:,i,2) + if(will_send /= 0 .and. will_send <= int(ending(1))) then + call omp_set_lock(sending) + n_tasks = 0 + sum_f = 0 + do i=1,N_det_generators + if(dress_P(i) <= will_send) sum_f = sum_f + f(i) + if(dress_P(i) == will_send .and. f(i) /= 0) then + n_tasks += 1 + edI_task(n_tasks) = edI(i) + edI_index(n_tasks) = i + end if end do - - delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) / cps_N(cur_cp) - do i=cp_first_tooth(cur_cp)-1,0,-1 - delta_ij_loc(:,:,1) = delta_ij_loc(:,:,1) +delta_det(:,:,i,1) - delta_ij_loc(:,:,2) = delta_ij_loc(:,:,2) +delta_det(:,:,i,2) - end do - call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1) + call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, 0, n_tasks) + call omp_unset_lock(sending) end if + + if(m /= dress_N_cp+1) then + !UPDATE i_generator - if(task_id == 0) exit + breve_delta_m(:,:,:) = 0d0 + call generator_start(i_generator, iproc) + time0 = omp_get_wtime() + call alpha_callback(breve_delta_m, i_generator, subset, pt2_F(i_generator), iproc) + time = omp_get_wtime() + !print '(I0.11, I4, A12, F12.3)', i_generator, subset, "GREPMETIME", time-time0 + t = dress_T(i_generator) + + call omp_set_lock(lck_det(t)) + do j=1,N_det + do i=1,N_states + delta_det(i,j,t, 1) = delta_det(i,j,t, 1) + breve_delta_m(i,j,1) + delta_det(i,j,t, 2) = delta_det(i,j,t, 2) + breve_delta_m(i,j,2) + enddo + enddo + call omp_unset_lock(lck_det(t)) + + do p=1,dress_N_cp + if(dress_e(i_generator, p) /= 0d0) then + fac = dress_e(i_generator, p) + call omp_set_lock(lck_sto(p)) + do j=1,N_det + do i=1,N_states + cp(i,j,p,1) = cp(i,j,p,1) + breve_delta_m(i,j,1) * fac + cp(i,j,p,2) = cp(i,j,p,2) + breve_delta_m(i,j,2) * fac + enddo + enddo + call omp_unset_lock(lck_sto(p)) + end if + end do + + tmp = 0d0 + do i=N_det,1,-1 + tmp += psi_coef(i, dress_stoch_istate)*breve_delta_m(dress_stoch_istate, i, 1) + end do + !$OMP ATOMIC + edI(i_generator) += tmp + !$OMP ATOMIC + f(i_generator) += 1 + !push bidon + if(ntask_buf == size(task_buf)) then + call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) + ntask_buf = 0 + end if + end if end do - + !$OMP BARRIER + if(ntask_buf /= 0) then + call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) + ntask_buf = 0 + end if + !$OMP SINGLE + if(purge_task_id /= 0) then + do while(int(ending(1)) == dress_N_cp+1) + call sleep(1) + i= zmq_get_dvector(zmq_to_qp_run_socket, worker_id, "ending", ending, 1) + end do + + will_send = int(ending(1)) + breve_delta_m = 0d0 + + do l=will_send, 1,-1 + breve_delta_m(:,:,1) += cp(:,:,l,1) + breve_delta_m(:,:,2) += cp(:,:,l,2) + end do + + breve_delta_m(:,:,:) = breve_delta_m(:,:,:) / dress_M_m(will_send) + + do t=dress_dot_t(will_send)-1,0,-1 + breve_delta_m(:,:,1) = breve_delta_m(:,:,1) + delta_det(:,:,t,1) + breve_delta_m(:,:,2) = breve_delta_m(:,:,2) + delta_det(:,:,t,2) + end do + + sum_f = 0 + do i=1,N_det_generators + if(dress_P(i) <= will_send) sum_f = sum_f + f(i) + end do + call push_dress_results(zmq_socket_push, -will_send, sum_f, edI_task, edI_index, breve_delta_m, purge_task_id, 1) + end if + + !$OMP END SINGLE call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) !$OMP END PARALLEL - - do i=0,N_cp+1 + do i=0,dress_N_cp+1 call omp_destroy_lock(lck_sto(i)) end do - do i=0,comb_teeth+1 + do i=0,pt2_N_teeth+1 call omp_destroy_lock(lck_det(i)) end do end subroutine - -subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id) +subroutine push_dress_results(zmq_socket_push, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) use f77_zmq implicit none - - integer, parameter :: sendt = 4 integer(ZMQ_PTR), intent(in) :: zmq_socket_push - double precision, intent(inout) :: delta_loc(N_states, N_det, 2) - real(kind=4), allocatable :: delta_loc4(:,:,:) - double precision, intent(in) :: double_buf(*) - integer, intent(in) :: int_buf(*) - integer(bit_kind), intent(in) :: det_buf(N_int, 2, *) - integer, intent(in) :: N_bufi(3) - integer :: N_buf(3) - integer, intent(in) :: ind, cur_cp, task_id - integer :: rc, i, j, k, l - double precision :: contrib(N_states) - real(sendt), allocatable :: r4buf(:,:,:) - - rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" - - rc = f77_zmq_send( zmq_socket_push, cur_cp, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" - - - if(cur_cp /= -1) then - allocate(r4buf(N_states, N_det, 2)) - do i=1,2 - do j=1,N_det - do k=1,N_states - r4buf(k,j,i) = real(delta_loc(k,j,i), sendt) - end do - end do - end do - - rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,1), sendt*N_states*N_det, ZMQ_SNDMORE) - if(rc /= sendt*N_states*N_det) stop "push" - - rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,2), sendt*N_states*N_det, ZMQ_SNDMORE) - if(rc /= sendt*N_states*N_det) stop "push" - else - contrib = 0d0 - do i=1,N_det - contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :) - end do - - rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) stop "push" + integer, intent(in) :: m_task, f, edI_index(n_tasks) + double precision, intent(in) :: breve_delta_m(N_states, N_det, 2), edI_task(n_tasks) + integer, intent(in) :: task_id(pt2_n_tasks_max), n_tasks + integer :: rc, i, j, k + rc = f77_zmq_send( zmq_socket_push, m_task, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push3" - N_buf = N_bufi - !N_buf = (/0,1,0/) - - rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) - if(rc /= 4*3) stop "push5" - - if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" - - - if(N_buf(1) > 0) then - rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE) - if(rc /= 4*N_buf(1)) stop "push6" - end if - - if(N_buf(2) > 0) then - rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE) - if(rc /= 8*N_buf(2)) stop "push8" - end if - - if(N_buf(3) > 0) then - rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE) - if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10" - end if - - rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) - if(rc /= 4) stop "push11" - end if - -! Activate is zmq_socket_push is a REQ + if(m_task > 0) then + rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push1" + rc = f77_zmq_send( zmq_socket_push, f, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push4" + rc = f77_zmq_send( zmq_socket_push, edI_task, 8*n_tasks, ZMQ_SNDMORE) + if(rc /= 8*n_tasks) stop "push5" + rc = f77_zmq_send( zmq_socket_push, edI_index, 4*n_tasks, 0) + if(rc /= 4*n_tasks) stop "push6" + else if(m_task == 0) then + rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push1" + rc = f77_zmq_send( zmq_socket_push, task_id, 4*n_tasks, 0) + if(rc /= 4*n_tasks) stop "push2" + else + rc = f77_zmq_send( zmq_socket_push, f, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push4" + rc = f77_zmq_send( zmq_socket_push, breve_delta_m, 8*N_det*N_states*2, ZMQ_SNDMORE) + if(rc /= 8*N_det*N_states*2) stop "push6" + rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) + if(rc /= 4) stop "push6" + end if +! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE character*(2) :: ok rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) IRP_ENDIF - end subroutine -BEGIN_PROVIDER [ real(4), real4buf, (N_states, N_det, 2) ] - -END_PROVIDER -subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib) +subroutine pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) use f77_zmq implicit none - integer, parameter :: sendt = 4 integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - integer, intent(out) :: cur_cp - double precision, intent(inout) :: delta_loc(N_states, N_det, 2) - double precision, intent(out) :: double_buf(*), contrib(N_states) - integer, intent(out) :: int_buf(*) - integer(bit_kind), intent(out) :: det_buf(N_int, 2, *) - integer, intent(out) :: ind - integer, intent(out) :: task_id + integer, intent(out) :: m_task, f, edI_index(N_det_generators) + double precision, intent(out) :: breve_delta_m(N_states, N_det, 2), edI_task(N_det_generators) + integer, intent(out) :: task_id(pt2_n_tasks_max), n_tasks integer :: rc, i, j, k - integer, intent(out) :: N_buf(3) - - rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) - if(rc /= 4) stop "pulla" - rc = f77_zmq_recv( zmq_socket_pull, cur_cp, 4, 0) - if(rc /= 4) stop "pulla" - - - + rc = f77_zmq_recv( zmq_socket_pull, m_task, 4, 0) + if(rc /= 4) stop "pullc" - if(cur_cp /= -1) then - - rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,1), N_states*sendt*N_det, 0) - if(rc /= sendt*N_states*N_det) stop "pullc" - - rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,2), N_states*sendt*N_det, 0) - if(rc /= sendt*N_states*N_det) stop "pulld" - - do i=1,2 - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,k) - do j=1,N_det - do k=1,N_states - delta_loc(k,j,i) = real(real4buf(k,j,i), 8) - end do - end do - end do - else - rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0) - if(rc /= 8*N_states) stop "pullc" - - rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0) - if(rc /= 4*3) stop "pull" - if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" - - - if(N_buf(1) > 0) then - rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0) - if(rc /= 4*N_buf(1)) stop "pull1" + if(m_task > 0) then + rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) + if(rc /= 4) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, f, 4, 0) + if(rc /= 4) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, edI_task, 8*n_tasks, 0) + if(rc /= 8*n_tasks) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, edI_index, 4*n_tasks, 0) + if(rc /= 4*n_tasks) stop "pullc" + else if(m_task==0) then + rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) + if(rc /= 4) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4*n_tasks, 0) + if(rc /= 4*n_tasks) stop "pull4" + else + rc = f77_zmq_recv( zmq_socket_pull, f, 4, 0) + if(rc /= 4) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, breve_delta_m, 8*N_det*N_states*2, 0) + if(rc /= 8*N_det*N_states*2) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) + if(rc /= 4) stop "pull4" end if - - if(N_buf(2) > 0) then - rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0) - if(rc /= 8*N_buf(2)) stop "pull2" - end if - - if(N_buf(3) > 0) then - rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0) - if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3" - end if - - rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) - if(rc /= 4) stop "pull4" - end if ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE diff --git a/plugins/shiftedbk/EZFIO.cfg b/plugins/shiftedbk/EZFIO.cfg index 6123f458..6069c855 100644 --- a/plugins/shiftedbk/EZFIO.cfg +++ b/plugins/shiftedbk/EZFIO.cfg @@ -26,3 +26,9 @@ doc: Type of zeroth-order Hamiltonian [ EN | Barycentric ] interface: ezfio,provider,ocaml default: EN +[dress_relative_error] +type: Normalized_float +doc: Stop stochastic dressing when the relative error is smaller than PT2_relative_error +interface: ezfio,provider,ocaml +default: 0.001 + diff --git a/plugins/shiftedbk/shifted_bk_iter.irp.f b/plugins/shiftedbk/shifted_bk_iter.irp.f deleted file mode 100644 index 429efa4b..00000000 --- a/plugins/shiftedbk/shifted_bk_iter.irp.f +++ /dev/null @@ -1,159 +0,0 @@ -program shifted_bk - implicit none - integer :: i,j,k - double precision, allocatable :: pt2(:) - integer :: degree - integer :: n_det_before - double precision :: threshold_davidson_in - - allocate (pt2(N_states)) - - double precision :: hf_energy_ref - logical :: has - double precision :: relative_error, absolute_error - integer :: N_states_p - character*(512) :: fmt - - PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique - PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order - PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns - PROVIDE psi_bilinear_matrix_transp_order - - - pt2 = -huge(1.e0) - threshold_davidson_in = threshold_davidson - threshold_davidson = threshold_davidson_in * 100.d0 - SOFT_TOUCH threshold_davidson - - call diagonalize_CI_dressed - call save_wavefunction - - call ezfio_has_hartree_fock_energy(has) - if (has) then - call ezfio_get_hartree_fock_energy(hf_energy_ref) - else - hf_energy_ref = ref_bitmask_energy - endif - - if (N_det > N_det_max) then - psi_det = psi_det_sorted - psi_coef = psi_coef_sorted - N_det = N_det_max - soft_touch N_det psi_det psi_coef - call diagonalize_CI_dressed - call save_wavefunction - N_states_p = min(N_det,N_states) - endif - - n_det_before = 0 - - character*(8) :: pt2_string - double precision :: threshold_selectors_save, threshold_generators_save - threshold_selectors_save = threshold_selectors - threshold_generators_save = threshold_generators - double precision :: error(N_states), energy(N_states) - error = 0.d0 - - threshold_selectors = 1.d0 - threshold_generators = 1d0 - - if (.True.) then - pt2_string = '(sh-Bk) ' - do while ( (N_det < N_det_max) ) - write(*,'(A)') '--------------------------------------------------------------------------------' - - N_det_delta_ij = N_det - - do i=1,N_states - energy(i) = psi_energy(i)+nuclear_repulsion - enddo - - PROVIDE delta_ij_tmp - call delta_ij_done() - - call diagonalize_ci_dressed - do i=1,N_states - pt2(i) = ci_energy_dressed(i) - energy(i) - enddo - - N_states_p = min(N_det,N_states) - - print *, '' - print '(A,I12)', 'Summary at N_det = ', N_det - print '(A)', '-----------------------------------' - print *, '' - print *, '' - - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - write(fmt,*) '(12X,', N_states_p, '(6X,A7,1X,I6,10X))' - write(*,fmt) ('State',k, k=1,N_states_p) - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - write(fmt,*) '(A12,', N_states_p, '(1X,F14.8,15X))' - write(*,fmt) '# E ', energy(1:N_states_p) - if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', energy(1:N_states_p)-energy(1) - write(*,fmt) '# Excit. (eV)', (energy(1:N_states_p)-energy(1))*27.211396641308d0 - endif - write(fmt,*) '(A12,', 2*N_states_p, '(1X,F14.8))' - write(*,fmt) '# PT2'//pt2_string, (pt2(k), error(k), k=1,N_states_p) - write(*,'(A)') '#' - write(*,fmt) '# E+PT2 ', (energy(k)+pt2(k),error(k), k=1,N_states_p) - if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', ( (energy(k)+pt2(k)-energy(1)-pt2(1)), & - dsqrt(error(k)*error(k)+error(1)*error(1)), k=1,N_states_p) - write(*,fmt) '# Excit. (eV)', ( (energy(k)+pt2(k)-energy(1)-pt2(1))*27.211396641308d0, & - dsqrt(error(k)*error(k)+error(1)*error(1))*27.211396641308d0, k=1,N_states_p) - endif - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - print *, '' - - print *, 'N_det = ', N_det - print *, 'N_states = ', N_states - - do k=1, N_states_p - print*,'State ',k - print *, 'PT2 = ', pt2(k) - print *, 'E = ', energy(k) - print *, 'E+PT2'//pt2_string//' = ', energy(k)+pt2(k) - enddo - - print *, '-----' - if(N_states.gt.1)then - print *, 'Variational Energy difference (au | eV)' - do i=2, N_states_p - print*,'Delta E = ', (energy(i) - energy(1)), & - (energy(i) - energy(1)) * 27.211396641308d0 - enddo - print *, '-----' - print*, 'Variational + perturbative Energy difference (au | eV)' - do i=2, N_states_p - print*,'Delta E = ', (energy(i)+ pt2(i) - (energy(1) + pt2(1))), & - (energy(i)+ pt2(i) - (energy(1) + pt2(1))) * 27.211396641308d0 - enddo - endif - call ezfio_set_shiftedbk_energy_pt2(energy(1)+pt2(1)) -! call dump_fci_iterations_value(N_det,energy,pt2) - - n_det_before = N_det - - PROVIDE psi_coef - PROVIDE psi_det - PROVIDE psi_det_sorted - - if (N_det >= N_det_max) then - threshold_davidson = threshold_davidson_in - end if - call save_wavefunction - call ezfio_set_shiftedbk_energy(energy(1)) - call ezfio_set_shiftedbk_energy_pt2(ci_energy_dressed(1)) - enddo - endif - - - - -end - diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index bab8490d..216f9ec3 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -298,9 +298,8 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) - + call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc) - slave_sum_alpha2(:,iproc) += c_alpha(:)**2 if(contrib < sb(iproc)%mini) then call add_to_selection_buffer(sb(iproc), alpha, contrib) diff --git a/src/Davidson/EZFIO.cfg b/src/Davidson/EZFIO.cfg index 49a0f778..315588c1 100644 --- a/src/Davidson/EZFIO.cfg +++ b/src/Davidson/EZFIO.cfg @@ -8,7 +8,7 @@ default: 1.e-12 type: States_number doc: Number of states to consider during the Davdison diagonalization default: 4 -interface: ezfio,provider,ocaml +interface: ezfio,ocaml [davidson_sze_max] type: Strictly_positive_int diff --git a/src/Davidson/ezfio.irp.f b/src/Davidson/ezfio.irp.f new file mode 100644 index 00000000..a22bd456 --- /dev/null +++ b/src/Davidson/ezfio.irp.f @@ -0,0 +1,35 @@ +BEGIN_PROVIDER [ integer, n_states_diag ] + implicit none + BEGIN_DOC +! Number of states to consider during the Davdison diagonalization + END_DOC + + logical :: has + PROVIDE ezfio_filename + if (mpi_master) then + + call ezfio_has_davidson_n_states_diag(has) + if (has) then + call ezfio_get_davidson_n_states_diag(n_states_diag) + else + print *, 'davidson/n_states_diag not found in EZFIO file' + stop 1 + endif + n_states_diag = max(N_states, N_states_diag) + endif + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST( n_states_diag, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read n_states_diag with MPI' + endif + IRP_ENDIF + + call write_time(6) + if (mpi_master) then + write(6, *) 'Read n_states_diag' + endif + +END_PROVIDER + diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 3e5610c8..38e3f293 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -28,6 +28,7 @@ subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze) double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det)) + do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) enddo diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index 6e972114..2d42e849 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -167,8 +167,7 @@ subroutine decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) end select end - -subroutine get_double_excitation(det1,det2,exc,phase,Nint) +subroutine get_double_excitation_ref(det1,det2,exc,phase,Nint) use bitmasks implicit none BEGIN_DOC @@ -312,6 +311,137 @@ subroutine get_double_excitation(det1,det2,exc,phase,Nint) end +subroutine get_phasemask_bit(det1, pm, Nint) + use bitmasks + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2) + integer(bit_kind), intent(out) :: pm(Nint,2) + integer(bit_kind) :: tmp + integer :: ispin, i + do ispin=1,2 + tmp = 0_8 + do i=1,Nint + pm(i,ispin) = xor(det1(i,ispin), ishft(det1(i,ispin), 1)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 2)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 4)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 8)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 16)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 32)) + pm(i,ispin) = xor(pm(i,ispin), tmp) + if(iand(popcnt(det1(i,ispin)), 1) == 1) tmp = not(tmp) + end do + end do +end subroutine + + +subroutine get_double_excitation(det1,det2,exc,phase,Nint) + use bitmasks + implicit none + BEGIN_DOC + ! Returns the two excitation operators between two doubly excited determinants and the phase + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2) + integer(bit_kind), intent(in) :: det2(Nint,2) + integer, intent(out) :: exc(0:2,2,2) + double precision, intent(out) :: phase + integer :: tz + integer :: l, ispin, idx_hole, idx_particle, ishift + integer :: nperm + integer :: i,j,k,m,n + integer :: high, low + integer :: a,b,c,d + integer(bit_kind) :: hole, particle, tmp, pm(Nint,2) + double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /) + double precision :: refaz + logical :: ok + + ASSERT (Nint > 0) + + !do ispin=1,2 + !tmp = 0_8 + !do i=1,Nint + ! pm(i,ispin) = xor(det1(i,ispin), ishft(det1(i,ispin), 1)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 2)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 4)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 8)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 16)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 32)) + ! pm(i,ispin) = xor(pm(i,ispin), tmp) + ! if(iand(popcnt(det1(i,ispin)), 1) == 1) tmp = not(tmp) + !end do + !end do + call get_phasemask_bit(det1, pm, Nint) + nperm = 0 + exc(0,1,1) = 0 + exc(0,2,1) = 0 + exc(0,1,2) = 0 + exc(0,2,2) = 0 + do ispin = 1,2 + idx_particle = 0 + idx_hole = 0 + ishift = 1-bit_kind_size + !par = 0 + do l=1,Nint + ishift = ishift + bit_kind_size + if (det1(l,ispin) == det2(l,ispin)) then + cycle + endif + tmp = xor( det1(l,ispin), det2(l,ispin) ) + particle = iand(tmp, det2(l,ispin)) + hole = iand(tmp, det1(l,ispin)) + do while (particle /= 0_bit_kind) + tz = trailz(particle) + nperm = nperm + iand(ishft(pm(l,ispin), -tz), 1) + idx_particle = idx_particle + 1 + exc(0,2,ispin) = exc(0,2,ispin) + 1 + exc(idx_particle,2,ispin) = tz+ishift + particle = iand(particle,particle-1_bit_kind) + enddo + if (iand(exc(0,1,ispin),exc(0,2,ispin))==2) then ! exc(0,1,ispin)==2 or exc(0,2,ispin)==2 + exit + endif + do while (hole /= 0_bit_kind) + tz = trailz(hole) + nperm = nperm + iand(ishft(pm(l,ispin), -tz), 1) + idx_hole = idx_hole + 1 + exc(0,1,ispin) = exc(0,1,ispin) + 1 + exc(idx_hole,1,ispin) = tz+ishift + hole = iand(hole,hole-1_bit_kind) + enddo + if (iand(exc(0,1,ispin),exc(0,2,ispin))==2) then ! exc(0,1,ispin)==2 or exc(0,2,ispin) + exit + endif + enddo + + select case (exc(0,1,ispin)) + case(0) + cycle + + case(1) + if(exc(1,1,ispin) < exc(1,2,ispin)) nperm = nperm+1 + + case (2) + a = exc(1,1,ispin) + b = exc(1,2,ispin) + c = exc(2,1,ispin) + d = exc(2,2,ispin) + + if(min(a,c) > max(b,d) .or. min(b,d) > max(a,c) .or. (a-b)*(c-d)<0) then + nperm = nperm + 1 + end if + exit + end select + + enddo + phase = phase_dble(iand(nperm,1)) + !call get_double_excitation_ref(det1,det2,exc,refaz,Nint) + !if(phase == refaz) then + ! print *, "phase", phase, refaz, n, exc(0,1,1) + !end if +end + subroutine get_mono_excitation(det1,det2,exc,phase,Nint) use bitmasks implicit none From f8bda54c75b4d23756980a1789b349df3ef42b2d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 9 Sep 2018 12:20:33 +0200 Subject: [PATCH 02/28] Fixed MPI --- plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 1 + .../selection_davidson_slave.irp.f | 15 ++++++++--- .../Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f | 2 +- plugins/Full_CI_ZMQ/target_pt2_zmq.irp.f | 2 +- plugins/Selectors_Utils/zmq.irp.f | 4 +++ .../ezfio_generate_provider.py | 4 +++ src/Bitmask/bitmasks.irp.f | 24 ++++++++++++++++++ src/Bitmask/mpi.irp.f | 4 +++ src/Davidson/davidson_parallel.irp.f | 13 ++++++++-- src/Davidson/{ezfio.irp.f => input.irp.f} | 4 +++ src/Determinants/determinants.irp.f | 16 ++++++++++++ src/Determinants/zmq.irp.f | 12 +++++++++ src/MO_Basis/mos.irp.f | 16 ++++++++++++ src/MPI/mpi.irp.f | 5 ++++ src/Nuclei/nuclei.irp.f | 12 +++++++++ src/ZMQ/put_get.irp.f | 25 +++++++++++-------- 16 files changed, 141 insertions(+), 18 deletions(-) rename src/Davidson/{ezfio.irp.f => input.irp.f} (88%) diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 732c8ca8..6d8b6a8c 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -77,6 +77,7 @@ subroutine run_pt2_slave(thread,iproc,energy) ! Try to adjust n_tasks around 1 second per job n_tasks = min(n_tasks,int( 1.d0*dble(n_tasks) / (time1 - time0 + 1.d-9)))+1 +! n_tasks = n_tasks+1 end do integer, external :: disconnect_from_taskserver diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index 415270f1..f3534240 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -13,9 +13,10 @@ program selection_slave end subroutine provide_everything - PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context n_states_diag + PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context N_states_diag PROVIDE pt2_e0_denominator mo_tot_num N_int ci_energy mpi_master zmq_state zmq_context - PROVIDE psi_det psi_coef + PROVIDE psi_det psi_coef threshold_generators threshold_selectors state_average_weight + PROVIDE N_det_selectors pt2_stoch_istate N_det end subroutine run_wf @@ -39,8 +40,6 @@ subroutine run_wf integer, external :: zmq_get_psi, zmq_get_N_det_selectors integer, external :: zmq_get_N_states_diag - call provide_everything - zmq_context = f77_zmq_ctx_new () states(1) = 'selection' states(2) = 'davidson' @@ -49,6 +48,10 @@ subroutine run_wf zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + PROVIDE psi_det psi_coef threshold_generators threshold_selectors state_average_weight mpi_master + PROVIDE zmq_state N_det_selectors pt2_stoch_istate N_det pt2_e0_denominator + PROVIDE N_det_generators N_states N_states_diag + call MPI_BARRIER(MPI_COMM_WORLD, ierr) do if (mpi_master) then @@ -62,6 +65,10 @@ subroutine run_wf print *, trim(zmq_state) endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI call MPI_BCAST (zmq_state, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then diff --git a/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f b/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f index 04a1d9d4..dbe436ff 100644 --- a/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f @@ -1,4 +1,4 @@ -program fci_zmq +program target_pt2_ratio implicit none integer :: i,j,k logical, external :: detEq diff --git a/plugins/Full_CI_ZMQ/target_pt2_zmq.irp.f b/plugins/Full_CI_ZMQ/target_pt2_zmq.irp.f index 52f825f1..851190be 100644 --- a/plugins/Full_CI_ZMQ/target_pt2_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/target_pt2_zmq.irp.f @@ -1,4 +1,4 @@ -program fci_zmq +program target_pt2 implicit none integer :: i,j,k logical, external :: detEq diff --git a/plugins/Selectors_Utils/zmq.irp.f b/plugins/Selectors_Utils/zmq.irp.f index b32436aa..375a77d1 100644 --- a/plugins/Selectors_Utils/zmq.irp.f +++ b/plugins/Selectors_Utils/zmq.irp.f @@ -72,6 +72,10 @@ integer function zmq_get_$X(zmq_to_qp_run_socket, worker_id) 10 continue + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr diff --git a/scripts/ezfio_interface/ezfio_generate_provider.py b/scripts/ezfio_interface/ezfio_generate_provider.py index 4a8e7ec4..d6dcc716 100755 --- a/scripts/ezfio_interface/ezfio_generate_provider.py +++ b/scripts/ezfio_interface/ezfio_generate_provider.py @@ -32,6 +32,10 @@ BEGIN_PROVIDER [ %(type)s, %(name)s %(size)s ] stop 1 endif endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr diff --git a/src/Bitmask/bitmasks.irp.f b/src/Bitmask/bitmasks.irp.f index fcfe03c8..d5472a25 100644 --- a/src/Bitmask/bitmasks.irp.f +++ b/src/Bitmask/bitmasks.irp.f @@ -127,6 +127,10 @@ BEGIN_PROVIDER [ integer, N_generators_bitmask ] ASSERT (N_generators_bitmask > 0) call write_int(6,N_generators_bitmask,'N_generators_bitmask') endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -170,6 +174,10 @@ BEGIN_PROVIDER [ integer, N_generators_bitmask_restart ] ASSERT (N_generators_bitmask_restart > 0) call write_int(6,N_generators_bitmask_restart,'N_generators_bitmask_restart') endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -244,6 +252,10 @@ BEGIN_PROVIDER [ integer(bit_kind), generators_bitmask_restart, (N_int,2,6,N_gen enddo enddo endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -313,6 +325,10 @@ if (mpi_master) then enddo enddo endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -354,6 +370,10 @@ BEGIN_PROVIDER [ integer, N_cas_bitmask ] call write_int(6,N_cas_bitmask,'N_cas_bitmask') endif ASSERT (N_cas_bitmask > 0) + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -407,6 +427,10 @@ BEGIN_PROVIDER [ integer(bit_kind), cas_bitmask, (N_int,2,N_cas_bitmask) ] enddo write(*,*) 'Read CAS bitmask' endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr diff --git a/src/Bitmask/mpi.irp.f b/src/Bitmask/mpi.irp.f index 18af1ca3..11d6777a 100644 --- a/src/Bitmask/mpi.irp.f +++ b/src/Bitmask/mpi.irp.f @@ -26,6 +26,10 @@ subroutine broadcast_chunks_bit_kind(A, LDA) BEGIN_DOC ! Broadcast with chunks of ~2GB END_DOC + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: i, sze, ierr diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 15eede23..0a0881a6 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -13,7 +13,6 @@ end subroutine davidson_slave_tcp(i) implicit none integer, intent(in) :: i - call davidson_run_slave(0,i) end @@ -36,6 +35,10 @@ subroutine davidson_run_slave(thread,iproc) integer, external :: connect_to_taskserver + +include 'mpif.h' +integer ierr + if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) endif @@ -86,11 +89,13 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, allocate (energy(N_st)) if (zmq_get_dvector(zmq_to_qp_run_socket, worker_id, 'u_t', u_t, size(u_t)) == -1) then + print *, irp_here, ': Unable to get u_t' deallocate(u_t,energy) return endif if (zmq_get_dvector(zmq_to_qp_run_socket, worker_id, 'energy', energy, size(energy)) == -1) then + print *, irp_here, ': Unable to get energy' deallocate(u_t,energy) return endif @@ -467,10 +472,13 @@ integer function zmq_get_N_states_diag(zmq_to_qp_run_socket, worker_id) if (rc /= 4) go to 10 endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr - call MPI_BCAST (zmq_get_N_states_diag, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then print *, irp_here//': Unable to broadcast N_states' @@ -484,6 +492,7 @@ integer function zmq_get_N_states_diag(zmq_to_qp_run_socket, worker_id) endif endif IRP_ENDIF + TOUCH N_states_diag return diff --git a/src/Davidson/ezfio.irp.f b/src/Davidson/input.irp.f similarity index 88% rename from src/Davidson/ezfio.irp.f rename to src/Davidson/input.irp.f index a22bd456..2904176c 100644 --- a/src/Davidson/ezfio.irp.f +++ b/src/Davidson/input.irp.f @@ -17,6 +17,10 @@ BEGIN_PROVIDER [ integer, n_states_diag ] endif n_states_diag = max(N_states, N_states_diag) endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index 3db76eef..f04a85a5 100644 --- a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -45,6 +45,10 @@ BEGIN_PROVIDER [ integer, N_det ] endif call write_int(6,N_det,'Number of determinants') endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -89,6 +93,10 @@ BEGIN_PROVIDER [ integer, psi_det_size ] psi_det_size = max(psi_det_size,100000) call write_int(6,psi_det_size,'Dimension of the psi arrays') endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -154,6 +162,10 @@ BEGIN_PROVIDER [ integer(bit_kind), psi_det, (N_int,2,psi_det_size) ] enddo endif endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -212,6 +224,10 @@ BEGIN_PROVIDER [ double precision, psi_coef, (psi_det_size,N_states) ] endif endif endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr diff --git a/src/Determinants/zmq.irp.f b/src/Determinants/zmq.irp.f index e8edc1a8..6c25173a 100644 --- a/src/Determinants/zmq.irp.f +++ b/src/Determinants/zmq.irp.f @@ -112,6 +112,10 @@ integer function zmq_get_$X(zmq_to_qp_run_socket, worker_id) endif 10 continue + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -297,6 +301,10 @@ integer function zmq_get_psi_det(zmq_to_qp_run_socket, worker_id) endif 10 continue + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -346,6 +354,10 @@ integer function zmq_get_psi_coef(zmq_to_qp_run_socket, worker_id) 10 continue + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr diff --git a/src/MO_Basis/mos.irp.f b/src/MO_Basis/mos.irp.f index 3c920d67..2662c6e6 100644 --- a/src/MO_Basis/mos.irp.f +++ b/src/MO_Basis/mos.irp.f @@ -9,6 +9,10 @@ BEGIN_PROVIDER [ integer, mo_tot_num ] if (mpi_master) then call ezfio_has_mo_basis_mo_tot_num(has) endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -71,6 +75,10 @@ BEGIN_PROVIDER [ double precision, mo_coef, (ao_num,mo_tot_num) ] ! Coefs call ezfio_has_mo_basis_mo_coef(exists) endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -136,6 +144,10 @@ BEGIN_PROVIDER [ character*(64), mo_label ] endif write(*,*) '* mo_label ', trim(mo_label) endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -198,6 +210,10 @@ BEGIN_PROVIDER [ double precision, mo_occ, (mo_tot_num) ] endif write(*,*) 'Read mo_occ' endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr diff --git a/src/MPI/mpi.irp.f b/src/MPI/mpi.irp.f index f24a2923..41694c8f 100644 --- a/src/MPI/mpi.irp.f +++ b/src/MPI/mpi.irp.f @@ -70,6 +70,10 @@ subroutine broadcast_chunks_$double(A, LDA) BEGIN_DOC ! Broadcast with chunks of ~2GB END_DOC + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: i, sze, ierr @@ -80,6 +84,7 @@ subroutine broadcast_chunks_$double(A, LDA) print *, irp_here//': Unable to broadcast chunks $double ', i stop -1 endif + call MPI_BARRIER(MPI_COMM_WORLD,ierr) enddo IRP_ENDIF end diff --git a/src/Nuclei/nuclei.irp.f b/src/Nuclei/nuclei.irp.f index 3528bf50..b2d1f54b 100644 --- a/src/Nuclei/nuclei.irp.f +++ b/src/Nuclei/nuclei.irp.f @@ -54,6 +54,10 @@ BEGIN_PROVIDER [ double precision, nucl_coord, (nucl_num,3) ] endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -159,6 +163,10 @@ BEGIN_PROVIDER [ double precision, nuclear_repulsion ] endif print*, 'Read nuclear_repulsion' endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr @@ -228,6 +236,10 @@ END_PROVIDER close(10) endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI include 'mpif.h' integer :: ierr diff --git a/src/ZMQ/put_get.irp.f b/src/ZMQ/put_get.irp.f index 5269bd5e..207cb0ae 100644 --- a/src/ZMQ/put_get.irp.f +++ b/src/ZMQ/put_get.irp.f @@ -60,17 +60,20 @@ integer function zmq_get_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_ rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) if (rc /= len(trim(msg))) then zmq_get_dvector = -1 + print *, irp_here, 'rc /= len(trim(msg))', rc, len(trim(msg)) go to 10 endif rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) if (msg(1:14) /= 'get_data_reply') then + print *, irp_here, 'msg(1:14) /= get_data_reply', msg(1:14) zmq_get_dvector = -1 go to 10 endif rc = f77_zmq_recv(zmq_to_qp_run_socket,x,size_x*8,0) if (rc /= size_x*8) then + print *, irp_here, 'rc /= size_x*8', rc, size_x*8 zmq_get_dvector = -1 go to 10 endif @@ -78,6 +81,10 @@ integer function zmq_get_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_ 10 continue + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI integer :: ierr include 'mpif.h' @@ -86,11 +93,8 @@ integer function zmq_get_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_ print *, irp_here//': Unable to broadcast zmq_get_dvector' stop -1 endif - call MPI_BCAST (x, size_x, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) - if (ierr /= MPI_SUCCESS) then - print *, irp_here//': Unable to broadcast dvector' - stop -1 - endif + call MPI_BARRIER(MPI_COMM_WORLD,ierr) + call broadcast_chunks_double(x, size_x) IRP_ENDIF end @@ -177,6 +181,10 @@ integer function zmq_get_ivector(zmq_to_qp_run_socket, worker_id, name, x, size_ 10 continue + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF IRP_IF MPI integer :: ierr include 'mpif.h' @@ -185,11 +193,8 @@ integer function zmq_get_ivector(zmq_to_qp_run_socket, worker_id, name, x, size_ print *, irp_here//': Unable to broadcast zmq_get_ivector' stop -1 endif - call MPI_BCAST (x, size_x, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - if (ierr /= MPI_SUCCESS) then - print *, irp_here//': Unable to broadcast ivector' - stop -1 - endif + call MPI_BARRIER(MPI_COMM_WORLD,ierr) + call broadcast_chunks_integer(x, size_x) IRP_ENDIF end From ad7398f9120e59270c939a02865d6311982f45b2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Sep 2018 15:15:19 +0200 Subject: [PATCH 03/28] Fixed Fragments --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 12 +++++++----- plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 18 +++++++++++------- plugins/Full_CI_ZMQ/selection.irp.f | 4 +++- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 3c8e797b..4f9138bc 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -6,7 +6,6 @@ BEGIN_PROVIDER [ integer, pt2_stoch_istate ] pt2_stoch_istate = 1 END_PROVIDER - BEGIN_PROVIDER [ integer, pt2_N_teeth ] &BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ] &BEGIN_PROVIDER [ integer, pt2_n_tasks_max ] @@ -14,11 +13,14 @@ END_PROVIDER implicit none logical, external :: testTeethBuilding integer :: i - pt2_F(:) = 1 - pt2_n_tasks_max = N_det_generators/100 + 1 + integer :: e + e = elec_num - n_core_orb * 2 + pt2_n_tasks_max = min(1+(e*(e-1))/2, int(dsqrt(dble(N_det_generators)))) do i=1,N_det_generators - if (maxval(dabs(psi_coef_sorted_gen(i,:))) > 0.005d0) then - pt2_F(i) = max(1,min( ((elec_alpha_num-n_core_orb)**2)/4, pt2_n_tasks_max)) + if (maxval(dabs(psi_coef_sorted_gen(i,1:N_states))) > 0.0001d0) then + pt2_F(i) = pt2_n_tasks_max + else + pt2_F(i) = 1 endif enddo diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 6d8b6a8c..2be0bd88 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -43,10 +43,11 @@ subroutine run_pt2_slave(thread,iproc,energy) call create_selection_buffer(0, 0, buf) done = .False. + n_tasks = 1 do while (.not.done) - n_tasks = max(1,n_tasks) - n_tasks = min(n_tasks,pt2_n_tasks_max) +! n_tasks = max(1,n_tasks) +! n_tasks = min(pt2_n_tasks_max,n_tasks) integer, external :: get_tasks_from_taskserver if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then @@ -61,13 +62,17 @@ subroutine run_pt2_slave(thread,iproc,energy) enddo double precision :: time0, time1 - call wall_time(time0) +! call wall_time(time0) do k=1,n_tasks pt2(:,k) = 0.d0 buf%cur = 0 +!double precision :: time2 +!call wall_time(time2) call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k),pt2_F(i_generator(k))) +!call wall_time(time1) +!print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1)) enddo - call wall_time(time1) +! call wall_time(time1) integer, external :: tasks_done_to_taskserver if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then @@ -75,9 +80,8 @@ subroutine run_pt2_slave(thread,iproc,energy) endif call push_pt2_results(zmq_socket_push, i_generator, pt2, task_id, n_tasks) - ! Try to adjust n_tasks around 1 second per job - n_tasks = min(n_tasks,int( 1.d0*dble(n_tasks) / (time1 - time0 + 1.d-9)))+1 -! n_tasks = n_tasks+1 + ! Try to adjust n_tasks around nproc seconds per job +! n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc) / (time1 - time0 + 1.d0))) end do integer, external :: disconnect_from_taskserver diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 047a0b26..81dea087 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -409,9 +409,11 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d allocate(banned(mo_tot_num, mo_tot_num,2), bannedOrb(mo_tot_num, 2)) allocate (mat(N_states, mo_tot_num, mo_tot_num)) maskInd = -1 - integer :: nb_count + integer :: nb_count, maskInd_save + logical :: found do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first + h1 = hole_list(i1,s1) call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) From 2cb6b48ecba9a5bd89369587ec4956c0d5a9916b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Sep 2018 15:15:19 +0200 Subject: [PATCH 04/28] Fixed Fragments --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 12 +++++++----- plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 18 +++++++++++------- plugins/Full_CI_ZMQ/selection.irp.f | 4 +++- .../Full_CI_ZMQ/selection_davidson_slave.irp.f | 4 +++- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 3c8e797b..4f9138bc 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -6,7 +6,6 @@ BEGIN_PROVIDER [ integer, pt2_stoch_istate ] pt2_stoch_istate = 1 END_PROVIDER - BEGIN_PROVIDER [ integer, pt2_N_teeth ] &BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ] &BEGIN_PROVIDER [ integer, pt2_n_tasks_max ] @@ -14,11 +13,14 @@ END_PROVIDER implicit none logical, external :: testTeethBuilding integer :: i - pt2_F(:) = 1 - pt2_n_tasks_max = N_det_generators/100 + 1 + integer :: e + e = elec_num - n_core_orb * 2 + pt2_n_tasks_max = min(1+(e*(e-1))/2, int(dsqrt(dble(N_det_generators)))) do i=1,N_det_generators - if (maxval(dabs(psi_coef_sorted_gen(i,:))) > 0.005d0) then - pt2_F(i) = max(1,min( ((elec_alpha_num-n_core_orb)**2)/4, pt2_n_tasks_max)) + if (maxval(dabs(psi_coef_sorted_gen(i,1:N_states))) > 0.0001d0) then + pt2_F(i) = pt2_n_tasks_max + else + pt2_F(i) = 1 endif enddo diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 6d8b6a8c..2be0bd88 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -43,10 +43,11 @@ subroutine run_pt2_slave(thread,iproc,energy) call create_selection_buffer(0, 0, buf) done = .False. + n_tasks = 1 do while (.not.done) - n_tasks = max(1,n_tasks) - n_tasks = min(n_tasks,pt2_n_tasks_max) +! n_tasks = max(1,n_tasks) +! n_tasks = min(pt2_n_tasks_max,n_tasks) integer, external :: get_tasks_from_taskserver if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then @@ -61,13 +62,17 @@ subroutine run_pt2_slave(thread,iproc,energy) enddo double precision :: time0, time1 - call wall_time(time0) +! call wall_time(time0) do k=1,n_tasks pt2(:,k) = 0.d0 buf%cur = 0 +!double precision :: time2 +!call wall_time(time2) call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k),pt2_F(i_generator(k))) +!call wall_time(time1) +!print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1)) enddo - call wall_time(time1) +! call wall_time(time1) integer, external :: tasks_done_to_taskserver if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then @@ -75,9 +80,8 @@ subroutine run_pt2_slave(thread,iproc,energy) endif call push_pt2_results(zmq_socket_push, i_generator, pt2, task_id, n_tasks) - ! Try to adjust n_tasks around 1 second per job - n_tasks = min(n_tasks,int( 1.d0*dble(n_tasks) / (time1 - time0 + 1.d-9)))+1 -! n_tasks = n_tasks+1 + ! Try to adjust n_tasks around nproc seconds per job +! n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc) / (time1 - time0 + 1.d0))) end do integer, external :: disconnect_from_taskserver diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 047a0b26..81dea087 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -409,9 +409,11 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d allocate(banned(mo_tot_num, mo_tot_num,2), bannedOrb(mo_tot_num, 2)) allocate (mat(N_states, mo_tot_num, mo_tot_num)) maskInd = -1 - integer :: nb_count + integer :: nb_count, maskInd_save + logical :: found do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first + h1 = hole_list(i1,s1) call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index f3534240..fb214ad9 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -51,7 +51,9 @@ subroutine run_wf PROVIDE psi_det psi_coef threshold_generators threshold_selectors state_average_weight mpi_master PROVIDE zmq_state N_det_selectors pt2_stoch_istate N_det pt2_e0_denominator PROVIDE N_det_generators N_states N_states_diag - call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_IF MPI + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF do if (mpi_master) then From 06ffc784eb5e9cd8703a575b90a6ea8341ef177c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 14 Sep 2018 15:45:38 +0200 Subject: [PATCH 05/28] Fixed sBk --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 20 ++- plugins/Full_CI_ZMQ/selection.irp.f | 27 ++- plugins/dress_zmq/dress_slave.irp.f | 6 - plugins/dress_zmq/dress_stoch_routines.irp.f | 89 ++++++--- plugins/dress_zmq/run_dress_slave.irp.f | 75 ++++---- plugins/shiftedbk/shifted_bk_slave.irp.f | 179 ++++++++++++++++++- src/ZMQ/put_get.irp.f | 82 ++++++++- 7 files changed, 388 insertions(+), 90 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 4f9138bc..3c944cbc 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -15,9 +15,9 @@ END_PROVIDER integer :: i integer :: e e = elec_num - n_core_orb * 2 - pt2_n_tasks_max = min(1+(e*(e-1))/2, int(dsqrt(dble(N_det_generators)))) + pt2_n_tasks_max = 1+min((e*(e-1))/2, int(dsqrt(dble(N_det_generators)))/10) do i=1,N_det_generators - if (maxval(dabs(psi_coef_sorted_gen(i,1:N_states))) > 0.0001d0) then + if (maxval(dabs(psi_coef_sorted_gen(i,1:N_states))) > 0.001d0) then pt2_F(i) = pt2_n_tasks_max else pt2_F(i) = 1 @@ -158,9 +158,19 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error) endif + integer, external :: add_task_to_taskserver - character(len=64000) :: task + character(len=64000000) :: task integer :: j,k,ipos + + ipos=0 + do i=1,N_det_generators + if (pt2_F(i) > 1) then + ipos += 1 + endif + enddo + call write_int(6,ipos,'Number of fragmented tasks') + ipos=1 task = ' ' @@ -168,7 +178,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error) do j=1,pt2_F(pt2_J(i)) write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, pt2_J(i) ipos += 20 - if (ipos > 63980) then + if (ipos > len(task)-20) then if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then stop 'Unable to add task to task server' endif @@ -328,7 +338,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error) print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E, eqt, time-time0, '' if( dabs(error(pt2_stoch_istate) / pt2(pt2_stoch_istate)) < relative_error) then if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) + call sleep(10) if (zmq_abort(zmq_to_qp_run_socket) == -1) then print *, irp_here, ': Error in sending abort signal (2)' endif diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 81dea087..588790cc 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -357,6 +357,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d endif enddo enddo + deallocate(exc_degree) nmax=k-1 @@ -404,16 +405,36 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d end do deallocate(indices) - allocate(minilist(N_int, 2, N_det_selectors), fullminilist(N_int, 2, N_det)) allocate(banned(mo_tot_num, mo_tot_num,2), bannedOrb(mo_tot_num, 2)) allocate (mat(N_states, mo_tot_num, mo_tot_num)) maskInd = -1 - integer :: nb_count, maskInd_save + + integer :: nb_count, maskInd_save, monoBdo_save logical :: found + do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first + monoBdo_save = monoBdo + maskInd_save = maskInd + do s2=s1,2 + ib = 1 + if(s1 == s2) ib = i1+1 + do i2=N_holes(s2),ib,-1 + maskInd += 1 + if(mod(maskInd, csubset) == (subset-1)) then + found = .True. + end if + enddo + if(s1 /= s2) monoBdo = .false. + enddo + + if (.not.found) cycle + monoBdo = monoBdo_save + maskInd = maskInd_save + + h1 = hole_list(i1,s1) call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) @@ -526,8 +547,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d enddo end if end do - - do s2=s1,2 sp = s1 diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index 33238df2..5e575901 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -64,11 +64,7 @@ subroutine run_wf PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order - !!$OMP PARALLEL PRIVATE(i) - !i = omp_get_thread_num() -! call dress_slave_tcp(i+1, energy) call dress_slave_tcp(0, energy) - !!$OMP END PARALLEL endif end do end @@ -77,8 +73,6 @@ subroutine dress_slave_tcp(i,energy) implicit none double precision, intent(in) :: energy(N_states_diag) integer, intent(in) :: i - logical :: lstop - lstop = .False. call run_dress_slave(0,i,energy) end diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 3b9d128d..c07c3110 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -13,20 +13,24 @@ END_PROVIDER implicit none logical, external :: testTeethBuilding integer :: i - pt2_F(:) = 1 - pt2_n_tasks_max = 20 -! do i=1,N_det_generators -! if (maxval(dabs(psi_coef_sorted_gen(i,:))) > 0.001d0) then -! pt2_F(i) = max(1,min( (elec_alpha_num-n_core_orb)**2, pt2_n_tasks_max)) -! endif -! enddo + integer :: e + e = elec_num - n_core_orb * 2 + pt2_n_tasks_max = 1 + min((e*(e-1))/2, int(dsqrt(dble(N_det_generators)))/10) + do i=1,N_det_generators + if (maxval(dabs(psi_coef_sorted_gen(i,1:N_states))) > 0.001d0) then + pt2_F(i) = pt2_n_tasks_max + else + pt2_F(i) = 1 + endif + enddo + if(N_det_generators < 1024) then pt2_minDetInFirstTeeth = 1 pt2_N_teeth = 1 else pt2_minDetInFirstTeeth = min(5, N_det_generators) - do pt2_N_teeth=100,2,-1 + do pt2_N_teeth=20,2,-1 if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit end do end if @@ -219,7 +223,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) implicit none - character(len=64000) :: task + character(len=64000000) :: task integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull integer, external :: omp_get_thread_num double precision, intent(in) :: E(N_states), relative_error @@ -232,8 +236,8 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) integer :: i, j, k, Ncp - integer, external :: add_task_to_taskserver double precision :: state_average_weight_save(N_states) + PROVIDE Nproc task(:) = CHAR(0) allocate(delta(N_states,N_det), delta_s2(N_states, N_det)) state_average_weight_save(:) = state_average_weight(:) @@ -254,7 +258,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) integer, external :: zmq_put_N_det_generators integer, external :: zmq_put_N_det_selectors integer, external :: zmq_put_dvector - integer, external :: zmq_set_running + integer, external :: zmq_put_int if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then stop 'Unable to put psi on ZMQ server' @@ -271,25 +275,59 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) if (zmq_put_dvector(zmq_to_qp_run_socket,1,"state_average_weight",state_average_weight,N_states) == -1) then stop 'Unable to put state_average_weight on ZMQ server' endif - if (zmq_put_dvector(zmq_to_qp_run_socket,1,"dress_stoch_istate",real(dress_stoch_istate,8),1) == -1) then + if (zmq_put_int(zmq_to_qp_run_socket,1,"dress_stoch_istate",dress_stoch_istate) == -1) then stop 'Unable to put dress_stoch_istate on ZMQ server' endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) then + stop 'Unable to put threshold_selectors on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) then + stop 'Unable to put threshold_generators on ZMQ server' + endif - integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + + call write_int(6,pt2_n_tasks_max,'Max number of task fragments') - do i=1,N_det_generators - do j=1,pt2_F(pt2_J(i)) - write(task(1:20),'(I9,1X,I9''|'')') j, pt2_J(i) - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:20))) == -1) then + integer, external :: add_task_to_taskserver + integer :: ipos + ipos=0 + do i=1,N_det_generators + if (pt2_F(i) > 1) then + ipos += 1 + endif + enddo + call write_int(6,ipos,'Number of fragmented tasks') + + + ipos=1 + task = ' ' + + do i= 1, N_det_generators + do j=1,pt2_F(pt2_J(i)) + write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, pt2_J(i) + ipos += 20 + if (ipos > len(task)-20) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + ipos=1 + endif + end do + enddo + if (ipos > 1) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then stop 'Unable to add task to task server' endif - end do - end do - if (zmq_set_running(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Failed in zmq_set_running' - endif + endif + + integer, external :: zmq_set_running + if (zmq_set_running(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Failed in zmq_set_running' + endif + + integer :: nproc_target nproc_target = nproc @@ -495,14 +533,14 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, m += 1 if(dabs(error / avg) <= relative_error) then integer, external :: zmq_put_dvector - i= zmq_put_dvector(zmq_to_qp_run_socket, worker_id, "ending", dble(m-1), 1) + integer, external :: zmq_put_int + i= zmq_put_int(zmq_to_qp_run_socket, worker_id, "ending", (m-1)) found = .true. end if else do call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) if(time0 == -1d0) then - print *, "first pull", omp_get_wtime()-time time0 = omp_get_wtime() end if if(m_task == 0) then @@ -516,14 +554,13 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, end if end do do i=1,n_tasks - if(edI(edI_index(i)) /= 0d0) stop "NIN M" edI(edI_index(i)) += edI_task(i) end do dot_f(m_task) -= f end if end do if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) + call sleep(10) if (zmq_abort(zmq_to_qp_run_socket) == -1) then print *, irp_here, ': Error in sending abort signal (2)' endif diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 0d3201bc..b9d73cb9 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -33,15 +33,14 @@ subroutine run_dress_slave(thread,iproce,energy) integer :: cp_max(Nproc) integer :: will_send, task_id, purge_task_id, ntask_buf integer, allocatable :: task_buf(:) - integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1) - integer(kind=OMP_LOCK_KIND) :: lck_sto(0:dress_N_cp+1), sending, getting_task +! integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1) +! integer(kind=OMP_LOCK_KIND) :: lck_sto(dress_N_cp) double precision :: fac - double precision :: ending(1) - integer, external :: zmq_get_dvector + integer :: ending + integer, external :: zmq_get_dvector, zmq_get_int ! double precision, external :: omp_get_wtime double precision :: time, time0 integer :: ntask_tbd, task_tbd(Nproc), i_gen_tbd(Nproc), subset_tbd(Nproc) -! if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" allocate(delta_det(N_states, N_det, 0:pt2_N_teeth+1, 2)) allocate(cp(N_states, N_det, dress_N_cp, 2)) @@ -53,14 +52,12 @@ subroutine run_dress_slave(thread,iproce,energy) cp = 0d0 task = CHAR(0) - call omp_init_lock(sending) - call omp_init_lock(getting_task) - do i=0,dress_N_cp+1 - call omp_init_lock(lck_sto(i)) - end do - do i=0,pt2_N_teeth+1 - call omp_init_lock(lck_det(i)) - end do +! do i=1,dress_N_cp +! call omp_init_lock(lck_sto(i)) +! end do +! do i=0,pt2_N_teeth+1 +! call omp_init_lock(lck_det(i)) +! end do cp_done = 0 cp_sent = 0 @@ -69,7 +66,7 @@ subroutine run_dress_slave(thread,iproce,energy) double precision :: hij, sij, tmp purge_task_id = 0 provide psi_energy - ending(1) = dble(dress_N_cp+1) + ending = dress_N_cp+1 ntask_tbd = 0 !$OMP PARALLEL DEFAULT(SHARED) & !$OMP PRIVATE(breve_delta_m, task_id) & @@ -86,7 +83,7 @@ subroutine run_dress_slave(thread,iproce,energy) stop "WORKER -1" end if iproc = omp_get_thread_num()+1 - allocate(breve_delta_m(N_states,N_det,2)) + allocate(breve_delta_m(N_states,N_det,2)) allocate(task_buf(pt2_n_tasks_max)) ntask_buf = 0 @@ -94,8 +91,9 @@ subroutine run_dress_slave(thread,iproce,energy) call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) end if + cp_max(:) = 0 do while(cp_done > cp_sent .or. m /= dress_N_cp+1) - call omp_set_lock(getting_task) + !$OMP CRITICAL (send) if(ntask_tbd == 0) then ntask_tbd = size(task_tbd) call get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_tbd, task, ntask_tbd) @@ -113,13 +111,13 @@ subroutine run_dress_slave(thread,iproce,energy) ntask_tbd -= 1 else m = dress_N_cp + 1 - i= zmq_get_dvector(zmq_to_qp_run_socket, worker_id, "ending", ending, 1) + i= zmq_get_int(zmq_to_qp_run_socket, worker_id, "ending", ending) end if - call omp_unset_lock(getting_task) will_send = 0 - !$OMP CRITICAL cp_max(iproc) = m +! print *, cp_max(:) +! print *, '' cp_done = minval(cp_max)-1 if(cp_done > cp_sent) then will_send = cp_sent + 1 @@ -132,10 +130,8 @@ subroutine run_dress_slave(thread,iproce,energy) ntask_buf += 1 task_buf(ntask_buf) = task_id end if - !$OMP END CRITICAL - if(will_send /= 0 .and. will_send <= int(ending(1))) then - call omp_set_lock(sending) + if(will_send /= 0 .and. will_send <= ending) then n_tasks = 0 sum_f = 0 do i=1,N_det_generators @@ -146,9 +142,10 @@ subroutine run_dress_slave(thread,iproce,energy) edI_index(n_tasks) = i end if end do - call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, 0, n_tasks) - call omp_unset_lock(sending) + call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, & + breve_delta_m, 0, n_tasks) end if + !$OMP END CRITICAL (send) if(m /= dress_N_cp+1) then !UPDATE i_generator @@ -158,29 +155,29 @@ subroutine run_dress_slave(thread,iproce,energy) time0 = omp_get_wtime() call alpha_callback(breve_delta_m, i_generator, subset, pt2_F(i_generator), iproc) time = omp_get_wtime() - !print '(I0.11, I4, A12, F12.3)', i_generator, subset, "GREPMETIME", time-time0 +!print '(I0.11, I4, A12, F12.3)', i_generator, subset, "GREPMETIME", time-time0 t = dress_T(i_generator) - call omp_set_lock(lck_det(t)) + !$OMP CRITICAL(t_crit) do j=1,N_det do i=1,N_states delta_det(i,j,t, 1) = delta_det(i,j,t, 1) + breve_delta_m(i,j,1) delta_det(i,j,t, 2) = delta_det(i,j,t, 2) + breve_delta_m(i,j,2) enddo enddo - call omp_unset_lock(lck_det(t)) + !$OMP END CRITICAL(t_crit) do p=1,dress_N_cp if(dress_e(i_generator, p) /= 0d0) then fac = dress_e(i_generator, p) - call omp_set_lock(lck_sto(p)) + !$OMP CRITICAL(p_crit) do j=1,N_det do i=1,N_states cp(i,j,p,1) = cp(i,j,p,1) + breve_delta_m(i,j,1) * fac cp(i,j,p,2) = cp(i,j,p,2) + breve_delta_m(i,j,2) * fac enddo enddo - call omp_unset_lock(lck_sto(p)) + !$OMP END CRITICAL(p_crit) end if end do @@ -198,7 +195,9 @@ subroutine run_dress_slave(thread,iproce,energy) ntask_buf = 0 end if end if + !$OMP FLUSH end do + !$OMP BARRIER if(ntask_buf /= 0) then call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) @@ -206,12 +205,12 @@ subroutine run_dress_slave(thread,iproce,energy) end if !$OMP SINGLE if(purge_task_id /= 0) then - do while(int(ending(1)) == dress_N_cp+1) + do while(ending == dress_N_cp+1) call sleep(1) - i= zmq_get_dvector(zmq_to_qp_run_socket, worker_id, "ending", ending, 1) + i= zmq_get_int(zmq_to_qp_run_socket, worker_id, "ending", ending) end do - will_send = int(ending(1)) + will_send = ending breve_delta_m = 0d0 do l=will_send, 1,-1 @@ -238,12 +237,12 @@ subroutine run_dress_slave(thread,iproce,energy) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) !$OMP END PARALLEL - do i=0,dress_N_cp+1 - call omp_destroy_lock(lck_sto(i)) - end do - do i=0,pt2_N_teeth+1 - call omp_destroy_lock(lck_det(i)) - end do +! do i=0,dress_N_cp+1 +! call omp_destroy_lock(lck_sto(i)) +! end do +! do i=0,pt2_N_teeth+1 +! call omp_destroy_lock(lck_det(i)) +! end do end subroutine diff --git a/plugins/shiftedbk/shifted_bk_slave.irp.f b/plugins/shiftedbk/shifted_bk_slave.irp.f index 901940ed..83d95847 100644 --- a/plugins/shiftedbk/shifted_bk_slave.irp.f +++ b/plugins/shiftedbk/shifted_bk_slave.irp.f @@ -1,15 +1,176 @@ -program shifted_bk +program shifted_bk_slave implicit none BEGIN_DOC -! Helper subroutine to compute the dress in distributed mode. +! Helper program to compute the dress in distributed mode. END_DOC - - PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique - PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order - PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns - PROVIDE psi_bilinear_matrix_transp_order - !call diagonalize_CI() - call dress_slave() + read_wf = .False. + distributed_davidson = .False. + SOFT_TOUCH read_wf distributed_davidson + call provide_all + call switch_qp_run_to_master + call run_w end +subroutine provide_all + PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context N_states_diag + PROVIDE dress_e0_denominator mo_tot_num N_int ci_energy mpi_master zmq_state zmq_context + PROVIDE psi_det psi_coef threshold_generators threshold_selectors state_average_weight + PROVIDE N_det_selectors dress_stoch_istate N_det +end + +subroutine run_w + use f77_zmq + + implicit none + IRP_IF MPI + include 'mpif.h' + IRP_ENDIF + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + double precision :: energy(N_states) + character*(64) :: states(3) + character*(64) :: old_state + integer :: rc, i, ierr + double precision :: t0, t1 + + integer, external :: zmq_get_dvector, zmq_get_N_det_generators + integer, external :: zmq_get_ivector + integer, external :: zmq_get_psi, zmq_get_N_det_selectors, zmq_get_int + integer, external :: zmq_get_N_states_diag + + zmq_context = f77_zmq_ctx_new () + states(1) = 'selection' + states(2) = 'davidson' + states(3) = 'dress' + old_state = 'Waiting' + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + PROVIDE psi_det psi_coef threshold_generators threshold_selectors state_average_weight mpi_master + PROVIDE zmq_state N_det_selectors dress_stoch_istate N_det dress_e0_denominator + PROVIDE N_det_generators N_states N_states_diag + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + do + + if (mpi_master) then + call wait_for_states(states,zmq_state,size(states)) + if (zmq_state(1:64) == old_state(1:64)) then + call sleep(1) + cycle + else + old_state(1:64) = zmq_state(1:64) + endif + print *, trim(zmq_state) + endif + + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + IRP_IF MPI + call MPI_BCAST (zmq_state, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here, 'error in broadcast of zmq_state' + endif + IRP_ENDIF + + if(zmq_state(1:7) == 'Stopped') then + exit + endif + + + if (zmq_state(1:8) == 'davidson') then + + ! Davidson + ! -------- + + call wall_time(t0) + if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle + if (zmq_get_N_states_diag(zmq_to_qp_run_socket,1) == -1) cycle + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states_diag) == -1) cycle + + call wall_time(t1) + if (mpi_master) then + call write_double(6,(t1-t0),'Broadcast time') + endif + + call omp_set_nested(.True.) + call davidson_slave_tcp(0) + call omp_set_nested(.False.) + print *, 'Davidson done' + IRP_IF MPI + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here, 'error in barrier' + endif + IRP_ENDIF + print *, 'All Davidson done' + + else if (zmq_state(1:5) == 'dress') then + + ! Dress + ! --- + + call wall_time(t0) + if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle + print *, 'if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle', mpi_rank + + if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle + print *, 'if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle', mpi_rank + + if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle + print *, 'if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle', mpi_rank + + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) cycle + print *, 'if (zmq_get_dvector(zmq_to_qp_run_socket,1,threshold_generators,threshold_generators,1) == -1) cycle', mpi_rank + + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) cycle + print *, 'if (zmq_get_dvector(zmq_to_qp_run_socket,1,threshold_selectors,threshold_selectors,1) == -1) cycle', mpi_rank + + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle + print *, 'if (zmq_get_dvector(zmq_to_qp_run_socket,1,energy,energy,N_states) == -1) cycle', mpi_rank + + if (zmq_get_int(zmq_to_qp_run_socket,1,'dress_stoch_istate',dress_stoch_istate) == -1) cycle + print *, 'if (zmq_get_int(zmq_to_qp_run_socket,1,dress_stoch_istate,dress_stoch_istate) == -1) cycle', mpi_rank + + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle + print *, 'if (zmq_get_dvector(zmq_to_qp_run_socket,1,state_average_weight,state_average_weight,N_states) == -1) cycle', mpi_rank + + psi_energy(1:N_states) = energy(1:N_states) + TOUCH psi_energy state_average_weight dress_stoch_istate threshold_selectors threshold_generators + if (mpi_master) then + print *, 'N_det', N_det + print *, 'N_det_generators', N_det_generators + print *, 'N_det_selectors', N_det_selectors + print *, 'psi_energy', psi_energy + print *, 'dress_stoch_istate', dress_stoch_istate + print *, 'state_average_weight', state_average_weight + endif + + call wall_time(t1) + call write_double(6,(t1-t0),'Broadcast time') + + call dress_slave_tcp(0, energy) + + + IRP_IF MPI + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here, 'error in barrier' + endif + IRP_ENDIF + print *, 'All dress done' + + endif + + end do + IRP_IF MPI + call MPI_finalize(ierr) + IRP_ENDIF +end + + + + diff --git a/src/ZMQ/put_get.irp.f b/src/ZMQ/put_get.irp.f index 207cb0ae..e86a6daf 100644 --- a/src/ZMQ/put_get.irp.f +++ b/src/ZMQ/put_get.irp.f @@ -8,7 +8,7 @@ integer function zmq_put_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_ integer, intent(in) :: worker_id character*(*) :: name integer, intent(in) :: size_x - double precision, intent(out) :: x(size_x) + double precision, intent(in) :: x(size_x) integer :: rc character*(256) :: msg @@ -111,7 +111,7 @@ integer function zmq_put_ivector(zmq_to_qp_run_socket, worker_id, name, x, size_ integer, intent(in) :: worker_id character*(*) :: name integer, intent(in) :: size_x - integer, intent(out) :: x(size_x) + integer, intent(in) :: x(size_x) integer :: rc character*(256) :: msg @@ -201,3 +201,81 @@ end +integer function zmq_put_int(zmq_to_qp_run_socket, worker_id, name, x) + use f77_zmq + implicit none + BEGIN_DOC +! Put a vector of integers on the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + character*(*) :: name + integer, intent(in) :: x + integer :: rc + character*(256) :: msg + + zmq_put_int = 0 + + write(msg,'(A,1X,I8,1X,A200)') 'put_data '//trim(zmq_state), worker_id, name + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) + if (rc /= len(trim(msg))) then + zmq_put_int = -1 + return + endif + + rc = f77_zmq_send(zmq_to_qp_run_socket,x,4,0) + if (rc /= 4) then + zmq_put_int = -1 + return + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:rc) /= 'put_data_reply ok') then + zmq_put_int = -1 + return + endif + +end + +integer function zmq_get_int(zmq_to_qp_run_socket, worker_id, name, x) + use f77_zmq + implicit none + BEGIN_DOC +! Get a vector of integers from the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + character*(*), intent(in) :: name + integer, intent(out) :: x + integer :: rc + character*(256) :: msg + + PROVIDE zmq_state + ! Success + zmq_get_int = 0 + + if (mpi_master) then + write(msg,'(A,1X,I8,1X,A200)') 'get_data '//trim(zmq_state), worker_id, name + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) + if (rc /= len(trim(msg))) then + zmq_get_int = -1 + go to 10 + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:14) /= 'get_data_reply') then + zmq_get_int = -1 + go to 10 + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,x,4,0) + if (rc /= 4) then + zmq_get_int = -1 + go to 10 + endif + endif + + 10 continue + +end + From 952a44c13bef9ee21dff1994beb661be10ad097b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 14 Sep 2018 16:59:56 +0200 Subject: [PATCH 06/28] Cleaning --- plugins/shiftedbk/shifted_bk_slave.irp.f | 8 -------- 1 file changed, 8 deletions(-) diff --git a/plugins/shiftedbk/shifted_bk_slave.irp.f b/plugins/shiftedbk/shifted_bk_slave.irp.f index 48ca9960..5e559402 100644 --- a/plugins/shiftedbk/shifted_bk_slave.irp.f +++ b/plugins/shiftedbk/shifted_bk_slave.irp.f @@ -117,28 +117,20 @@ subroutine run_w call wall_time(t0) if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle - print *, 'if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle', mpi_rank if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle - print *, 'if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle', mpi_rank if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle - print *, 'if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle', mpi_rank if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) cycle - print *, 'if (zmq_get_dvector(zmq_to_qp_run_socket,1,threshold_generators,threshold_generators,1) == -1) cycle', mpi_rank if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) cycle - print *, 'if (zmq_get_dvector(zmq_to_qp_run_socket,1,threshold_selectors,threshold_selectors,1) == -1) cycle', mpi_rank if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle - print *, 'if (zmq_get_dvector(zmq_to_qp_run_socket,1,energy,energy,N_states) == -1) cycle', mpi_rank if (zmq_get_int(zmq_to_qp_run_socket,1,'dress_stoch_istate',dress_stoch_istate) == -1) cycle - print *, 'if (zmq_get_int(zmq_to_qp_run_socket,1,dress_stoch_istate,dress_stoch_istate) == -1) cycle', mpi_rank if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle - print *, 'if (zmq_get_dvector(zmq_to_qp_run_socket,1,state_average_weight,state_average_weight,N_states) == -1) cycle', mpi_rank psi_energy(1:N_states) = energy(1:N_states) TOUCH psi_energy state_average_weight dress_stoch_istate threshold_selectors threshold_generators From b292a4e4e5bb14750230c8a7c1a32ba18a3673e0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 14 Sep 2018 18:08:19 +0200 Subject: [PATCH 07/28] Truncate eigenfnuction of S^2 --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 1 + plugins/Full_CI_ZMQ/selection.irp.f | 2 +- plugins/Perturbation/EZFIO.cfg | 2 +- plugins/shiftedbk/EZFIO.cfg | 2 +- src/Determinants/occ_pattern.irp.f | 46 ++++++++++++++++ src/Determinants/truncate_wf.irp.f | 56 +++++++++++++++++++- 6 files changed, 104 insertions(+), 5 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 05ab7b07..7c48c57b 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -414,6 +414,7 @@ end function pt2_J(i) = i end do + integer :: m integer, allocatable :: seed(:) call random_seed(size=m) allocate(seed(m)) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 35898a46..3febff69 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -412,10 +412,10 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d integer :: nb_count, maskInd_save, monoBdo_save logical :: found - do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first + found = .False. monoBdo_save = monoBdo maskInd_save = maskInd do s2=s1,2 diff --git a/plugins/Perturbation/EZFIO.cfg b/plugins/Perturbation/EZFIO.cfg index 485e15cd..89125b37 100644 --- a/plugins/Perturbation/EZFIO.cfg +++ b/plugins/Perturbation/EZFIO.cfg @@ -15,7 +15,7 @@ default: 0.0001 type: Normalized_float doc: Stop stochastic PT2 when the relative error is smaller than PT2_relative_error interface: ezfio,provider,ocaml -default: 0.001 +default: 0.01 [correlation_energy_ratio_max] type: Normalized_float diff --git a/plugins/shiftedbk/EZFIO.cfg b/plugins/shiftedbk/EZFIO.cfg index 6069c855..77e97cf7 100644 --- a/plugins/shiftedbk/EZFIO.cfg +++ b/plugins/shiftedbk/EZFIO.cfg @@ -30,5 +30,5 @@ default: EN type: Normalized_float doc: Stop stochastic dressing when the relative error is smaller than PT2_relative_error interface: ezfio,provider,ocaml -default: 0.001 +default: 0.01 diff --git a/src/Determinants/occ_pattern.irp.f b/src/Determinants/occ_pattern.irp.f index 8250823a..016b546b 100644 --- a/src/Determinants/occ_pattern.irp.f +++ b/src/Determinants/occ_pattern.irp.f @@ -248,6 +248,52 @@ end END_PROVIDER +BEGIN_PROVIDER [ integer, det_to_occ_pattern, (N_det) ] + implicit none + BEGIN_DOC + ! Returns the index of the occupation pattern for each determinant + END_DOC + integer :: i,j,k + integer(bit_kind) :: occ(N_int,2) + logical :: found + do i=1,N_det + do k = 1, N_int + occ(k,1) = ieor(psi_det(k,1,i),psi_det(k,2,i)) + occ(k,2) = iand(psi_det(k,1,i),psi_det(k,2,i)) + enddo + do j=1,N_occ_pattern + found = .True. + do k=1,N_int + if ( (occ(k,1) /= psi_occ_pattern(k,1,j)) & + .or. (occ(k,2) /= psi_occ_pattern(k,2,j)) ) then + found = .False. + exit + endif + enddo + if (found) then + det_to_occ_pattern(i) = j + exit + endif + enddo + enddo +END_PROVIDER + +BEGIN_PROVIDER [ double precision, weight_occ_pattern, (N_occ_pattern,N_states) ] + implicit none + BEGIN_DOC + ! Weight of the occupation patterns in the wave function + END_DOC + integer :: i,j,k + weight_occ_pattern = 0.d0 + do i=1,N_det + j = det_to_occ_pattern(i) + do k=1,N_states + weight_occ_pattern(j,k) += psi_coef(i,k) * psi_coef(i,k) + enddo + enddo +END_PROVIDER + + subroutine make_s2_eigenfunction implicit none integer :: i,j,k diff --git a/src/Determinants/truncate_wf.irp.f b/src/Determinants/truncate_wf.irp.f index 320e07c2..619489aa 100644 --- a/src/Determinants/truncate_wf.irp.f +++ b/src/Determinants/truncate_wf.irp.f @@ -1,12 +1,17 @@ program s2_eig_restart implicit none read_wf = .True. - call routine + if (s2_eig) then + call routine_s2 + else + call routine + endif end + subroutine routine implicit none integer :: ndet_max - print*, 'How many determinants would you like ?' + print*, 'Max number of determinants ?' read(5,*)ndet_max integer(bit_kind), allocatable :: psi_det_tmp(:,:,:) double precision, allocatable :: psi_coef_tmp(:,:) @@ -37,3 +42,50 @@ subroutine routine call save_wavefunction_general(ndet_max,N_states,psi_det_tmp,N_det_max,psi_coef_tmp) end + +subroutine routine_s2 + implicit none + integer :: ndet_max + double precision :: wmin + integer(bit_kind), allocatable :: psi_det_tmp(:,:,:) + double precision, allocatable :: psi_coef_tmp(:,:) + integer :: i,j,k + double precision :: accu(N_states) + + print*, 'Min weight of the occupation pattern ?' + read(5,*) wmin + + ndet_max = 0 + do i=1,N_det + if (maxval(weight_occ_pattern( det_to_occ_pattern(i),:)) < wmin) cycle + ndet_max = ndet_max+1 + enddo + + allocate(psi_det_tmp(N_int,2,ndet_max),psi_coef_tmp(ndet_max, N_states)) + + accu = 0.d0 + k=0 + do i = 1, N_det + if (maxval(weight_occ_pattern( det_to_occ_pattern(i),:)) < wmin) cycle + k = k+1 + do j = 1, N_int + psi_det_tmp(j,1,k) = psi_det(j,1,i) + psi_det_tmp(j,2,k) = psi_det(j,2,i) + enddo + do j = 1, N_states + psi_coef_tmp(k,j) = psi_coef(i,j) + accu(j) += psi_coef_tmp(k,j) **2 + enddo + enddo + do j = 1, N_states + accu(j) = 1.d0/dsqrt(accu(j)) + enddo + do j = 1, N_states + do i = 1, ndet_max + psi_coef_tmp(i,j) = psi_coef_tmp(i,j) * accu(j) + enddo + enddo + + call save_wavefunction_general(ndet_max,N_states,psi_det_tmp,N_det_max,psi_coef_tmp) + +end From 5c769d531c9a6c1335b3df4144f4c1a2a633e44a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 14 Sep 2018 19:07:55 +0200 Subject: [PATCH 08/28] Fixed bug in truncate_wf --- src/Determinants/determinants.irp.f | 19 +++++++------------ src/Determinants/occ_pattern.irp.f | 3 +++ src/Determinants/truncate_wf.irp.f | 4 ++-- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index f04a85a5..1e58e262 100644 --- a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -549,23 +549,18 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef) deallocate (psi_det_save) allocate (psi_coef_save(ndet,nstates)) - double precision :: accu_norm(nstates) - accu_norm = 0.d0 + double precision :: accu_norm do k=1,nstates + accu_norm = 0.d0 do i=1,ndet - accu_norm(k) = accu_norm(k) + psicoef(i,k) * psicoef(i,k) - psi_coef_save(i,k) = psicoef(i,k) + accu_norm = accu_norm + psicoef(i,k) * psicoef(i,k) enddo - if (accu_norm(k) == 0.d0) then - accu_norm(k) = 1.e-12 + if (accu_norm == 0.d0) then + accu_norm = 1.e-12 endif - enddo - do k = 1, nstates - accu_norm(k) = 1.d0/dsqrt(accu_norm(k)) - enddo - do k=1,nstates + accu_norm = 1.d0/dsqrt(accu_norm) do i=1,ndet - psi_coef_save(i,k) = psi_coef_save(i,k) * accu_norm(k) + psi_coef_save(i,k) = psicoef(i,k) * accu_norm enddo enddo diff --git a/src/Determinants/occ_pattern.irp.f b/src/Determinants/occ_pattern.irp.f index 016b546b..f8358a17 100644 --- a/src/Determinants/occ_pattern.irp.f +++ b/src/Determinants/occ_pattern.irp.f @@ -256,6 +256,8 @@ BEGIN_PROVIDER [ integer, det_to_occ_pattern, (N_det) ] integer :: i,j,k integer(bit_kind) :: occ(N_int,2) logical :: found + !$OMP PARALLEL DO DEFAULT(SHARED) & + !$OMP PRIVATE(i,k,j,found,occ) do i=1,N_det do k = 1, N_int occ(k,1) = ieor(psi_det(k,1,i),psi_det(k,2,i)) @@ -276,6 +278,7 @@ BEGIN_PROVIDER [ integer, det_to_occ_pattern, (N_det) ] endif enddo enddo + !$OMP END PARALLEL DO END_PROVIDER BEGIN_PROVIDER [ double precision, weight_occ_pattern, (N_occ_pattern,N_states) ] diff --git a/src/Determinants/truncate_wf.irp.f b/src/Determinants/truncate_wf.irp.f index 619489aa..6d5e2c98 100644 --- a/src/Determinants/truncate_wf.irp.f +++ b/src/Determinants/truncate_wf.irp.f @@ -39,7 +39,7 @@ subroutine routine enddo enddo - call save_wavefunction_general(ndet_max,N_states,psi_det_tmp,N_det_max,psi_coef_tmp) + call save_wavefunction_general(ndet_max,N_states,psi_det_tmp,size(psi_coef_tmp,1),psi_coef_tmp) end @@ -86,6 +86,6 @@ subroutine routine_s2 enddo enddo - call save_wavefunction_general(ndet_max,N_states,psi_det_tmp,N_det_max,psi_coef_tmp) + call save_wavefunction_general(ndet_max,N_states,psi_det_tmp,size(psi_coef_tmp,1),psi_coef_tmp) end From 0e0e8fb7b664ad0ca5e59cb0a6556d865056a449 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 14 Sep 2018 19:44:55 +0200 Subject: [PATCH 09/28] Fixed variable character string --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 3 +-- plugins/dress_zmq/dress_stoch_routines.irp.f | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 7c48c57b..46fa2da7 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -160,8 +160,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error) integer, external :: add_task_to_taskserver - character(len=:), allocatable :: task - allocate(character(len=100000) :: task) + character(100000) :: task integer :: j,k,ipos diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 965f7add..38434224 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -232,7 +232,6 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) implicit none - character(len=:), allocatable :: task integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull integer, external :: omp_get_thread_num double precision, intent(in) :: E(N_states), relative_error @@ -246,7 +245,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) integer :: i, j, k, Ncp double precision :: state_average_weight_save(N_states) - allocate(character(len=100000) :: task) + character(100000) :: task PROVIDE Nproc task(:) = CHAR(0) allocate(delta(N_states,N_det), delta_s2(N_states, N_det)) From fa7144ece647131bc9377a8dfa72ce97912fb51e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 17 Sep 2018 14:43:39 +0200 Subject: [PATCH 10/28] Refinement of psi_generators --- plugins/Full_CI_ZMQ/selection.irp.f | 14 +++++++++----- plugins/Generators_full/generators.irp.f | 7 ++++--- plugins/Selectors_full/selectors.irp.f | 21 ++++++++++----------- src/DavidsonUndressed/print_energy.irp.f | 5 +++++ 4 files changed, 28 insertions(+), 19 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 3febff69..631c96ab 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -308,7 +308,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) - integer :: l_a, nmax + integer :: l_a, nmax, idx integer, allocatable :: indices(:), exc_degree(:), iorder(:) allocate (indices(N_det), & exc_degree(max(N_det_alpha_unique,N_det_beta_unique))) @@ -331,7 +331,9 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d do l_a=psi_bilinear_matrix_columns_loc(j), psi_bilinear_matrix_columns_loc(j+1)-1 i = psi_bilinear_matrix_rows(l_a) if (nt + exc_degree(i) <= 4) then - indices(k) = psi_det_sorted_order(psi_bilinear_matrix_order(l_a)) + idx = psi_det_sorted_order(psi_bilinear_matrix_order(l_a)) + if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle + indices(k) = idx k=k+1 endif enddo @@ -350,9 +352,11 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d i = psi_bilinear_matrix_transp_columns(l_a) if (exc_degree(i) < 3) cycle if (nt + exc_degree(i) <= 4) then - indices(k) = psi_det_sorted_order( & - psi_bilinear_matrix_order( & - psi_bilinear_matrix_transp_order(l_a))) + idx = psi_det_sorted_order( & + psi_bilinear_matrix_order( & + psi_bilinear_matrix_transp_order(l_a))) + if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle + indices(k) = idx k=k+1 endif enddo diff --git a/plugins/Generators_full/generators.irp.f b/plugins/Generators_full/generators.irp.f index 2ce6f854..b996d31b 100644 --- a/plugins/Generators_full/generators.irp.f +++ b/plugins/Generators_full/generators.irp.f @@ -9,11 +9,12 @@ BEGIN_PROVIDER [ integer, N_det_generators ] integer :: i double precision :: norm call write_time(6) - norm = 0.d0 + norm = 1.d0 N_det_generators = N_det do i=1,N_det - norm = norm + psi_average_norm_contrib_sorted(i) - if (norm > threshold_generators+1d-10) then + norm = norm - psi_average_norm_contrib_sorted(i) + if (psi_average_norm_contrib_sorted(i) == 0.d0) exit + if (norm < 1.d0 - threshold_generators) then N_det_generators = i exit endif diff --git a/plugins/Selectors_full/selectors.irp.f b/plugins/Selectors_full/selectors.irp.f index 3d58bdcc..f0d50c94 100644 --- a/plugins/Selectors_full/selectors.irp.f +++ b/plugins/Selectors_full/selectors.irp.f @@ -10,17 +10,16 @@ BEGIN_PROVIDER [ integer, N_det_selectors] double precision :: norm, norm_max call write_time(6) N_det_selectors = N_det - if (threshold_generators < 1.d0) then - norm = 0.d0 - do i=1,N_det - norm = norm + psi_average_norm_contrib_sorted(i) - if (norm > threshold_selectors) then - N_det_selectors = i - exit - endif - enddo - N_det_selectors = max(N_det_selectors,N_det_generators) - endif + norm = 1.d0 + do i=1,N_det + norm = norm - psi_average_norm_contrib_sorted(i) + if (psi_average_norm_contrib_sorted(i) == 0.d0) exit + if (norm < 1.d0 - threshold_selectors) then + N_det_selectors = i + exit + endif + enddo + N_det_selectors = max(N_det_selectors,N_det_generators) call write_int(6,N_det_selectors,'Number of selectors') END_PROVIDER diff --git a/src/DavidsonUndressed/print_energy.irp.f b/src/DavidsonUndressed/print_energy.irp.f index ae6f1da2..d694cb6c 100644 --- a/src/DavidsonUndressed/print_energy.irp.f +++ b/src/DavidsonUndressed/print_energy.irp.f @@ -2,7 +2,12 @@ program print_energy implicit none read_wf = .true. touch read_wf + provide mo_bielec_integrals_in_map + double precision :: time1, time0 + call wall_time(time0) call routine + call wall_time(time1) + print *, 'Wall time :' , time1 - time0 end subroutine routine From df7d5cd117b76ea7718b087d089bff565151e608 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 17 Sep 2018 15:15:25 +0200 Subject: [PATCH 11/28] Fixed multi-state efficiency --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 4 ++-- plugins/Generators_full/generators.irp.f | 3 +-- plugins/Selectors_full/selectors.irp.f | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 46fa2da7..aba1e587 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -49,7 +49,7 @@ logical function testTeethBuilding(minF, N) allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) do i=1,N_det_generators - tilde_w(i) = psi_coef_sorted_gen(i,pt2_stoch_istate)**2 + 1.d-20 + tilde_w(i) = psi_coef_sorted_gen(i,pt2_stoch_istate)**2 !+ 1.d-20 enddo double precision :: norm @@ -479,7 +479,7 @@ END_PROVIDER tilde_cW(0) = 0d0 do i=1,N_det_generators - tilde_w(i) = psi_coef_sorted_gen(i,pt2_stoch_istate)**2 + 1.d-20 + tilde_w(i) = psi_coef_sorted_gen(i,pt2_stoch_istate)**2 !+ 1.d-20 enddo double precision :: norm diff --git a/plugins/Generators_full/generators.irp.f b/plugins/Generators_full/generators.irp.f index b996d31b..dc1c7d77 100644 --- a/plugins/Generators_full/generators.irp.f +++ b/plugins/Generators_full/generators.irp.f @@ -13,8 +13,7 @@ BEGIN_PROVIDER [ integer, N_det_generators ] N_det_generators = N_det do i=1,N_det norm = norm - psi_average_norm_contrib_sorted(i) - if (psi_average_norm_contrib_sorted(i) == 0.d0) exit - if (norm < 1.d0 - threshold_generators) then + if (norm - 1.d-12 < 1.d0 - threshold_generators) then N_det_generators = i exit endif diff --git a/plugins/Selectors_full/selectors.irp.f b/plugins/Selectors_full/selectors.irp.f index f0d50c94..6e198353 100644 --- a/plugins/Selectors_full/selectors.irp.f +++ b/plugins/Selectors_full/selectors.irp.f @@ -13,8 +13,7 @@ BEGIN_PROVIDER [ integer, N_det_selectors] norm = 1.d0 do i=1,N_det norm = norm - psi_average_norm_contrib_sorted(i) - if (psi_average_norm_contrib_sorted(i) == 0.d0) exit - if (norm < 1.d0 - threshold_selectors) then + if (norm - 1.d-12 < 1.d0 - threshold_selectors) then N_det_selectors = i exit endif From f3e22a81f79fb7389ce2455c7866e06705499da4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Sep 2018 00:03:39 +0200 Subject: [PATCH 12/28] Improved parallel scaling --- plugins/Full_CI_ZMQ/selection.irp.f | 6 +++-- .../selection_davidson_slave.irp.f | 27 ++++++++++++++----- plugins/Generators_full/generators.irp.f | 2 +- plugins/Selectors_full/selectors.irp.f | 2 +- 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 631c96ab..43ada9a0 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -332,7 +332,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d i = psi_bilinear_matrix_rows(l_a) if (nt + exc_degree(i) <= 4) then idx = psi_det_sorted_order(psi_bilinear_matrix_order(l_a)) - if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle +! if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle + if (idx > N_det_selectors) cycle indices(k) = idx k=k+1 endif @@ -355,7 +356,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d idx = psi_det_sorted_order( & psi_bilinear_matrix_order( & psi_bilinear_matrix_transp_order(l_a))) - if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle +! if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle + if (idx > N_det_selectors) cycle indices(k) = idx k=k+1 endif diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index fb214ad9..e778be0c 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -50,7 +50,8 @@ subroutine run_wf PROVIDE psi_det psi_coef threshold_generators threshold_selectors state_average_weight mpi_master PROVIDE zmq_state N_det_selectors pt2_stoch_istate N_det pt2_e0_denominator - PROVIDE N_det_generators N_states N_states_diag + PROVIDE N_det_generators N_states N_states_diag psi_energy + IRP_IF MPI call MPI_BARRIER(MPI_COMM_WORLD, ierr) IRP_ENDIF @@ -155,6 +156,12 @@ subroutine run_wf ! PT2 ! --- + IRP_IF MPI + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here, 'error in barrier' + endif + IRP_ENDIF call wall_time(t0) if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle @@ -177,13 +184,19 @@ subroutine run_wf call wall_time(t1) call write_double(6,(t1-t0),'Broadcast time') + IRP_IF MPI + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here, 'error in barrier' + endif + IRP_ENDIF - logical :: lstop - lstop = .False. - !$OMP PARALLEL PRIVATE(i) - i = omp_get_thread_num() - call run_pt2_slave(0,i,pt2_e0_denominator) - !$OMP END PARALLEL + if (.true.) then + !$OMP PARALLEL PRIVATE(i) + i = omp_get_thread_num() + call run_pt2_slave(0,i,pt2_e0_denominator) + !$OMP END PARALLEL + endif print *, 'PT2 done' FREE state_average_weight diff --git a/plugins/Generators_full/generators.irp.f b/plugins/Generators_full/generators.irp.f index dc1c7d77..5b55d1f1 100644 --- a/plugins/Generators_full/generators.irp.f +++ b/plugins/Generators_full/generators.irp.f @@ -13,7 +13,7 @@ BEGIN_PROVIDER [ integer, N_det_generators ] N_det_generators = N_det do i=1,N_det norm = norm - psi_average_norm_contrib_sorted(i) - if (norm - 1.d-12 < 1.d0 - threshold_generators) then + if (norm - 1.d-10 < 1.d0 - threshold_generators) then N_det_generators = i exit endif diff --git a/plugins/Selectors_full/selectors.irp.f b/plugins/Selectors_full/selectors.irp.f index 6e198353..ea0e8680 100644 --- a/plugins/Selectors_full/selectors.irp.f +++ b/plugins/Selectors_full/selectors.irp.f @@ -13,7 +13,7 @@ BEGIN_PROVIDER [ integer, N_det_selectors] norm = 1.d0 do i=1,N_det norm = norm - psi_average_norm_contrib_sorted(i) - if (norm - 1.d-12 < 1.d0 - threshold_selectors) then + if (norm - 1.d-10 < 1.d0 - threshold_selectors) then N_det_selectors = i exit endif From 52b5283fe6813b955b15440370e85635439108aa Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 19 Sep 2018 09:25:32 +0200 Subject: [PATCH 13/28] Minor changes --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 3 ++- plugins/Full_CI_ZMQ/zmq_selection.irp.f | 6 +++--- src/Determinants/density_matrix.irp.f | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index aba1e587..f3c8d835 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -16,6 +16,7 @@ END_PROVIDER integer :: e e = elec_num - n_core_orb * 2 pt2_n_tasks_max = 1+min((e*(e-1))/2, int(dsqrt(dble(N_det_generators)))/10) + pt2_n_tasks_max = 1 do i=1,N_det_generators if (maxval(dabs(psi_coef_sorted_gen(i,1:N_states))) > 0.001d0) then pt2_F(i) = pt2_n_tasks_max @@ -178,7 +179,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error) do j=1,pt2_F(pt2_J(i)) write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, pt2_J(i) ipos += 20 - if (ipos > len(task)-20) then + if (ipos > 100000-20) then if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then stop 'Unable to add task to task server' endif diff --git a/plugins/Full_CI_ZMQ/zmq_selection.irp.f b/plugins/Full_CI_ZMQ/zmq_selection.irp.f index 397bee82..d21d0a27 100644 --- a/plugins/Full_CI_ZMQ/zmq_selection.irp.f +++ b/plugins/Full_CI_ZMQ/zmq_selection.irp.f @@ -52,16 +52,16 @@ subroutine ZMQ_selection(N_in, pt2) endif integer, external :: add_task_to_taskserver - character(len=64000) :: task + character(len=100000) :: task integer :: j,k,ipos ipos=1 task = ' ' - do i= 1, N_det_generators + do i= 1, N_det_generators do j=1,pt2_F(pt2_J(i)) write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, pt2_J(i), N ipos += 30 - if (ipos > 63970) then + if (ipos > 100000-30) then if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then stop 'Unable to add task to task server' endif diff --git a/src/Determinants/density_matrix.irp.f b/src/Determinants/density_matrix.irp.f index c7afebc6..0511b455 100644 --- a/src/Determinants/density_matrix.irp.f +++ b/src/Determinants/density_matrix.irp.f @@ -398,7 +398,7 @@ BEGIN_PROVIDER [ double precision, c0_weight, (N_states) ] do i=1,N_states c0_weight(i) = 1.d-31 c = maxval(psi_coef(:,i) * psi_coef(:,i)) - c0_weight(i) = 1.d0/c + c0_weight(i) = 1.d0/(c+1.d-20) c0_weight(i) = min(c0_weight(i), 100.d0) enddo if (mpi_master) then From 9a77f8d342dd74deeed6875a9096d82a355e78e2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 19 Sep 2018 09:40:19 +0200 Subject: [PATCH 14/28] Fixed broken selection introduced in f3e22a8 --- plugins/Full_CI_ZMQ/selection.irp.f | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 43ada9a0..32fbbdec 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -332,8 +332,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d i = psi_bilinear_matrix_rows(l_a) if (nt + exc_degree(i) <= 4) then idx = psi_det_sorted_order(psi_bilinear_matrix_order(l_a)) -! if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle - if (idx > N_det_selectors) cycle + if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle indices(k) = idx k=k+1 endif @@ -356,8 +355,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d idx = psi_det_sorted_order( & psi_bilinear_matrix_order( & psi_bilinear_matrix_transp_order(l_a))) -! if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle - if (idx > N_det_selectors) cycle + if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle indices(k) = idx k=k+1 endif @@ -374,8 +372,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d call isort(indices,iorder,nmax) deallocate(iorder) - allocate(preinteresting(0:N_det_selectors), prefullinteresting(0:N_det), & - interesting(0:N_det_selectors), fullinteresting(0:N_det)) + allocate(preinteresting(0:N_det), prefullinteresting(0:N_det), & + interesting(0:N_det), fullinteresting(0:N_det)) preinteresting(0) = 0 prefullinteresting(0) = 0 From 47511da13305c6b24137a16d11ecc3f5919adfb4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 19 Sep 2018 09:52:40 +0200 Subject: [PATCH 15/28] Wrong type --- plugins/Full_CI_ZMQ/selection.irp.f | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 32fbbdec..2f7c239f 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -414,7 +414,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d allocate (mat(N_states, mo_tot_num, mo_tot_num)) maskInd = -1 - integer :: nb_count, maskInd_save, monoBdo_save + integer :: nb_count, maskInd_save + logical :: monoBdo_save logical :: found do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first From e89d623be562ac23cd2b3905c12bc540a22b918d Mon Sep 17 00:00:00 2001 From: Pierre-Francois Loos Date: Wed, 19 Sep 2018 10:08:07 +0200 Subject: [PATCH 16/28] NOFT (#73) * Fixed travis * NOFT by T2 * Bug with n_tasks_max --- plugins/NOFT/.gitignore | 5 + plugins/NOFT/EZFIO.cfg | 19 ++ plugins/NOFT/NEEDED_CHILDREN_MODULES | 1 + plugins/NOFT/NOFT.irp.f | 72 ++++ plugins/NOFT/NOFT_JKLfunc.irp.f | 484 +++++++++++++++++++++++++++ plugins/NOFT/NOFT_JKfunc.irp.f | 260 ++++++++++++++ plugins/NOFT/NOFT_PT2.irp.f | 46 +++ plugins/NOFT/NOFT_core.irp.f | 51 +++ plugins/NOFT/README.rst | 12 + plugins/NOFT/ezfio_interface.irp.f | 106 ++++++ 10 files changed, 1056 insertions(+) create mode 100644 plugins/NOFT/.gitignore create mode 100644 plugins/NOFT/EZFIO.cfg create mode 100644 plugins/NOFT/NEEDED_CHILDREN_MODULES create mode 100644 plugins/NOFT/NOFT.irp.f create mode 100644 plugins/NOFT/NOFT_JKLfunc.irp.f create mode 100644 plugins/NOFT/NOFT_JKfunc.irp.f create mode 100644 plugins/NOFT/NOFT_PT2.irp.f create mode 100644 plugins/NOFT/NOFT_core.irp.f create mode 100644 plugins/NOFT/README.rst create mode 100644 plugins/NOFT/ezfio_interface.irp.f diff --git a/plugins/NOFT/.gitignore b/plugins/NOFT/.gitignore new file mode 100644 index 00000000..7ac9fbf6 --- /dev/null +++ b/plugins/NOFT/.gitignore @@ -0,0 +1,5 @@ +IRPF90_temp/ +IRPF90_man/ +irpf90.make +irpf90_entities +tags \ No newline at end of file diff --git a/plugins/NOFT/EZFIO.cfg b/plugins/NOFT/EZFIO.cfg new file mode 100644 index 00000000..b193a86e --- /dev/null +++ b/plugins/NOFT/EZFIO.cfg @@ -0,0 +1,19 @@ +[do_JK_functionals] +type: logical +doc: Compute energies for JK-only functionals +interface: ezfio,provider,ocaml +default: True + +[do_JKL_functionals] +type: logical +doc: Compute energies for JKL-only functionals (PNOFs) +interface: ezfio,provider,ocaml +default: True + +[do_PT2_NOFT] +type: logical +doc: Compute PT2 correction for NOFT +interface: ezfio,provider,ocaml +default: False + + diff --git a/plugins/NOFT/NEEDED_CHILDREN_MODULES b/plugins/NOFT/NEEDED_CHILDREN_MODULES new file mode 100644 index 00000000..02dd611e --- /dev/null +++ b/plugins/NOFT/NEEDED_CHILDREN_MODULES @@ -0,0 +1 @@ +Hartree_Fock Determinants diff --git a/plugins/NOFT/NOFT.irp.f b/plugins/NOFT/NOFT.irp.f new file mode 100644 index 00000000..0c74d5a7 --- /dev/null +++ b/plugins/NOFT/NOFT.irp.f @@ -0,0 +1,72 @@ +program NOFT + implicit none + BEGIN_DOC +! Natural orbital functional theory module + END_DOC + + PROVIDE mo_bielec_integrals_in_map + + integer :: i,j + integer :: nMO,FL + double precision :: ET,EV + double precision :: integral,get_mo_bielec_integral + double precision,allocatable :: n(:) + + print*, '' + print*, '*******************************' + print*, '*** NOFT functionals ***' + print*, '*******************************' + print*, '' + print*, 'SD = single determinant' + print*, 'MBB = Muller, Buijse and Baerends' + print*, 'POWER = Cioslowski and Pernal' + print*, 'BCC2 = Gritsenko and coworkers' + print*, 'CA = Csanyi and Arias' + print*, 'CGA = Csanyi, Goedecker and Arias' + print*, 'GU = Goedecker and Umrigar' + print*, 'ML = Marques and Lathiotakis' + print*, 'MLSIC = ML with self-interaction correction' + print*, 'PNOF2 = Piris natural orbital functional 2 (bug)' + print*, 'PNOF3 = Piris natural orbital functional 3' + print*, 'PNOF4 = Piris natural orbital functional 4' + print*, 'PNOF5 = Piris natural orbital functional 5 (NYI)' + print*, 'PNOF6x = Piris natural orbital functional 6 (x = d, u, h)' + print*, 'PNOF7 = Piris natural orbital functional 7 (NYI)' + print*, '' + print*, '*******************************' + print*, '' + print*, '*******************************' + print*, '*** NOFT energies ***' + print*, '*******************************' + print*, '' + +! Occupation numbers + + nMO = mo_tot_num + FL = elec_num/2 + allocate(n(nMO)) + n(1:nMO) = 0.5d0*mo_occ(1:mo_tot_num) + +! Compute core energies + + call NOFT_core(nMO,ET,EV,n) + +! JK-only functionals + + if(do_JK_functionals) call NOFT_JKfunc(nMO,FL,ET,EV,n) + +! JKL-only functionals + + if(do_JKL_functionals) call NOFT_JKLfunc(nMO,FL,ET,EV,n) + +! PT2-NOFT correction + + if(do_PT2_NOFT) call NOFT_JKLfunc(nMO,FL,n) + +! End + + print*, '*******************************' + print*, '' + +end + diff --git a/plugins/NOFT/NOFT_JKLfunc.irp.f b/plugins/NOFT/NOFT_JKLfunc.irp.f new file mode 100644 index 00000000..5c9e9df6 --- /dev/null +++ b/plugins/NOFT/NOFT_JKLfunc.irp.f @@ -0,0 +1,484 @@ +subroutine NOFT_JKLfunc(nMO,FL,ET,EV,n) +! JKL-only functionals for NOFT + END_DOC + + PROVIDE mo_bielec_integrals_in_map + +! Input variables + + integer,intent(in) :: nMO,FL + double precision,intent(in) :: ET,EV + double precision,intent(in) :: n(nMO) + +! Local variables + + integer :: i,j + double precision :: EJ_SD,EK_SD + double precision :: EJ_PNOF2,EJ_PNOF3,EJ_PNOF4,EJ_PNOF5,EJ_PNOF6d,EJ_PNOF6u,EJ_PNOF6h,EJ_PNOF7 + double precision :: EK_PNOF2,EK_PNOF3,EK_PNOF4,EK_PNOF5,EK_PNOF6d,EK_PNOF6u,EK_PNOF6h,EK_PNOF7 + double precision :: EL_PNOF2,EL_PNOF3,EL_PNOF4,EL_PNOF5,EL_PNOF6d,EL_PNOF6u,EL_PNOF6h,EL_PNOF7 + double precision :: E_PNOF2,E_PNOF3,E_PNOF4,E_PNOF5,E_PNOF6d,E_PNOF6u,E_PNOF6h,E_PNOF7 + double precision :: get_mo_bielec_integral + + double precision :: SF,Sd,Su,Sh,Delta_ij,T_ij,Pi_ij + + double precision,allocatable :: h(:),kappa(:),gam(:),Jint(:,:),Kint(:,:),Lint(:,:) + +! memory allocation + + allocate(h(nMO),kappa(nMO),gam(nMO),Jint(nMO,nMO),Kint(nMO,nMO),Lint(nMO,nMO)) + +! Useful quantities + + h(1:nMO) = 1d0 - n(1:nMO) + + SF = 0d0 + do i=1,FL + SF = SF + h(i) + enddo + +! Useful quantities for PNOF6 + + do i=1,FL + kappa(i) = h(i)*exp(-SF) + enddo + do i=FL+1,nMO + kappa(i) = n(i)*exp(-SF) + enddo + + gam(:) = 0d0 + do i=1,nMO + do j=i,FL + gam(i) = gam(i) + kappa(j) + enddo + gam(i) = n(i)*h(i) + kappa(i)*kappa(i) - kappa(i)*gam(i) + enddo + + Sd = 0d0 + do i=1,FL + Sd = Sd + gam(i) + enddo + + Su = 0d0 + do i=FL+1,nMO + Su = Su + gam(i) + enddo + + Sh = 0.5d0*(Sd + Su) + +! Coulomb, exchange and time-inversion integrals + + do i=1,nMO + do j=1,nMO + + Jint(i,j) = get_mo_bielec_integral(i,j,i,j,mo_integrals_map) + Kint(i,j) = get_mo_bielec_integral(i,j,j,i,mo_integrals_map) + Lint(i,j) = get_mo_bielec_integral(i,i,j,j,mo_integrals_map) + + enddo + enddo + +!**************************************** +!*** Coulomb and exchange parts of SD *** +!**************************************** + + EJ_SD = +2d0*dot_product(n,matmul(Jint,n)) + EK_SD = -1d0*dot_product(n,matmul(Kint,n)) + +! ************* +! *** PNOF2 *** +! ************* + + EJ_PNOF2 = 0d0 + EK_PNOF2 = 0d0 + EL_PNOF2 = 0d0 + + do i=1,nMO + do j=1,nMO + + if(i == j) then + + Delta_ij = n(i)*n(j) + T_ij = 0d0 + Pi_ij = sqrt(n(i)*n(j)) + + elseif(i <= FL .and. j <= FL) then + + Delta_ij = h(i)*h(j) + T_ij = n(i)*n(j) - Delta_ij + Pi_ij = sqrt(n(i)*n(j)) + sqrt(h(i)*h(j)) + T_ij + + elseif(i <= FL .and. j > FL) then + + Delta_ij = n(j)*h(i)*(1d0 - SF)/SF + T_ij = n(i)*n(j) - Delta_ij + Pi_ij = sqrt(n(i)*n(j)) - sqrt(n(j)*h(i)) + T_ij + + elseif(i > FL .and. j <= FL) then + + Delta_ij = n(i)*h(j)*(1d0 - SF)/SF + T_ij = n(i)*n(j) - Delta_ij + Pi_ij = sqrt(n(i)*n(j)) - sqrt(n(i)*h(j)) + T_ij + + elseif(i > FL .and. j > FL) then + + Delta_ij = n(i)*n(j) + T_ij = n(i)*n(j) - Delta_ij + Pi_ij = T_ij + + else + + Delta_ij = 0d0 + T_ij = 0d0 + Pi_ij = 0d0 + + endif + + EJ_PNOF2 = EJ_PNOF2 - 2d0*Delta_ij*Jint(i,j) + EK_PNOF2 = EK_PNOF2 + Delta_ij*Kint(i,j) + EL_PNOF2 = EL_PNOF2 + Pi_ij*Lint(i,j) + + enddo + enddo + +! ************* +! *** PNOF3 *** +! ************* + + EJ_PNOF3 = 0d0 + EK_PNOF3 = 0d0 + EL_PNOF3 = 0d0 + + do i=1,nMO + do j=1,nMO + + if(i == j) then + + Delta_ij = n(i)*n(j) + Pi_ij = sqrt(n(i)*n(j)) + + elseif(i <= FL .and. j <= FL) then + + Delta_ij = h(i)*h(j) + Pi_ij = n(i)*n(j) - sqrt(n(i)*n(j)) + + elseif(i <= FL .and. j > FL) then + + Delta_ij = n(j)*h(i)*(1d0 - SF)/SF + Pi_ij = n(i)*n(j) - sqrt(n(i)*n(j)) - sqrt(n(j)*h(i)) + + elseif(i > FL .and. j <= FL) then + + Delta_ij = n(i)*h(j)*(1d0 - SF)/SF + Pi_ij = n(i)*n(j) - sqrt(n(i)*n(j)) - sqrt(n(i)*h(j)) + + elseif(i > FL .and. j > FL) then + + Delta_ij = n(i)*n(j) + Pi_ij = n(i)*n(j) + sqrt(n(i)*n(j)) + + else + + Delta_ij = 0d0 + Pi_ij = 0d0 + + endif + + EJ_PNOF3 = EJ_PNOF3 - Delta_ij*Jint(i,j) + EK_PNOF3 = EK_PNOF3 + EL_PNOF3 = EL_PNOF3 + Pi_ij*Lint(i,j) + + enddo + enddo + +! ************* +! *** PNOF4 *** +! ************* + + EJ_PNOF4 = 0d0 + EK_PNOF4 = 0d0 + EL_PNOF4 = 0d0 + + do i=1,nMO + do j=1,nMO + + if(i == j) then + + Delta_ij = n(i)*n(j) + Pi_ij = sqrt(n(i)*n(j)) + + elseif(i <= FL .and. j <= FL) then + + Delta_ij = h(i)*h(j) + Pi_ij = - sqrt(h(i)*h(j)) + + elseif(i <= FL .and. j > FL) then + + Delta_ij = n(j)*h(i)*(1d0 - SF)/SF + Pi_ij = - sqrt( (h(i)*n(j)/SF) * (n(i)-n(j)+h(i)*n(j)/SF)) + + elseif(i > FL .and. j <= FL) then + + Delta_ij = n(i)*h(j)*(1d0 - SF)/SF + Pi_ij = - sqrt( (h(j)*n(i)/SF) * (n(j)-n(i)+h(j)*n(i)/SF)) + + elseif(i > FL .and. j >= FL) then + + Delta_ij = n(i)*n(j) + Pi_ij = sqrt(n(i)*n(j)) + + else + + Delta_ij = 0d0 + Pi_ij = 0d0 + + endif + + EJ_PNOF4 = EJ_PNOF4 - 2d0*Delta_ij*Jint(i,j) + EK_PNOF4 = EK_PNOF4 + Delta_ij*Kint(i,j) + EL_PNOF4 = EL_PNOF4 + Pi_ij*Lint(i,j) + + enddo + enddo + + +! ************** +! *** PNOF6d *** +! ************** + + EJ_PNOF6d = 0d0 + EK_PNOF6d = 0d0 + EL_PNOF6d = 0d0 + + do i=1,nMO + do j=1,nMO + + if(i == j) then + + Delta_ij = n(i)*n(j) + Pi_ij = sqrt(n(i)*n(j)) + + elseif(i <= FL .and. j <= FL) then + + Delta_ij = h(i)*h(j)*exp(-2d0*SF) + Pi_ij = - sqrt(h(i)*h(j))*exp(-SF) + + elseif(i <= FL .and. j > FL) then + + Delta_ij = gam(i)*gam(j)/Sd + Pi_ij = - sqrt( (n(i)*h(j) + gam(i)*gam(j)/Sd) * (n(j)*h(i) + gam(i)*gam(j)/Sd)) + + elseif(i > FL .and. j <= FL) then + + Delta_ij = gam(i)*gam(j)/Sd + Pi_ij = - sqrt( (n(i)*h(j) + gam(i)*gam(j)/Sd) * (n(j)*h(i) + gam(i)*gam(j)/Sd)) + + elseif(i > FL .and. j >= FL) then + + Delta_ij = n(i)*n(j)*exp(-2d0*SF) + Pi_ij = sqrt(n(i)*n(j))*exp(-SF) + + else + + Delta_ij = 0d0 + Pi_ij = 0d0 + + endif + + EJ_PNOF6d = EJ_PNOF6d - 2d0*Delta_ij*Jint(i,j) + EK_PNOF6d = EK_PNOF6d + Delta_ij*Kint(i,j) + EL_PNOF6d = EL_PNOF6d + Pi_ij*Lint(i,j) + + enddo + enddo + +! ************** +! *** PNOF6u *** +! ************** + + EJ_PNOF6u = 0d0 + EK_PNOF6u = 0d0 + EL_PNOF6u = 0d0 + + do i=1,nMO + do j=1,nMO + + if(i == j) then + + Delta_ij = n(i)*n(j) + Pi_ij = sqrt(n(i)*n(j)) + + elseif(i <= FL .and. j <= FL) then + + Delta_ij = h(i)*h(j)*exp(-2d0*SF) + Pi_ij = - sqrt(h(i)*h(j))*exp(-SF) + + elseif(i <= FL .and. j > FL) then + + Delta_ij = gam(i)*gam(j)/Su + Pi_ij = - sqrt( (n(i)*h(j) + gam(i)*gam(j)/Su) * (n(j)*h(i) + gam(i)*gam(j)/Su)) + + elseif(i > FL .and. j <= FL) then + + Delta_ij = gam(i)*gam(j)/Su + Pi_ij = - sqrt( (n(i)*h(j) + gam(i)*gam(j)/Su) * (n(j)*h(i) + gam(i)*gam(j)/Su)) + + elseif(i > FL .and. j >= FL) then + + Delta_ij = n(i)*n(j)*exp(-2d0*SF) + Pi_ij = sqrt(n(i)*n(j))*exp(-SF) + + else + + Delta_ij = 0d0 + Pi_ij = 0d0 + + endif + + EJ_PNOF6u = EJ_PNOF6u - 2d0*Delta_ij*Jint(i,j) + EK_PNOF6u = EK_PNOF6u + Delta_ij*Kint(i,j) + EL_PNOF6u = EL_PNOF6u + Pi_ij*Lint(i,j) + + enddo + enddo + +! ************** +! *** PNOF6h *** +! ************** + + EJ_PNOF6h = 0d0 + EK_PNOF6h = 0d0 + EL_PNOF6h = 0d0 + + do i=1,nMO + do j=1,nMO + + if(i == j) then + + Delta_ij = n(i)*n(j) + Pi_ij = sqrt(n(i)*n(j)) + + elseif(i <= FL .and. j <= FL) then + + Delta_ij = h(i)*h(j)*exp(-2d0*SF) + Pi_ij = - sqrt(h(i)*h(j))*exp(-SF) + + elseif(i <= FL .and. j > FL) then + + Delta_ij = gam(i)*gam(j)/Sh + Pi_ij = - sqrt( (n(i)*h(j) + gam(i)*gam(j)/Sh) * (n(j)*h(i) + gam(i)*gam(j)/Sh)) + + elseif(i > FL .and. j <= FL) then + + Delta_ij = gam(i)*gam(j)/Sh + Pi_ij = - sqrt( (n(i)*h(j) + gam(i)*gam(j)/Sh) * (n(j)*h(i) + gam(i)*gam(j)/Sh)) + + elseif(i > FL .and. j >= FL) then + + Delta_ij = n(i)*n(j)*exp(-2d0*SF) + Pi_ij = sqrt(n(i)*n(j))*exp(-SF) + + else + + Delta_ij = 0d0 + Pi_ij = 0d0 + + endif + + EJ_PNOF6h = EJ_PNOF6h - 2d0*Delta_ij*Jint(i,j) + EK_PNOF6h = EK_PNOF6h + Delta_ij*Kint(i,j) + EL_PNOF6h = EL_PNOF6h + Pi_ij*Lint(i,j) + + enddo + enddo + +! Add the SD part + + EJ_PNOF2 = EJ_SD + EJ_PNOF2 + EJ_PNOF3 = EJ_SD + EJ_PNOF3 + EJ_PNOF4 = EJ_SD + EJ_PNOF4 +! EJ_PNOF5 = EJ_SD + EJ_PNOF5 + EJ_PNOF6d = EJ_SD + EJ_PNOF6d + EJ_PNOF6u = EJ_SD + EJ_PNOF6u + EJ_PNOF6h = EJ_SD + EJ_PNOF6h +! EJ_PNOF7 = EJ_SD + EJ_PNOF7 + + EK_PNOF2 = EK_SD + EK_PNOF2 + EK_PNOF3 = EK_SD + EK_PNOF3 + EK_PNOF4 = EK_SD + EK_PNOF4 +! EK_PNOF5 = EK_SD + EK_PNOF5 + EK_PNOF6d = EK_SD + EK_PNOF6d + EK_PNOF6u = EK_SD + EK_PNOF6u + EK_PNOF6h = EK_SD + EK_PNOF6h +! EK_PNOF7 = EK_SD + EK_PNOF7 + +! Compute total energies + + E_PNOF2 = ET + EV + EJ_PNOF2 + EK_PNOF2 + EL_PNOF2 + E_PNOF3 = ET + EV + EJ_PNOF3 + EK_PNOF3 + EL_PNOF3 + E_PNOF4 = ET + EV + EJ_PNOF4 + EK_PNOF4 + EL_PNOF4 +! E_PNOF5 = ET + EV + EJ_PNOF5 + EK_PNOF5 + EL_PNOF5 + E_PNOF6d = ET + EV + EJ_PNOF6d + EK_PNOF6d + EL_PNOF6d + E_PNOF6u = ET + EV + EJ_PNOF6u + EK_PNOF6u + EL_PNOF6u + E_PNOF6h = ET + EV + EJ_PNOF6h + EK_PNOF6h + EL_PNOF6h +! E_PNOF7 = ET + EV + EJ_PNOF7 + EK_PNOF7 + EL_PNOF7 + +! Dump energies + + print*, '*******************************' + print*, '*** JKL NOFT functionals ***' + print*, '*******************************' + print*, '' + print*, '*** Coulomb energies ***' + print*, 'Coulomb PNOF2 energy = ',EJ_PNOF2 + print*, 'Coulomb PNOF3 energy = ',EJ_PNOF3 + print*, 'Coulomb PNOF4 energy = ',EJ_PNOF4 +! print*, 'Coulomb PNOF5 energy = ',EJ_PNOF5 + print*, 'Coulomb PNOF6d energy = ',EJ_PNOF6d + print*, 'Coulomb PNOF6u energy = ',EJ_PNOF6u + print*, 'Coulomb PNOF6h energy = ',EJ_PNOF6h +! print*, 'Coulomb PNOF7 energy = ',EJ_PNOF7 + print*, '' + print*, '*** Exchange energies ***' + print*, 'Exchange PNOF2 energy = ',EK_PNOF2 + print*, 'Exchange PNOF3 energy = ',EK_PNOF3 + print*, 'Exchange PNOF4 energy = ',EK_PNOF4 +! print*, 'Exchange PNOF5 energy = ',EK_PNOF5 + print*, 'Exchange PNOF6d energy = ',EK_PNOF6d + print*, 'Exchange PNOF6u energy = ',EK_PNOF6u + print*, 'Exchange PNOF6h energy = ',EK_PNOF6h +! print*, 'Exchange PNOF7 energy = ',EK_PNOF7 + print*, '' + print*, '*** Time-inversion energies ***' + print*, 'Time-inversion PNOF2 energy = ',EL_PNOF2 + print*, 'Time-inversion PNOF3 energy = ',EL_PNOF3 + print*, 'Time-inversion PNOF4 energy = ',EL_PNOF4 +! print*, 'Time-inversion PNOF5 energy = ',EL_PNOF5 + print*, 'Time-inversion PNOF6d energy = ',EL_PNOF6d + print*, 'Time-inversion PNOF6u energy = ',EL_PNOF6u + print*, 'Time-inversion PNOF6h energy = ',EL_PNOF6h +! print*, 'Time-inversion PNOF7 energy = ',EL_PNOF7 + print*, '' + print*, '*** Two-electron energies ***' + print*, 'J+K+L PNOF2 energy = ',EJ_PNOF2 + EK_PNOF2 + EL_PNOF2 + print*, 'J+K+L PNOF3 energy = ',EJ_PNOF3 + EK_PNOF3 + EL_PNOF3 + print*, 'J+K+L PNOF4 energy = ',EJ_PNOF4 + EK_PNOF4 + EL_PNOF4 +! print*, 'J+K+L PNOF5 energy = ',EJ_PNOF5 + EK_PNOF5 + EL_PNOF5 + print*, 'J+K+L PNOF6d energy = ',EJ_PNOF6d + EK_PNOF6d + EL_PNOF6d + print*, 'J+K+L PNOF6u energy = ',EJ_PNOF6u + EK_PNOF6u + EL_PNOF6u + print*, 'J+K+L PNOF6h energy = ',EJ_PNOF6h + EK_PNOF6h + EL_PNOF6h +! print*, 'J+K+L PNOF7 energy = ',EJ_PNOF7 + EK_PNOF7 + EL_PNOF7 + print*, '' + print*, '*** Total energies ***' + print*, 'Total PNOF2 energy = ',E_PNOF2 + print*, 'Total PNOF3 energy = ',E_PNOF3 + print*, 'Total PNOF4 energy = ',E_PNOF4 +! print*, 'Total PNOF5 energy = ',E_PNOF5 + print*, 'Total PNOF6d energy = ',E_PNOF6d + print*, 'Total PNOF6u energy = ',E_PNOF6u + print*, 'Total PNOF6h energy = ',E_PNOF6h +! print*, 'Total PNOF7 energy = ',E_PNOF7 + print*, '' + +end subroutine NOFT_JKLfunc + diff --git a/plugins/NOFT/NOFT_JKfunc.irp.f b/plugins/NOFT/NOFT_JKfunc.irp.f new file mode 100644 index 00000000..627bc2f1 --- /dev/null +++ b/plugins/NOFT/NOFT_JKfunc.irp.f @@ -0,0 +1,260 @@ +subroutine NOFT_JKfunc(nMO,FL,ET,EV,n) + implicit none + BEGIN_DOC +! JK-only functionals for NOFT + END_DOC + + PROVIDE mo_bielec_integrals_in_map + +! Input variables + + integer,intent(in) :: nMO,FL + double precision,intent(in) :: ET,EV + double precision,intent(in) :: n(nMO) + +! Local variables + + integer :: i,j + double precision :: EJ_SD + double precision :: EK_SD,EK_MBB,EK_POWER,EK_BBC1,EK_BBC2,EK_CA,EK_CGA,EK_GU,EK_ML,EK_MLSIC + double precision :: E_SD,E_MBB,E_POWER,E_BBC1,E_BBC2,E_CA,E_CGA,E_GU,E_ML,E_MLSIC + double precision :: alpha,a0,a1,b1 + double precision :: f_ij,get_mo_bielec_integral + double precision,allocatable :: Jint(:,:),Kint(:,:) + +! Memory allocation + + allocate(Jint(nMO,nMO),Kint(nMO,nMO)) + +! Coulomb, exchange and time-inversion integrals + + do i=1,nMO + do j=1,nMO + + Jint(i,j) = get_mo_bielec_integral(i,j,i,j,mo_integrals_map) + Kint(i,j) = get_mo_bielec_integral(i,j,j,i,mo_integrals_map) + + enddo + enddo + +! Compute SD Coulomb energy + + EJ_SD = 2d0*dot_product(n,matmul(Jint,n)) + +! Compute SD exchange energy + + EK_SD = dot_product(n,matmul(Kint,n)) + +! Compute MBB exchange energy + + EK_MBB = 0d0 + + do i=1,nMO + do j=1,nMO + + EK_MBB = EK_MBB + sqrt(n(i)*n(j))*Kint(i,j) + + enddo + enddo + +! Compute BBC1 exchange energy + + EK_BBC1 = 0d0 + + do i=1,nMO + do j=1,nMO + + if(i /= j .and. i > FL .and. j > FL) then + f_ij = - sqrt(n(i)*n(j)) + else + f_ij = sqrt(n(i)*n(j)) + endif + + EK_BBC1 = EK_BBC1 + f_ij*Kint(i,j) + + enddo + enddo + +! Compute BBC2 exchange energy + + EK_BBC2 = 0d0 + + do i=1,nMO + do j=1,i-1 + + if(i > FL .and. j > FL) then + f_ij = - sqrt(n(i)*n(j)) + elseif(i <= FL .and. j <= FL) then + f_ij = n(i)*n(j) + else + f_ij = sqrt(n(i)*n(j)) + endif + + EK_BBC2 = EK_BBC2 + f_ij*Kint(i,j) + + enddo + + EK_BBC2 = EK_BBC2 + n(i)*Kint(i,i) + + do j=i+1,nMO + + if(i > FL .and. j > FL) then + f_ij = - sqrt(n(i)*n(j)) + elseif(i <= FL .and. j <= FL) then + f_ij = n(i)*n(j) + else + f_ij = sqrt(n(i)*n(j)) + endif + + EK_BBC2 = EK_BBC2 + f_ij*Kint(i,j) + + enddo + enddo + +! Compute CA exchange energy + + EK_CA = 0d0 + + do i=1,nMO + do j=1,nMO + EK_CA = EK_CA + (sqrt(n(i)*n(j)*(1d0 - n(i))*(1d0 - n(j))) + n(i)*n(j))*Kint(i,j) + enddo + enddo + +! Compute CGA exchange energy + + EK_CGA = 0d0 + + do i=1,nMO + do j=1,nMO + EK_CGA = EK_CGA + 0.5d0*(sqrt(n(i)*n(j)*(2d0 - n(i))*(2d0 - n(j))) + n(i)*n(j))*Kint(i,j) + enddo + enddo + +! Compute ML exchange energy + + EK_ML = 0d0 + + a0 = 126.3101d0 + a1 = 2213.33d0 + b1 = 2338.64d0 + + do i=1,nMO + do j=1,nMO + EK_ML = EK_ML + n(i)*n(j)*(a0 + a1*n(i)*n(j))/(1d0 + b1*n(i)*n(j))*Kint(i,j) + enddo + enddo + +! Compute MLSIC exchange energy + + EK_MLSIC = 0d0 + + a0 = 1298.78d0 + a1 = 35114.4d0 + b1 = 36412.2d0 + + do i=1,nMO + + do j=1,i-1 + EK_MLSIC = EK_MLSIC + n(i)*n(j)*(a0 + a1*n(i)*n(j))/(1d0 + b1*n(i)*n(j))*Kint(i,j) + enddo + + EK_MLSIC = EK_MLSIC + n(i)*n(i)*Kint(i,i) + + do j=i+1,nMO + EK_MLSIC = EK_MLSIC + n(i)*n(j)*(a0 + a1*n(i)*n(j))/(1d0 + b1*n(i)*n(j))*Kint(i,j) + enddo + + enddo + +! Compute GU exchange energy + + EK_GU = 0d0 + + do i=1,nMO + + do j=1,i-1 + EK_GU = EK_GU + sqrt(n(i)*n(j))*Kint(i,j) + enddo + + EK_GU = EK_GU + n(i)*n(i)*Kint(i,j) + + do j=i+1,nMO + EK_GU = EK_GU + sqrt(n(i)*n(j))*Kint(i,j) + enddo + + enddo + +! Compute POWER exchange energy + + EK_POWER = 0d0 + alpha = 1d0/3d0 + + do i=1,nMO + do j=1,nMO + EK_POWER = EK_POWER + (n(i)*n(j))**alpha*Kint(i,j) + enddo + enddo + +! Compute total energies + + E_SD = ET + EV + EJ_SD - EK_SD + E_MBB = ET + EV + EJ_SD - EK_MBB + E_BBC1 = ET + EV + EJ_SD - EK_BBC1 + E_BBC2 = ET + EV + EJ_SD - EK_BBC2 + E_CA = ET + EV + EJ_SD - EK_CA + E_CGA = ET + EV + EJ_SD - EK_CGA + E_ML = ET + EV + EJ_SD - EK_ML + E_MLSIC = ET + EV + EJ_SD - EK_MLSIC + E_GU = ET + EV + EJ_SD - EK_GU + E_POWER = ET + EV + EJ_SD - EK_POWER + +! Dump energies + + print*, '*******************************' + print*, '*** JK NOFT functionals ***' + print*, '*******************************' + print*, '' + print*, '*** Coulomb energies ***' + print*, 'Coulomb SD energy = ',EJ_SD + print*, '' + print*, '*** Exchange energies ***' + print*, 'Exchange SD energy = ',-EK_SD + print*, 'Exchange MBB energy = ',-EK_MBB + print*, 'Exchange BBC1 energy = ',-EK_BBC1 + print*, 'Exchange BBC2 energy = ',-EK_BBC2 + print*, 'Exchange CA energy = ',-EK_CA + print*, 'Exchange CGA energy = ',-EK_CGA + print*, 'Exchange ML energy = ',-EK_ML + print*, 'Exchange MLSIC energy = ',-EK_MLSIC + print*, 'Exchange GU energy = ',-EK_GU + print*, 'Exchange POWER energy = ',-EK_POWER + print*, '' + print*, '' + print*, '*** Two-electron energies ***' + print*, 'J+K SD energy = ',EJ_SD - EK_SD + print*, 'J+K MBB energy = ',EJ_SD - EK_MBB + print*, 'J+K BBC1 energy = ',EJ_SD - EK_BBC1 + print*, 'J+K BBC2 energy = ',EJ_SD - EK_BBC2 + print*, 'J+K CA energy = ',EJ_SD - EK_CA + print*, 'J+K CGA energy = ',EJ_SD - EK_CGA + print*, 'J+K ML energy = ',EJ_SD - EK_ML + print*, 'J+K MLSIC energy = ',EJ_SD - EK_MLSIC + print*, 'J+K GU energy = ',EJ_SD - EK_GU + print*, 'J+K POWER energy = ',EJ_SD - EK_POWER + print*, '' + print*, '*** Total energies ***' + print*, 'Total SD energy = ',E_SD + print*, 'Total MBB energy = ',E_MBB + print*, 'Total BBC1 energy = ',E_BBC1 + print*, 'Total BBC2 energy = ',E_BBC2 + print*, 'Total CA energy = ',E_CA + print*, 'Total CGA energy = ',E_CGA + print*, 'Total ML energy = ',E_ML + print*, 'Total MLSIC energy = ',E_MLSIC + print*, 'Total GU energy = ',E_GU + print*, 'Total POWER energy = ',E_POWER + print*, '' + +end subroutine NOFT_JKfunc + diff --git a/plugins/NOFT/NOFT_PT2.irp.f b/plugins/NOFT/NOFT_PT2.irp.f new file mode 100644 index 00000000..466dfa53 --- /dev/null +++ b/plugins/NOFT/NOFT_PT2.irp.f @@ -0,0 +1,46 @@ +subroutine NOFT_PT2(nMO,FL,n) +! Compute the PT2 correction from NOFT + END_DOC + + PROVIDE mo_bielec_integrals_in_map + +! Input variables + + integer,intent(in) :: nMO,FL + double precision,intent(in) :: n(nMO) + +! Local variables + + integer :: i,j,a,b + double precision :: EPT1,EPT2 + double precision :: get_mo_bielec_integral + +! memory allocation + +! Useful quantities + + EPT2 = 0d0 + +! do i=1,FL +! do j=1,FL +! do a=FL+1,nMO +! do b=FL+1,nMO + +! enddo +! enddo +! enddo +! enddo + +! Dump energies + + print*, '*******************************' + print*, '*** PT2 NOFT corrections ***' + print*, '*******************************' + print*, '' + print*, 'Total PT1 energy = ',E_PT1 + print*, 'Total PT2 energy = ',E_PT2 + print*, 'Total PT1+PT2 energy = ',E_PT1 + E_PT2 + print*, '' + +end subroutine NOFT_PT2 + diff --git a/plugins/NOFT/NOFT_core.irp.f b/plugins/NOFT/NOFT_core.irp.f new file mode 100644 index 00000000..abfe0604 --- /dev/null +++ b/plugins/NOFT/NOFT_core.irp.f @@ -0,0 +1,51 @@ +subroutine NOFT_core(nMO,ET,EV,n) + implicit none + BEGIN_DOC +! Core energy for NOFT + END_DOC + +! Input variables + + integer,intent(in) :: nMO + double precision,intent(in) :: n(nMO) + +! Local variables + + integer :: i + +! Output variables + + double precision,intent(out) :: ET,EV + +! Compute kinetic energy + + ET = 0d0 + + do i=1,nMO + ET = ET + n(i)*mo_kinetic_integral(i,i) + enddo + + ET = 2d0*ET + +! Compute nuclear attraction energy + + EV = 0d0 + + do i=1,nMO + EV = EV + n(i)*mo_nucl_elec_integral(i,i) + enddo + + EV = 2d0*EV + +! Dump energies + + print*, '*******************************' + print*, '*** Core energies ***' + print*, '*******************************' + print*, '' + print*, 'Kinetic energy = ',ET + print*, 'Nuclear attraction energy = ',EV + print*, '' + +end subroutine NOFT_core + diff --git a/plugins/NOFT/README.rst b/plugins/NOFT/README.rst new file mode 100644 index 00000000..1ae144e6 --- /dev/null +++ b/plugins/NOFT/README.rst @@ -0,0 +1,12 @@ +==== +NOFT +==== + +Needed Modules +============== +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. +Documentation +============= +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. diff --git a/plugins/NOFT/ezfio_interface.irp.f b/plugins/NOFT/ezfio_interface.irp.f new file mode 100644 index 00000000..4871424d --- /dev/null +++ b/plugins/NOFT/ezfio_interface.irp.f @@ -0,0 +1,106 @@ +! DO NOT MODIFY BY HAND +! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py +! from file /home/loos/quantum_package/src/NOFT/EZFIO.cfg + + +BEGIN_PROVIDER [ logical, do_jk_functionals ] + implicit none + BEGIN_DOC +! Compute energies for JK-only functionals + END_DOC + + logical :: has + PROVIDE ezfio_filename + if (mpi_master) then + + call ezfio_has_noft_do_jk_functionals(has) + if (has) then + call ezfio_get_noft_do_jk_functionals(do_jk_functionals) + else + print *, 'noft/do_jk_functionals not found in EZFIO file' + stop 1 + endif + endif + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST( do_jk_functionals, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read do_jk_functionals with MPI' + endif + IRP_ENDIF + + call write_time(6) + if (mpi_master) then + write(6, *) 'Read do_jk_functionals' + endif + +END_PROVIDER + +BEGIN_PROVIDER [ logical, do_jkl_functionals ] + implicit none + BEGIN_DOC +! Compute energies for JKL-only functionals (PNOFs) + END_DOC + + logical :: has + PROVIDE ezfio_filename + if (mpi_master) then + + call ezfio_has_noft_do_jkl_functionals(has) + if (has) then + call ezfio_get_noft_do_jkl_functionals(do_jkl_functionals) + else + print *, 'noft/do_jkl_functionals not found in EZFIO file' + stop 1 + endif + endif + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST( do_jkl_functionals, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read do_jkl_functionals with MPI' + endif + IRP_ENDIF + + call write_time(6) + if (mpi_master) then + write(6, *) 'Read do_jkl_functionals' + endif + +END_PROVIDER + +BEGIN_PROVIDER [ logical, do_pt2_noft ] + implicit none + BEGIN_DOC +! Compute PT2 correction for NOFT + END_DOC + + logical :: has + PROVIDE ezfio_filename + if (mpi_master) then + + call ezfio_has_noft_do_pt2_noft(has) + if (has) then + call ezfio_get_noft_do_pt2_noft(do_pt2_noft) + else + print *, 'noft/do_pt2_noft not found in EZFIO file' + stop 1 + endif + endif + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST( do_pt2_noft, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read do_pt2_noft with MPI' + endif + IRP_ENDIF + + call write_time(6) + if (mpi_master) then + write(6, *) 'Read do_pt2_noft' + endif + +END_PROVIDER From b719eea8219fa754f522ca94aa1450f94254692a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 19 Sep 2018 12:36:48 +0200 Subject: [PATCH 17/28] Debug ZMQ --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 3 - src/Determinants/zmq.irp.f | 4 +- src/ZMQ/put_get.irp.f | 202 ++++++++++++++++++- 3 files changed, 203 insertions(+), 6 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index f3c8d835..3978cd2a 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -425,9 +425,6 @@ end function deallocate(seed) call RANDOM_NUMBER(pt2_u) - call RANDOM_NUMBER(pt2_u) - - U = 0 diff --git a/src/Determinants/zmq.irp.f b/src/Determinants/zmq.irp.f index 6c25173a..5751f5a2 100644 --- a/src/Determinants/zmq.irp.f +++ b/src/Determinants/zmq.irp.f @@ -163,6 +163,7 @@ integer function zmq_put_psi_det(zmq_to_qp_run_socket,worker_id) rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_det,int(N_int*2_8*N_det*bit_kind,8),0) if (rc8 /= N_int*2_8*N_det*bit_kind) then + print *, 'rc=', rc8 zmq_put_psi_det = -1 return endif @@ -195,8 +196,9 @@ integer function zmq_put_psi_coef(zmq_to_qp_run_socket,worker_id) return endif - rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_coef,int(psi_det_size*N_states*8_8,8),0) + rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_coef,int(psi_det_size,8)*int(N_states,8)*8_8,0) if (rc8 /= psi_det_size*N_states*8_8) then + print *, 'rc=', rc8 zmq_put_psi_coef = -1 return endif diff --git a/src/ZMQ/put_get.irp.f b/src/ZMQ/put_get.irp.f index e86a6daf..bc2334b0 100644 --- a/src/ZMQ/put_get.irp.f +++ b/src/ZMQ/put_get.irp.f @@ -48,7 +48,6 @@ integer function zmq_get_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_ character*(*), intent(in) :: name double precision, intent(out) :: x(size_x) integer :: rc - integer*8 :: rc8 character*(256) :: msg PROVIDE zmq_state @@ -151,7 +150,6 @@ integer function zmq_get_ivector(zmq_to_qp_run_socket, worker_id, name, x, size_ character*(*), intent(in) :: name integer, intent(out) :: x(size_x) integer :: rc - integer*8 :: rc8 character*(256) :: msg PROVIDE zmq_state @@ -200,6 +198,206 @@ integer function zmq_get_ivector(zmq_to_qp_run_socket, worker_id, name, x, size_ end +integer function zmq_put8_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x) + use f77_zmq + implicit none + BEGIN_DOC +! Put a float vector on the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + character*(*) :: name + integer*8, intent(in) :: size_x + double precision, intent(in) :: x(size_x) + integer*8 :: rc + character*(256) :: msg + + zmq_put8_dvector = 0 + + write(msg,'(A,1X,I8,1X,A200)') 'put_data '//trim(zmq_state), worker_id, name + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) + if (rc /= len(trim(msg))) then + zmq_put8_dvector = -1 + return + endif + + rc = f77_zmq_send(zmq_to_qp_run_socket,x,size_x*8,0) + if (rc /= size_x*8) then + zmq_put8_dvector = -1 + return + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:rc) /= 'put_data_reply ok') then + zmq_put8_dvector = -1 + return + endif + +end + + +integer function zmq_get8_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_x) + use f77_zmq + implicit none + BEGIN_DOC +! Get a float vector from the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + integer*8, intent(in) :: size_x + character*(*), intent(in) :: name + double precision, intent(out) :: x(size_x) + integer*8 :: rc + character*(256) :: msg + + PROVIDE zmq_state + ! Success + zmq_get8_dvector = 0 + + if (mpi_master) then + write(msg,'(A,1X,I8,1X,A200)') 'get_data '//trim(zmq_state), worker_id, name + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) + if (rc /= len(trim(msg))) then + zmq_get8_dvector = -1 + print *, irp_here, 'rc /= len(trim(msg))', rc, len(trim(msg)) + go to 10 + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:14) /= 'get_data_reply') then + print *, irp_here, 'msg(1:14) /= get_data_reply', msg(1:14) + zmq_get8_dvector = -1 + go to 10 + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,x,size_x*8,0) + if (rc /= size_x*8) then + print *, irp_here, 'rc /= size_x*8', rc, size_x*8 + zmq_get8_dvector = -1 + go to 10 + endif + endif + + 10 continue + + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + IRP_IF MPI + integer :: ierr + include 'mpif.h' + call MPI_BCAST (zmq_get8_dvector, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here//': Unable to broadcast zmq_get8_dvector' + stop -1 + endif + call MPI_BARRIER(MPI_COMM_WORLD,ierr) + call broadcast_chunks_double(x, size_x) + IRP_ENDIF + +end + + + +integer function zmq_put8_ivector(zmq_to_qp_run_socket, worker_id, name, x, size_x) + use f77_zmq + implicit none + BEGIN_DOC +! Put a vector of integers on the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + character*(*) :: name + integer*8, intent(in) :: size_x + integer, intent(in) :: x(size_x) + integer*8 :: rc + character*(256) :: msg + + zmq_put8_ivector = 0 + + write(msg,'(A,1X,I8,1X,A200)') 'put_data '//trim(zmq_state), worker_id, name + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) + if (rc /= len(trim(msg))) then + zmq_put8_ivector = -1 + return + endif + + rc = f77_zmq_send(zmq_to_qp_run_socket,x,size_x*4,0) + if (rc /= size_x*4) then + zmq_put8_ivector = -1 + return + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:rc) /= 'put_data_reply ok') then + zmq_put8_ivector = -1 + return + endif + +end + + +integer function zmq_get8_ivector(zmq_to_qp_run_socket, worker_id, name, x, size_x) + use f77_zmq + implicit none + BEGIN_DOC +! Get a vector of integers from the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + integer*8, intent(in) :: size_x + character*(*), intent(in) :: name + integer, intent(out) :: x(size_x) + integer*8 :: rc + character*(256) :: msg + + PROVIDE zmq_state + ! Success + zmq_get8_ivector = 0 + + if (mpi_master) then + write(msg,'(A,1X,I8,1X,A200)') 'get_data '//trim(zmq_state), worker_id, name + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) + if (rc /= len(trim(msg))) then + zmq_get8_ivector = -1 + go to 10 + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:14) /= 'get_data_reply') then + zmq_get8_ivector = -1 + go to 10 + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,x,size_x*4,0) + if (rc /= size_x*4) then + zmq_get8_ivector = -1 + go to 10 + endif + endif + + 10 continue + + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + IRP_IF MPI + integer :: ierr + include 'mpif.h' + call MPI_BCAST (zmq_get8_ivector, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here//': Unable to broadcast zmq_get8_ivector' + stop -1 + endif + call MPI_BARRIER(MPI_COMM_WORLD,ierr) + call broadcast_chunks_integer(x, size_x) + IRP_ENDIF + +end + + integer function zmq_put_int(zmq_to_qp_run_socket, worker_id, name, x) use f77_zmq From 4308fa9a1fe52014e69862486ada501166395ea9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 19 Sep 2018 15:52:13 +0200 Subject: [PATCH 18/28] Can diagonalize much larger spaces (ZMQ) --- .../selection_davidson_slave.irp.f | 1 + src/Davidson/davidson_parallel.irp.f | 10 +- src/ZMQ/put_get.irp.f | 124 +++++++++++++++++- 3 files changed, 128 insertions(+), 7 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index e778be0c..a25ff56d 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -36,6 +36,7 @@ subroutine run_wf double precision :: t0, t1 integer, external :: zmq_get_dvector, zmq_get_N_det_generators + integer, external :: zmq_get8_dvector integer, external :: zmq_get_ivector integer, external :: zmq_get_psi, zmq_get_N_det_selectors integer, external :: zmq_get_N_states_diag diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index c92a0489..59393ce4 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -82,11 +82,14 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, double precision, allocatable :: energy(:) integer, external :: zmq_get_dvector + integer, external :: zmq_get_dmatrix allocate(u_t(N_st,N_det)) allocate (energy(N_st)) - if (zmq_get_dvector(zmq_to_qp_run_socket, worker_id, 'u_t', u_t, size(u_t)) == -1) then + ! Warning : dimensions are permuted for performance considerations, It is OK + ! since we get the full matrix + if (zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, 'u_t', u_t, size(u_t,2), size(u_t,1) ) == -1) then print *, irp_here, ': Unable to get u_t' deallocate(u_t,energy) return @@ -313,6 +316,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) double precision :: energy(N_st) integer, external :: zmq_put_dvector, zmq_put_psi, zmq_put_N_states_diag + integer, external :: zmq_put_dmatrix energy = 0.d0 @@ -325,7 +329,9 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',energy,size(energy)) == -1) then stop 'Unable to put energy on ZMQ server' endif - if (zmq_put_dvector(zmq_to_qp_run_socket, 1, 'u_t', u_t, size(u_t)) == -1) then + ! Warning : dimensions are permuted for performance considerations, It is OK + ! since we get the full matrix + if (zmq_put_dmatrix(zmq_to_qp_run_socket, 1, 'u_t', u_t, size(u_t,2),size(u_t,1) ) == -1) then stop 'Unable to put u_t on ZMQ server' endif diff --git a/src/ZMQ/put_get.irp.f b/src/ZMQ/put_get.irp.f index bc2334b0..ed81efd9 100644 --- a/src/ZMQ/put_get.irp.f +++ b/src/ZMQ/put_get.irp.f @@ -218,17 +218,20 @@ integer function zmq_put8_dvector(zmq_to_qp_run_socket, worker_id, name, x, size rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) if (rc /= len(trim(msg))) then zmq_put8_dvector = -1 + print *, 'Failed in put_data' return endif - rc = f77_zmq_send(zmq_to_qp_run_socket,x,size_x*8,0) - if (rc /= size_x*8) then + rc = f77_zmq_send8(zmq_to_qp_run_socket,x,size_x*8_8,0) + if (rc /= size_x*8_8) then + print *, 'Failed in send ', rc, size_x*8, size_x, N_det zmq_put8_dvector = -1 return endif rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) if (msg(1:rc) /= 'put_data_reply ok') then + print *, 'Failed in recv ', rc zmq_put8_dvector = -1 return endif @@ -270,7 +273,7 @@ integer function zmq_get8_dvector(zmq_to_qp_run_socket, worker_id, name, x, size go to 10 endif - rc = f77_zmq_recv(zmq_to_qp_run_socket,x,size_x*8,0) + rc = f77_zmq_recv8(zmq_to_qp_run_socket,x,size_x*8,0) if (rc /= size_x*8) then print *, irp_here, 'rc /= size_x*8', rc, size_x*8 zmq_get8_dvector = -1 @@ -300,6 +303,117 @@ end +integer function zmq_put_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_x1, size_x2) + use f77_zmq + implicit none + BEGIN_DOC +! Put a float vector on the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + character*(*) :: name + integer, intent(in) :: size_x1, size_x2 + double precision, intent(in) :: x(size_x1, size_x2) + integer*8 :: rc + integer :: j + character*(256) :: msg + + zmq_put_dmatrix = 0 + + do j=1,size_x2 + write(msg,'(A,1X,I8,1X,A,I8.8)') 'put_data '//trim(zmq_state), worker_id, trim(name), j + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) + if (rc /= len(trim(msg))) then + zmq_put_dmatrix = -1 + print *, 'Failed in put_data', rc, j + return + endif + + rc = f77_zmq_send8(zmq_to_qp_run_socket,x(1,j),size_x1*8_8,0) + if (rc /= size_x1*8_8) then + print *, 'Failed in send ', rc, j + zmq_put_dmatrix = -1 + return + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:rc) /= 'put_data_reply ok') then + print *, 'Failed in recv ', rc, j + zmq_put_dmatrix = -1 + return + endif + enddo + +end + + +integer function zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_x1, size_x2) + use f77_zmq + implicit none + BEGIN_DOC +! Get a float vector from the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + integer, intent(in) :: size_x1, size_x2 + character*(*), intent(in) :: name + double precision, intent(out) :: x(size_x1,size_x2) + integer*8 :: rc + integer :: j + character*(256) :: msg + + PROVIDE zmq_state + ! Success + zmq_get_dmatrix = 0 + + if (mpi_master) then + do j=1, size_x2 + write(msg,'(A,1X,I8,1X,A,I8.8)') 'get_data '//trim(zmq_state), worker_id, trim(name),j + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) + if (rc /= len(trim(msg))) then + zmq_get_dmatrix = -1 + print *, irp_here, 'rc /= len(trim(msg))', rc, len(trim(msg)) + go to 10 + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:14) /= 'get_data_reply') then + print *, irp_here, 'msg(1:14) /= get_data_reply', msg(1:14) + zmq_get_dmatrix = -1 + go to 10 + endif + + rc = f77_zmq_recv8(zmq_to_qp_run_socket,x(1,j),size_x1*8,0) + if (rc /= size_x1*8) then + print *, irp_here, 'rc /= size_x1*8', rc, size_x1*8 + zmq_get_dmatrix = -1 + go to 10 + endif + enddo + endif + + 10 continue + + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + IRP_IF MPI + integer :: ierr + include 'mpif.h' + call MPI_BCAST (zmq_get_dmatrix, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here//': Unable to broadcast zmq_get_dmatrix' + stop -1 + endif + call MPI_BARRIER(MPI_COMM_WORLD,ierr) + call broadcast_chunks_double(x, int(size_x1,8)*int(size_x2,8)) + IRP_ENDIF + +end + + + integer function zmq_put8_ivector(zmq_to_qp_run_socket, worker_id, name, x, size_x) use f77_zmq implicit none @@ -323,7 +437,7 @@ integer function zmq_put8_ivector(zmq_to_qp_run_socket, worker_id, name, x, size return endif - rc = f77_zmq_send(zmq_to_qp_run_socket,x,size_x*4,0) + rc = f77_zmq_send8(zmq_to_qp_run_socket,x,size_x*4,0) if (rc /= size_x*4) then zmq_put8_ivector = -1 return @@ -364,7 +478,7 @@ integer function zmq_get8_ivector(zmq_to_qp_run_socket, worker_id, name, x, size go to 10 endif - rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + rc = f77_zmq_recv8(zmq_to_qp_run_socket,msg,len(msg),0) if (msg(1:14) /= 'get_data_reply') then zmq_get8_ivector = -1 go to 10 From 98b2384d43416f5d306d70c626ff8e9ac9448891 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 21 Sep 2018 10:02:55 +0200 Subject: [PATCH 19/28] Optimized S2 --- .../diag_restart_save_all_states.irp.f | 4 +- src/Determinants/occ_pattern.irp.f | 43 ++++++++++++------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/DavidsonUndressed/diag_restart_save_all_states.irp.f b/src/DavidsonUndressed/diag_restart_save_all_states.irp.f index 3bdc37c5..9701acc4 100644 --- a/src/DavidsonUndressed/diag_restart_save_all_states.irp.f +++ b/src/DavidsonUndressed/diag_restart_save_all_states.irp.f @@ -9,8 +9,6 @@ subroutine routine implicit none call diagonalize_CI print*,'N_det = ',N_det - call save_wavefunction_general(N_det,N_states_diag,psi_det_sorted,size(psi_coef_sorted,1),psi_coef_sorted) - - + call save_wavefunction_general(N_det,N_states,psi_det_sorted,size(psi_coef_sorted,1),psi_coef_sorted) end diff --git a/src/Determinants/occ_pattern.irp.f b/src/Determinants/occ_pattern.irp.f index f8358a17..4bddcf93 100644 --- a/src/Determinants/occ_pattern.irp.f +++ b/src/Determinants/occ_pattern.irp.f @@ -51,16 +51,27 @@ subroutine occ_pattern_to_dets(o,d,sze,n_alpha,Nint) integer(bit_kind),intent(in) :: o(Nint,2) integer(bit_kind),intent(out) :: d(Nint,2,sze) - integer :: i, k, nt, na, nd, amax + integer :: i, l, k, nt, na, nd, amax integer :: list_todo(2*n_alpha) integer :: list_a(2*n_alpha) + integer :: ishift amax = n_alpha do k=1,Nint amax -= popcnt( o(k,2) ) enddo - call bitstring_to_list(o(1,1), list_todo, nt, Nint) + nt = 0 + ishift = 2 + do i=1,Nint + l = o(i,1) + do while (l /= 0_bit_kind) + nt = nt+1 + list_todo(nt) = ishift+popcnt(l-1_bit_kind) - popcnt(l) + l = iand(l,l-1_bit_kind) + enddo + ishift = ishift + bit_kind_size + enddo na = 0 nd = 0 @@ -69,7 +80,7 @@ subroutine occ_pattern_to_dets(o,d,sze,n_alpha,Nint) sze = nd - integer :: ne(2), l + integer :: ne(2) l=0 do i=1,nd ne(1) = 0 @@ -90,6 +101,7 @@ subroutine occ_pattern_to_dets(o,d,sze,n_alpha,Nint) end + recursive subroutine rec_occ_pattern_to_dets(list_todo,nt,list_a,na,d,nd,sze,amax,Nint) use bitmasks implicit none @@ -98,6 +110,7 @@ recursive subroutine rec_occ_pattern_to_dets(list_todo,nt,list_a,na,d,nd,sze,am integer,intent(inout) :: list_todo(nt) integer, intent(inout) :: list_a(na+1),nd integer(bit_kind),intent(inout) :: d(Nint,2,sze) + integer :: iint, ipos, i,j,k if (na == amax) then nd += 1 @@ -106,14 +119,17 @@ recursive subroutine rec_occ_pattern_to_dets(list_todo,nt,list_a,na,d,nd,sze,am print *, irp_here, ': sze = ', sze stop 'bug in rec_occ_pattern_to_dets' endif - if (na > 0) then - call list_to_bitstring( d(1,1,nd), list_a, na, Nint) - endif - if (nt > 0) then - call list_to_bitstring( d(1,2,nd), list_todo, nt, Nint) - endif + do i=1,na + iint = ishft(list_a(i)-1,-bit_kind_shift) + 1 + ipos = list_a(i)-ishft((iint-1),bit_kind_shift)-1 + d(iint,1,nd) = ibset( d(iint,1,nd), ipos ) + enddo + do i=1,nt + iint = ishft(list_todo(i)-1,-bit_kind_shift) + 1 + ipos = list_todo(i)-ishft((iint-1),bit_kind_shift)-1 + d(iint,2,nd) = ibset( d(iint,2,nd), ipos ) + enddo else - integer :: i, j, k integer, allocatable :: list_todo_tmp(:) allocate (list_todo_tmp(nt)) do i=1,nt @@ -317,7 +333,7 @@ subroutine make_s2_eigenfunction smax = s ithread=0 !$ ithread = omp_get_thread_num() - !$OMP DO + !$OMP DO SCHEDULE (dynamic,1000) do i=1,N_occ_pattern call occ_pattern_to_dets_size(psi_occ_pattern(1,1,i),s,elec_alpha_num,N_int) s += 1 @@ -330,10 +346,7 @@ subroutine make_s2_eigenfunction do j=1,s if (.not. is_in_wavefunction(d(1,1,j), N_int) ) then N_det_new += 1 - do k=1,N_int - det_buffer(k,1,N_det_new) = d(k,1,j) - det_buffer(k,2,N_det_new) = d(k,2,j) - enddo + det_buffer(:,:,N_det_new) = d(:,:,j) if (N_det_new == bufsze) then call fill_H_apply_buffer_no_selection(bufsze,det_buffer,N_int,ithread) N_det_new = 0 From 1139d31fbb558892d73fbb784c913e36250ddcc0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 21 Sep 2018 10:43:30 +0200 Subject: [PATCH 20/28] size(kind=8) --- src/Bitmask/mpi.irp.f | 2 +- src/Davidson/davidson_parallel.irp.f | 50 ++++-- src/Determinants/determinants.irp.f | 8 +- src/Determinants/zmq.irp.f | 225 +++++++++------------------ src/MPI/mpi.irp.f | 2 +- src/ZMQ/put_get.irp.f | 172 +++++++++++++++++--- 6 files changed, 277 insertions(+), 182 deletions(-) diff --git a/src/Bitmask/mpi.irp.f b/src/Bitmask/mpi.irp.f index 11d6777a..be10f07a 100644 --- a/src/Bitmask/mpi.irp.f +++ b/src/Bitmask/mpi.irp.f @@ -21,7 +21,7 @@ END_PROVIDER subroutine broadcast_chunks_bit_kind(A, LDA) use bitmasks implicit none - integer, intent(in) :: LDA + integer*8, intent(in) :: LDA integer(bit_kind), intent(inout) :: A(LDA) BEGIN_DOC ! Broadcast with chunks of ~2GB diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 59393ce4..5aa7a84a 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -13,6 +13,24 @@ end subroutine davidson_slave_tcp(i) implicit none integer, intent(in) :: i + integer :: nproc_target + double precision :: r1 + if (qp_max_mem > 0) then + nproc_target = nproc + r1 = 8.d0*(3.d0*dble(N_det*N_states_diag) & + + nproc_target*(4.d0*N_det_alpha_unique+2.d0*N_states_diag*N_det))/(1024.d0**3) + do while (r1 > qp_max_mem) + nproc_target = nproc_target - 1 + r1 = 8.d0*(3.d0*dble(N_det*N_states_diag) & + + nproc_target*(4.d0*N_det_alpha_unique+2.d0*N_states_diag*N_det))/(1024.d0**3) + if (nproc_target == 0) then + nproc_target = 1 + exit + endif + enddo + call omp_set_num_threads(nproc_target) + call write_int(6,nproc_target,'Number of threads for diagonalization') + endif call davidson_run_slave(0,i) end @@ -75,7 +93,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, ! Get wave function (u_t) ! ----------------------- - integer :: rc + integer :: rc, ni, nj integer*8 :: rc8 integer :: N_states_read, N_det_read, psi_det_size_read integer :: N_det_selectors_read, N_det_generators_read @@ -87,9 +105,16 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, allocate(u_t(N_st,N_det)) allocate (energy(N_st)) - ! Warning : dimensions are permuted for performance considerations, It is OK - ! since we get the full matrix - if (zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, 'u_t', u_t, size(u_t,2), size(u_t,1) ) == -1) then + ! Warning : dimensions are modified for efficiency, It is OK since we get the + ! full matrix + if (size(u_t,kind=8) < 8388608_8) then + ni = size(u_t) + nj = 1 + else + ni = 8388608 + nj = size(u_t,kind=8)/8388608_8 + 1 + endif + if (zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1) then print *, irp_here, ': Unable to get u_t' deallocate(u_t,energy) return @@ -105,7 +130,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, include 'mpif.h' integer :: ierr - call broadcast_chunks_double(u_t,size(u_t)) + call broadcast_chunks_double(u_t,size(u_t,kind=8)) IRP_ENDIF @@ -311,7 +336,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'davidson') character*(512) :: task - integer :: rc + integer :: rc, ni, nj integer*8 :: rc8 double precision :: energy(N_st) @@ -329,9 +354,16 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',energy,size(energy)) == -1) then stop 'Unable to put energy on ZMQ server' endif - ! Warning : dimensions are permuted for performance considerations, It is OK - ! since we get the full matrix - if (zmq_put_dmatrix(zmq_to_qp_run_socket, 1, 'u_t', u_t, size(u_t,2),size(u_t,1) ) == -1) then + if (size(u_t) < 8388608) then + ni = size(u_t) + nj = 1 + else + ni = 8388608 + nj = size(u_t)/8388608 + 1 + endif + ! Warning : dimensions are modified for efficiency, It is OK since we get the + ! full matrix + if (zmq_put_dmatrix(zmq_to_qp_run_socket, 1, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1) then stop 'Unable to put u_t on ZMQ server' endif diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index 1e58e262..e7ade63b 100644 --- a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -76,7 +76,7 @@ BEGIN_PROVIDER [integer, max_degree_exc] enddo END_PROVIDER -BEGIN_PROVIDER [ integer, psi_det_size ] +BEGIN_PROVIDER [ integer*8, psi_det_size ] implicit none BEGIN_DOC ! Size of the psi_det/psi_coef arrays @@ -88,9 +88,9 @@ BEGIN_PROVIDER [ integer, psi_det_size ] if (exists) then call ezfio_get_determinants_n_det(psi_det_size) else - psi_det_size = 1 + psi_det_size = 1_8 endif - psi_det_size = max(psi_det_size,100000) + psi_det_size = max(psi_det_size,100000_8) call write_int(6,psi_det_size,'Dimension of the psi arrays') endif IRP_IF MPI_DEBUG @@ -100,7 +100,7 @@ BEGIN_PROVIDER [ integer, psi_det_size ] IRP_IF MPI include 'mpif.h' integer :: ierr - call MPI_BCAST( psi_det_size, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST( psi_det_size, 1, MPI_INTEGER8, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then stop 'Unable to read psi_det_size with MPI' endif diff --git a/src/Determinants/zmq.irp.f b/src/Determinants/zmq.irp.f index 5751f5a2..0686be59 100644 --- a/src/Determinants/zmq.irp.f +++ b/src/Determinants/zmq.irp.f @@ -140,7 +140,7 @@ psi_det_size ;; END_TEMPLATE -integer function zmq_put_psi_det(zmq_to_qp_run_socket,worker_id) +integer*8 function zmq_put_psi_det(zmq_to_qp_run_socket,worker_id) use f77_zmq implicit none BEGIN_DOC @@ -148,34 +148,24 @@ integer function zmq_put_psi_det(zmq_to_qp_run_socket,worker_id) END_DOC integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket integer, intent(in) :: worker_id - integer :: rc integer*8 :: rc8 character*(256) :: msg - zmq_put_psi_det = 0 + integer*8 :: zmq_put_i8matrix + integer :: ni, nj - write(msg,'(A,1X,I8,1X,A200)') 'put_data '//trim(zmq_state), worker_id, 'psi_det' - rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) - if (rc /= len(trim(msg))) then - zmq_put_psi_det = -1 - return - endif - - rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_det,int(N_int*2_8*N_det*bit_kind,8),0) - if (rc8 /= N_int*2_8*N_det*bit_kind) then - print *, 'rc=', rc8 - zmq_put_psi_det = -1 - return - endif - - rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) - if (msg(1:rc) /= 'put_data_reply ok') then - zmq_put_psi_det = -1 - return + if (size(psi_det,kind=8) <= 8388608_8) then + ni = size(psi_det,kind=4) + nj = 1 + else + ni = 8388608_8 + nj = int(size(psi_det,kind=8)/8388608_8,4) + 1 endif + rc8 = zmq_put_i8matrix(zmq_to_qp_run_socket, 1, 'psi_det', psi_det, ni, nj, size(psi_det,kind=8)) + zmq_put_psi_det = rc8 end -integer function zmq_put_psi_coef(zmq_to_qp_run_socket,worker_id) +integer*8 function zmq_put_psi_coef(zmq_to_qp_run_socket,worker_id) use f77_zmq implicit none BEGIN_DOC @@ -183,32 +173,75 @@ integer function zmq_put_psi_coef(zmq_to_qp_run_socket,worker_id) END_DOC integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket integer, intent(in) :: worker_id - integer :: rc integer*8 :: rc8 character*(256) :: msg zmq_put_psi_coef = 0 - write(msg,'(A,1X,I8,1X,A200)') 'put_data '//trim(zmq_state), worker_id, 'psi_coef' - rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) - if (rc /= len(trim(msg))) then - zmq_put_psi_coef = -1 - return - endif + integer*8 :: zmq_put_dmatrix + integer :: ni, nj - rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_coef,int(psi_det_size,8)*int(N_states,8)*8_8,0) - if (rc8 /= psi_det_size*N_states*8_8) then - print *, 'rc=', rc8 - zmq_put_psi_coef = -1 - return + if (size(psi_coef,kind=8) <= 8388608_8) then + ni = size(psi_coef,kind=4) + nj = 1 + else + ni = 8388608 + nj = int(size(psi_coef,kind=8)/8388608_8,4) + 1 endif + rc8 = zmq_put_dmatrix(zmq_to_qp_run_socket, 1, 'psi_coef', psi_coef, ni, nj, size(psi_coef,kind=8) ) + zmq_put_psi_coef = rc8 +end - rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) - if (msg(1:rc) /= 'put_data_reply ok') then - zmq_put_psi_coef = -1 - return +integer*8 function zmq_get_psi_det(zmq_to_qp_run_socket,worker_id) + use f77_zmq + implicit none + BEGIN_DOC +! Get psi_det on the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + integer*8 :: rc8 + character*(256) :: msg + + integer*8 :: zmq_get_i8matrix + integer :: ni, nj + + if (size(psi_det,kind=8) <= 8388608_8) then + ni = size(psi_det,kind=4) + nj = 1 + else + ni = 8388608 + nj = int(size(psi_det,kind=8)/8388608_8,4) + 1 endif + rc8 = zmq_get_i8matrix(zmq_to_qp_run_socket, 1, 'psi_det', psi_det, ni, nj, size(psi_det,kind=8)) + zmq_get_psi_det = rc8 +end +integer*8 function zmq_get_psi_coef(zmq_to_qp_run_socket,worker_id) + use f77_zmq + implicit none + BEGIN_DOC +! get psi_coef on the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + integer*8 :: rc8 + character*(256) :: msg + + zmq_get_psi_coef = 0_8 + + integer*8 :: zmq_get_dmatrix + integer :: ni, nj + + if (size(psi_coef,kind=8) <= 8388608_8) then + ni = size(psi_coef,kind=4) + nj = 1 + else + ni = 8388608 + nj = int(size(psi_coef,kind=8)/8388608_8,4) + 1 + endif + rc8 = zmq_get_dmatrix(zmq_to_qp_run_socket, 1, 'psi_coef', psi_coef, ni, nj, size(psi_coef,kind=8) ) + zmq_get_psi_coef = rc8 end !--------------------------------------------------------------------------- @@ -226,8 +259,8 @@ integer function zmq_get_psi(zmq_to_qp_run_socket, worker_id) integer, external :: zmq_get_N_states integer, external :: zmq_get_N_det integer, external :: zmq_get_psi_det_size - integer, external :: zmq_get_psi_det - integer, external :: zmq_get_psi_coef + integer*8, external :: zmq_get_psi_det + integer*8, external :: zmq_get_psi_coef zmq_get_psi = 0 @@ -244,21 +277,21 @@ integer function zmq_get_psi(zmq_to_qp_run_socket, worker_id) return endif - if (size(psi_det) /= N_int*2_8*psi_det_size*bit_kind) then + if (size(psi_det,kind=8) /= N_int*2_8*psi_det_size*bit_kind) then deallocate(psi_det) allocate(psi_det(N_int,2,psi_det_size)) endif - if (size(psi_coef) /= psi_det_size*N_states) then + if (size(psi_coef,kind=8) /= psi_det_size*N_states) then deallocate(psi_coef) allocate(psi_coef(psi_det_size,N_states)) endif - if (zmq_get_psi_det(zmq_to_qp_run_socket, worker_id) == -1) then + if (zmq_get_psi_det(zmq_to_qp_run_socket, worker_id) == -1_8) then zmq_get_psi = -1 return endif - if (zmq_get_psi_coef(zmq_to_qp_run_socket, worker_id) == -1) then + if (zmq_get_psi_coef(zmq_to_qp_run_socket, worker_id) == -1_8) then zmq_get_psi = -1 return endif @@ -267,109 +300,5 @@ integer function zmq_get_psi(zmq_to_qp_run_socket, worker_id) end -integer function zmq_get_psi_det(zmq_to_qp_run_socket, worker_id) - use f77_zmq - implicit none - BEGIN_DOC -! Get psi_det from the qp_run scheduler - END_DOC - integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket - integer, intent(in) :: worker_id - integer :: rc - integer*8 :: rc8 - character*(256) :: msg - - PROVIDE zmq_state - zmq_get_psi_det = 0 - if (mpi_master) then - write(msg,'(A,1X,I8,1X,A200)') 'get_data '//trim(zmq_state), worker_id, 'psi_det' - rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) - if (rc /= len(trim(msg))) then - zmq_get_psi_det = -1 - go to 10 - endif - - rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) - if (msg(1:14) /= 'get_data_reply') then - zmq_get_psi_det = -1 - go to 10 - endif - - rc8 = f77_zmq_recv8(zmq_to_qp_run_socket,psi_det,int(N_int*2_8*N_det*bit_kind,8),0) - if (rc8 /= N_int*2_8*N_det*bit_kind) then - zmq_get_psi_det = -1 - go to 10 - endif - endif - - 10 continue - IRP_IF MPI_DEBUG - print *, irp_here, mpi_rank - call MPI_BARRIER(MPI_COMM_WORLD, ierr) - IRP_ENDIF - IRP_IF MPI - include 'mpif.h' - integer :: ierr - call MPI_BCAST (zmq_get_psi_det, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - if (ierr /= MPI_SUCCESS) then - stop 'Unable to broadcast zmq_get_psi_det' - endif - call broadcast_chunks_bit_kind(psi_det,size(psi_det)) - IRP_ENDIF - -end - -integer function zmq_get_psi_coef(zmq_to_qp_run_socket, worker_id) - use f77_zmq - implicit none - BEGIN_DOC -! Get psi_coef from the qp_run scheduler - END_DOC - integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket - integer, intent(in) :: worker_id - integer :: rc - integer*8 :: rc8 - character*(256) :: msg - - PROVIDE zmq_state psi_det_size - zmq_get_psi_coef = 0 - if (mpi_master) then - write(msg,'(A,1X,I8,1X,A200)') 'get_data '//trim(zmq_state), worker_id, 'psi_coef' - rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) - if (rc /= len(trim(msg))) then - zmq_get_psi_coef = -1 - go to 10 - endif - - rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) - if (msg(1:14) /= 'get_data_reply') then - zmq_get_psi_coef = -1 - go to 10 - endif - - rc8 = f77_zmq_recv8(zmq_to_qp_run_socket,psi_coef,int(psi_det_size*N_states*8_8,8),0) - if (rc8 /= psi_det_size*N_states*8_8) then - zmq_get_psi_coef = -1 - go to 10 - endif - endif - - 10 continue - - IRP_IF MPI_DEBUG - print *, irp_here, mpi_rank - call MPI_BARRIER(MPI_COMM_WORLD, ierr) - IRP_ENDIF - IRP_IF MPI - include 'mpif.h' - integer :: ierr - call MPI_BCAST (zmq_get_psi_coef, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - if (ierr /= MPI_SUCCESS) then - stop 'Unable to broadcast zmq_get_psi_coef' - endif - call broadcast_chunks_double(psi_coef,size(psi_coef)) - IRP_ENDIF - -end diff --git a/src/MPI/mpi.irp.f b/src/MPI/mpi.irp.f index 41694c8f..3517754a 100644 --- a/src/MPI/mpi.irp.f +++ b/src/MPI/mpi.irp.f @@ -65,7 +65,7 @@ BEGIN_TEMPLATE subroutine broadcast_chunks_$double(A, LDA) implicit none - integer, intent(in) :: LDA + integer*8, intent(in) :: LDA $type, intent(inout) :: A(LDA) BEGIN_DOC ! Broadcast with chunks of ~2GB diff --git a/src/ZMQ/put_get.irp.f b/src/ZMQ/put_get.irp.f index ed81efd9..70969f52 100644 --- a/src/ZMQ/put_get.irp.f +++ b/src/ZMQ/put_get.irp.f @@ -93,7 +93,7 @@ integer function zmq_get_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_ stop -1 endif call MPI_BARRIER(MPI_COMM_WORLD,ierr) - call broadcast_chunks_double(x, size_x) + call broadcast_chunks_double(x, int(size_x,8)) IRP_ENDIF end @@ -192,7 +192,7 @@ integer function zmq_get_ivector(zmq_to_qp_run_socket, worker_id, name, x, size_ stop -1 endif call MPI_BARRIER(MPI_COMM_WORLD,ierr) - call broadcast_chunks_integer(x, size_x) + call broadcast_chunks_integer(x, int(size_x,8)) IRP_ENDIF end @@ -273,9 +273,9 @@ integer function zmq_get8_dvector(zmq_to_qp_run_socket, worker_id, name, x, size go to 10 endif - rc = f77_zmq_recv8(zmq_to_qp_run_socket,x,size_x*8,0) + rc = f77_zmq_recv8(zmq_to_qp_run_socket,x,size_x*8_8,0) if (rc /= size_x*8) then - print *, irp_here, 'rc /= size_x*8', rc, size_x*8 + print *, irp_here, 'rc /= size_x*8', rc, size_x*8_8 zmq_get8_dvector = -1 go to 10 endif @@ -303,7 +303,7 @@ end -integer function zmq_put_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_x1, size_x2) +integer function zmq_put_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_x1, size_x2, sze) use f77_zmq implicit none BEGIN_DOC @@ -313,14 +313,19 @@ integer function zmq_put_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_ integer, intent(in) :: worker_id character*(*) :: name integer, intent(in) :: size_x1, size_x2 + integer*8, intent(in) :: sze double precision, intent(in) :: x(size_x1, size_x2) - integer*8 :: rc + integer*8 :: rc, ni integer :: j character*(256) :: msg zmq_put_dmatrix = 0 + ni = size_x1 do j=1,size_x2 + if (j == size_x2) then + ni = int(sze - int(j-1,8)*int(size_x1,8),8) + endif write(msg,'(A,1X,I8,1X,A,I8.8)') 'put_data '//trim(zmq_state), worker_id, trim(name), j rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) if (rc /= len(trim(msg))) then @@ -329,8 +334,8 @@ integer function zmq_put_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_ return endif - rc = f77_zmq_send8(zmq_to_qp_run_socket,x(1,j),size_x1*8_8,0) - if (rc /= size_x1*8_8) then + rc = f77_zmq_send8(zmq_to_qp_run_socket,x(1,j),ni*8_8,0) + if (rc /= ni*8_8) then print *, 'Failed in send ', rc, j zmq_put_dmatrix = -1 return @@ -347,7 +352,7 @@ integer function zmq_put_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_ end -integer function zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_x1, size_x2) +integer function zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_x1, size_x2, sze) use f77_zmq implicit none BEGIN_DOC @@ -356,10 +361,11 @@ integer function zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_ integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket integer, intent(in) :: worker_id integer, intent(in) :: size_x1, size_x2 + integer*8, intent(in) :: sze character*(*), intent(in) :: name double precision, intent(out) :: x(size_x1,size_x2) - integer*8 :: rc - integer :: j + integer*8 :: rc, ni + integer*8 :: j character*(256) :: msg PROVIDE zmq_state @@ -367,7 +373,11 @@ integer function zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_ zmq_get_dmatrix = 0 if (mpi_master) then + ni = size_x1 do j=1, size_x2 + if (j == size_x2) then + ni = sze - (j-1)*size_x1 + endif write(msg,'(A,1X,I8,1X,A,I8.8)') 'get_data '//trim(zmq_state), worker_id, trim(name),j rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) if (rc /= len(trim(msg))) then @@ -383,9 +393,9 @@ integer function zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_ go to 10 endif - rc = f77_zmq_recv8(zmq_to_qp_run_socket,x(1,j),size_x1*8,0) - if (rc /= size_x1*8) then - print *, irp_here, 'rc /= size_x1*8', rc, size_x1*8 + rc = f77_zmq_recv8(zmq_to_qp_run_socket,x(1,j),ni*8_8,0) + if (rc /= ni*8_8) then + print *, irp_here, 'rc /= size_x1*8', rc, ni*8_8 zmq_get_dmatrix = -1 go to 10 endif @@ -407,7 +417,7 @@ integer function zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, name, x, size_ stop -1 endif call MPI_BARRIER(MPI_COMM_WORLD,ierr) - call broadcast_chunks_double(x, int(size_x1,8)*int(size_x2,8)) + call broadcast_chunks_double(x, sze) IRP_ENDIF end @@ -437,8 +447,8 @@ integer function zmq_put8_ivector(zmq_to_qp_run_socket, worker_id, name, x, size return endif - rc = f77_zmq_send8(zmq_to_qp_run_socket,x,size_x*4,0) - if (rc /= size_x*4) then + rc = f77_zmq_send8(zmq_to_qp_run_socket,x,size_x*4_8,0) + if (rc /= size_x*4_8) then zmq_put8_ivector = -1 return endif @@ -478,13 +488,13 @@ integer function zmq_get8_ivector(zmq_to_qp_run_socket, worker_id, name, x, size go to 10 endif - rc = f77_zmq_recv8(zmq_to_qp_run_socket,msg,len(msg),0) + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) if (msg(1:14) /= 'get_data_reply') then zmq_get8_ivector = -1 go to 10 endif - rc = f77_zmq_recv(zmq_to_qp_run_socket,x,size_x*4,0) + rc = f77_zmq_recv8(zmq_to_qp_run_socket,x,size_x*4_8,0) if (rc /= size_x*4) then zmq_get8_ivector = -1 go to 10 @@ -591,3 +601,127 @@ integer function zmq_get_int(zmq_to_qp_run_socket, worker_id, name, x) end + +integer function zmq_put_i8matrix(zmq_to_qp_run_socket, worker_id, name, x, size_x1, size_x2, sze) + use f77_zmq + implicit none + BEGIN_DOC +! Put a float vector on the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + character*(*) :: name + integer, intent(in) :: size_x1, size_x2 + integer*8, intent(in) :: sze + integer*8, intent(in) :: x(size_x1, size_x2) + integer*8 :: rc, ni + integer*8 :: j + character*(256) :: msg + + zmq_put_i8matrix = 0 + + ni = size_x1 + do j=1,size_x2 + if (j == size_x2) then + ni = sze - (j-1_8)*int(size_x1,8) + endif + write(msg,'(A,1X,I8,1X,A,I8.8)') 'put_data '//trim(zmq_state), worker_id, trim(name), j + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) + if (rc /= len(trim(msg))) then + zmq_put_i8matrix = -1 + print *, irp_here, 'Failed in put_data', rc, j + return + endif + + rc = f77_zmq_send8(zmq_to_qp_run_socket,x(1,j),ni*8_8,0) + if (rc /= ni*8_8) then + print *, irp_here, 'Failed in send ', rc, j + zmq_put_i8matrix = -1 + return + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:rc) /= 'put_data_reply ok') then + print *, irp_here, 'Failed in recv ', rc, j + zmq_put_i8matrix = -1 + return + endif + enddo + +end + + +integer function zmq_get_i8matrix(zmq_to_qp_run_socket, worker_id, name, x, size_x1, size_x2, sze) + use f77_zmq + implicit none + BEGIN_DOC +! Get a float vector from the qp_run scheduler + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer, intent(in) :: worker_id + integer, intent(in) :: size_x1, size_x2 + integer*8, intent(in) :: sze + character*(*), intent(in) :: name + double precision, intent(out) :: x(size_x1,size_x2) + integer*8 :: rc, ni + integer*8 :: j + character*(256) :: msg + + PROVIDE zmq_state + ! Success + zmq_get_i8matrix = 0 + + if (mpi_master) then + ni = size_x1 + do j=1, size_x2 + if (j == size_x2) then + ni = sze - (j-1)*size_x1 + endif + write(msg,'(A,1X,I8,1X,A,I8.8)') 'get_data '//trim(zmq_state), worker_id, trim(name),j + rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0) + if (rc /= len(trim(msg))) then + zmq_get_i8matrix = -1 + print *, irp_here, 'rc /= len(trim(msg))', rc, len(trim(msg)) + go to 10 + endif + + rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0) + if (msg(1:14) /= 'get_data_reply') then + print *, irp_here, 'msg(1:14) /= get_data_reply', msg(1:14) + zmq_get_i8matrix = -1 + go to 10 + endif + + rc = f77_zmq_recv8(zmq_to_qp_run_socket,x(1,j),ni*8_8,0) + if (rc /= ni*8_8) then + print *, irp_here, 'rc /= ni*8', rc, ni*8_8 + zmq_get_i8matrix = -1 + go to 10 + endif + enddo + endif + + 10 continue + + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + IRP_IF MPI + integer :: ierr + include 'mpif.h' + call MPI_BCAST (zmq_get_i8matrix, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + print *, irp_here//': Unable to broadcast zmq_get_i8matrix' + stop -1 + endif + call MPI_BARRIER(MPI_COMM_WORLD,ierr) + call broadcast_chunks_double(x, sze) + IRP_ENDIF + +end + + + + + From 5e19eb4aaa6325459744b871043e6c9079d6ba67 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 21 Sep 2018 11:13:38 +0200 Subject: [PATCH 21/28] Fixed bug introduced in 98b2384d43416 --- src/Determinants/occ_pattern.irp.f | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/Determinants/occ_pattern.irp.f b/src/Determinants/occ_pattern.irp.f index 4bddcf93..f71c2721 100644 --- a/src/Determinants/occ_pattern.irp.f +++ b/src/Determinants/occ_pattern.irp.f @@ -61,17 +61,18 @@ subroutine occ_pattern_to_dets(o,d,sze,n_alpha,Nint) amax -= popcnt( o(k,2) ) enddo - nt = 0 - ishift = 2 - do i=1,Nint - l = o(i,1) - do while (l /= 0_bit_kind) - nt = nt+1 - list_todo(nt) = ishift+popcnt(l-1_bit_kind) - popcnt(l) - l = iand(l,l-1_bit_kind) - enddo - ishift = ishift + bit_kind_size - enddo + call bitstring_to_list(o(1,1), list_todo, nt, Nint) +! nt = 0 +! ishift = 2 +! do i=1,Nint +! l = o(i,1) +! do while (l /= 0_bit_kind) +! nt = nt+1 +! list_todo(nt) = ishift+popcnt(l-1_bit_kind) - popcnt(l) +! l = iand(l,l-1_bit_kind) +! enddo +! ishift = ishift + bit_kind_size +! enddo na = 0 nd = 0 From 2058af1af56ff83bb53562b0c18c2dc4572ed464 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 23 Sep 2018 22:27:41 +0200 Subject: [PATCH 22/28] Memory control not needed on slaves --- src/Davidson/davidson_parallel.irp.f | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 5aa7a84a..01f8b9cc 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -13,24 +13,6 @@ end subroutine davidson_slave_tcp(i) implicit none integer, intent(in) :: i - integer :: nproc_target - double precision :: r1 - if (qp_max_mem > 0) then - nproc_target = nproc - r1 = 8.d0*(3.d0*dble(N_det*N_states_diag) & - + nproc_target*(4.d0*N_det_alpha_unique+2.d0*N_states_diag*N_det))/(1024.d0**3) - do while (r1 > qp_max_mem) - nproc_target = nproc_target - 1 - r1 = 8.d0*(3.d0*dble(N_det*N_states_diag) & - + nproc_target*(4.d0*N_det_alpha_unique+2.d0*N_states_diag*N_det))/(1024.d0**3) - if (nproc_target == 0) then - nproc_target = 1 - exit - endif - enddo - call omp_set_num_threads(nproc_target) - call write_int(6,nproc_target,'Number of threads for diagonalization') - endif call davidson_run_slave(0,i) end From 4a48a6b94f4021846bd03de6afb5fc13ecd5a1b6 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 25 Sep 2018 09:49:33 +0200 Subject: [PATCH 23/28] Fixed shiftedBk --- plugins/DavidsonDressed/diagonalize_CI.irp.f | 4 +--- plugins/dress_zmq/alpha_factory.irp.f | 4 ---- plugins/dress_zmq/dress_stoch_routines.irp.f | 7 +++---- plugins/dress_zmq/run_dress_slave.irp.f | 18 ++++++++++++++---- plugins/shiftedbk/selection_types.f90 | 9 --------- src/Determinants/determinants.irp.f | 2 +- src/Determinants/occ_pattern.irp.f | 11 ----------- 7 files changed, 19 insertions(+), 36 deletions(-) delete mode 100644 plugins/shiftedbk/selection_types.f90 diff --git a/plugins/DavidsonDressed/diagonalize_CI.irp.f b/plugins/DavidsonDressed/diagonalize_CI.irp.f index ddea2950..9940ee86 100644 --- a/plugins/DavidsonDressed/diagonalize_CI.irp.f +++ b/plugins/DavidsonDressed/diagonalize_CI.irp.f @@ -63,9 +63,6 @@ END_PROVIDER call davidson_diag_HS2(psi_det,CI_eigenvectors_dressed, CI_eigenvectors_s2_dressed,& size(CI_eigenvectors_dressed,1), CI_electronic_energy_dressed,& N_det,min(N_det,N_states),min(N_det,N_states_diag),N_int,1) -! call u_0_S2_u_0(CI_eigenvectors_s2_dressed,CI_eigenvectors_dressed,N_det,psi_det,N_int,& -! N_states_diag,size(CI_eigenvectors_dressed,1)) - else if (diag_algorithm == "Lapack") then @@ -159,6 +156,7 @@ subroutine diagonalize_CI_dressed ! eigenstates of the CI matrix END_DOC integer :: i,j + PROVIDE delta_ij do j=1,N_states do i=1,N_det psi_coef(i,j) = CI_eigenvectors_dressed(i,j) diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index d59ab032..cdb8a905 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -81,10 +81,6 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index hole (k,2) = iand(psi_det_generators(k,2,i_generator), generators_bitmask(k,2,s_hole,bitmask_index)) particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), generators_bitmask(k,1,s_part,bitmask_index)) particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), generators_bitmask(k,2,s_part,bitmask_index)) - !hole (k,1) = iand(psi_det_generators(k,1,i_generator), full_ijkl_bitmask(k)) - !hole (k,2) = iand(psi_det_generators(k,2,i_generator), full_ijkl_bitmask(k)) - !particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), full_ijkl_bitmask(k)) - !particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), full_ijkl_bitmask(k)) enddo integer :: N_holes(2), N_particles(2) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 38434224..9280b688 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -284,7 +284,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) if (zmq_put_dvector(zmq_to_qp_run_socket,1,"state_average_weight",state_average_weight,N_states) == -1) then stop 'Unable to put state_average_weight on ZMQ server' endif - if (zmq_put_int(zmq_to_qp_run_socket,1,"dress_stoch_istate",dress_stoch_istate) == -1) then + if (zmq_put_int(zmq_to_qp_run_socket,1,'dress_stoch_istate',dress_stoch_istate) == -1) then stop 'Unable to put dress_stoch_istate on ZMQ server' endif if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) then @@ -483,9 +483,9 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer, external :: zmq_delete_tasks, dress_find_sample logical :: found integer :: worker_id + worker_id=1 zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,1) found = .false. delta = 0d0 @@ -542,7 +542,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, if(dabs(error / avg) <= relative_error) then integer, external :: zmq_put_dvector integer, external :: zmq_put_int - i= zmq_put_int(zmq_to_qp_run_socket, worker_id, "ending", (m-1)) + i= zmq_put_int(zmq_to_qp_run_socket, worker_id, 'ending', (m-1)) found = .true. end if else @@ -607,7 +607,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, !end do !print *, "SUM", E(1)+sum(edi(:)) !print *, "DOT", E(1)+tmp - call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) end subroutine diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index b9d73cb9..39e430a1 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -62,6 +62,7 @@ subroutine run_dress_slave(thread,iproce,energy) cp_done = 0 cp_sent = 0 will_send = 0 + cp_max(:) = 0 double precision :: hij, sij, tmp purge_task_id = 0 @@ -76,7 +77,11 @@ subroutine run_dress_slave(thread,iproce,energy) !$OMP PRIVATE(task_buf, ntask_buf,time, time0) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_push = new_zmq_push_socket(thread) - call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) + integer, external :: connect_to_taskserver + if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then + print *, irp_here, ': Unable to connect to task server' + stop -1 + endif if(worker_id == -1) then call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) @@ -91,8 +96,8 @@ subroutine run_dress_slave(thread,iproce,energy) call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) end if - cp_max(:) = 0 - do while(cp_done > cp_sent .or. m /= dress_N_cp+1) + !$OMP FLUSH + do while( (cp_done > cp_sent) .or. (m /= dress_N_cp+1) ) !$OMP CRITICAL (send) if(ntask_tbd == 0) then ntask_tbd = size(task_tbd) @@ -233,7 +238,12 @@ subroutine run_dress_slave(thread,iproce,energy) end if !$OMP END SINGLE - call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) + + integer, external :: disconnect_from_taskserver + if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then + print *, irp_here, ': Unable to disconnect from task server' + stop -1 + endif call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) !$OMP END PARALLEL diff --git a/plugins/shiftedbk/selection_types.f90 b/plugins/shiftedbk/selection_types.f90 deleted file mode 100644 index 29e48524..00000000 --- a/plugins/shiftedbk/selection_types.f90 +++ /dev/null @@ -1,9 +0,0 @@ -module selection_types - type selection_buffer - integer :: N, cur - integer(8) , pointer :: det(:,:,:) - double precision, pointer :: val(:) - double precision :: mini - endtype -end module - diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index e7ade63b..91b037ac 100644 --- a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -536,7 +536,7 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef) call ezfio_set_determinants_mo_label(mo_label) allocate (psi_det_save(N_int,2,ndet)) - !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i,j,k) SHARED(psi_det_save,psidet,ndet,N_int) + !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i,j,k) SHARED(psi_det_save,psidet,ndet,N_int,accu_norm) do i=1,ndet do j=1,2 do k=1,N_int diff --git a/src/Determinants/occ_pattern.irp.f b/src/Determinants/occ_pattern.irp.f index f71c2721..a6c4267a 100644 --- a/src/Determinants/occ_pattern.irp.f +++ b/src/Determinants/occ_pattern.irp.f @@ -62,17 +62,6 @@ subroutine occ_pattern_to_dets(o,d,sze,n_alpha,Nint) enddo call bitstring_to_list(o(1,1), list_todo, nt, Nint) -! nt = 0 -! ishift = 2 -! do i=1,Nint -! l = o(i,1) -! do while (l /= 0_bit_kind) -! nt = nt+1 -! list_todo(nt) = ishift+popcnt(l-1_bit_kind) - popcnt(l) -! l = iand(l,l-1_bit_kind) -! enddo -! ishift = ishift + bit_kind_size -! enddo na = 0 nd = 0 From fb836d063e6d429f8a44c48162cca97d8c364d1b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 25 Sep 2018 12:49:47 +0200 Subject: [PATCH 24/28] Bug --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 4 ++-- src/Davidson/davidson_parallel.irp.f | 4 +--- src/ZMQ/put_get.irp.f | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 0ec775a9..91adeea0 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -21,8 +21,8 @@ program fci_zmq threshold_davidson = threshold_davidson_in * 100.d0 SOFT_TOUCH threshold_davidson -! call diagonalize_CI -! call save_wavefunction + call diagonalize_CI + call save_wavefunction call ezfio_has_hartree_fock_energy(has) if (has) then diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 01f8b9cc..889eb750 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -124,7 +124,6 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, do integer, external :: get_task_from_taskserver integer, external :: task_done_to_taskserver - call sleep(1) if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg) == -1) then exit endif @@ -356,14 +355,13 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) ! ============ integer :: istep, imin, imax, ishift - double precision :: w, max_workload, N_det_inv, di + double precision :: w, max_workload, N_det_inv integer, external :: add_task_to_taskserver w = 0.d0 istep=1 ishift=0 imin=1 N_det_inv = 1.d0/dble(N_det) - di = dble(N_det) max_workload = 50000.d0 do imax=1,N_det w = w + 1.d0 diff --git a/src/ZMQ/put_get.irp.f b/src/ZMQ/put_get.irp.f index 70969f52..69acb107 100644 --- a/src/ZMQ/put_get.irp.f +++ b/src/ZMQ/put_get.irp.f @@ -224,7 +224,7 @@ integer function zmq_put8_dvector(zmq_to_qp_run_socket, worker_id, name, x, size rc = f77_zmq_send8(zmq_to_qp_run_socket,x,size_x*8_8,0) if (rc /= size_x*8_8) then - print *, 'Failed in send ', rc, size_x*8, size_x, N_det + print *, 'Failed in send ', rc, size_x*8, size_x zmq_put8_dvector = -1 return endif From 7fc89d857e7538f43034107d30b1a07b835c2740 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 25 Sep 2018 15:53:23 +0200 Subject: [PATCH 25/28] Type errors --- plugins/Molden/print_mo.irp.f | 4 ++-- plugins/dress_zmq/run_dress_slave.irp.f | 5 +++-- src/Determinants/determinants.irp.f | 8 ++++---- src/Determinants/zmq.irp.f | 4 ++-- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/plugins/Molden/print_mo.irp.f b/plugins/Molden/print_mo.irp.f index 6ac51bdb..2ee38ae0 100644 --- a/plugins/Molden/print_mo.irp.f +++ b/plugins/Molden/print_mo.irp.f @@ -16,8 +16,8 @@ program print_mos call write_Mo_basis(i_unit_output) - write(i_unit_output,*),'' - write(i_unit_output,*),'' + write(i_unit_output,*)'' + write(i_unit_output,*)'' write(i_unit_output,*)' ------------------------' close(i_unit_output) diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 39e430a1..1fd414ab 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -148,7 +148,7 @@ subroutine run_dress_slave(thread,iproce,energy) end if end do call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, & - breve_delta_m, 0, n_tasks) + breve_delta_m, task_buf, n_tasks) end if !$OMP END CRITICAL (send) @@ -234,7 +234,8 @@ subroutine run_dress_slave(thread,iproce,energy) do i=1,N_det_generators if(dress_P(i) <= will_send) sum_f = sum_f + f(i) end do - call push_dress_results(zmq_socket_push, -will_send, sum_f, edI_task, edI_index, breve_delta_m, purge_task_id, 1) + task_buf(1) = purge_task_id + call push_dress_results(zmq_socket_push, -will_send, sum_f, edI_task, edI_index, breve_delta_m, task_buf, 1) end if !$OMP END SINGLE diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index 91b037ac..ab0799ab 100644 --- a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -76,7 +76,7 @@ BEGIN_PROVIDER [integer, max_degree_exc] enddo END_PROVIDER -BEGIN_PROVIDER [ integer*8, psi_det_size ] +BEGIN_PROVIDER [ integer, psi_det_size ] implicit none BEGIN_DOC ! Size of the psi_det/psi_coef arrays @@ -88,9 +88,9 @@ BEGIN_PROVIDER [ integer*8, psi_det_size ] if (exists) then call ezfio_get_determinants_n_det(psi_det_size) else - psi_det_size = 1_8 + psi_det_size = 1 endif - psi_det_size = max(psi_det_size,100000_8) + psi_det_size = max(psi_det_size,100000) call write_int(6,psi_det_size,'Dimension of the psi arrays') endif IRP_IF MPI_DEBUG @@ -100,7 +100,7 @@ BEGIN_PROVIDER [ integer*8, psi_det_size ] IRP_IF MPI include 'mpif.h' integer :: ierr - call MPI_BCAST( psi_det_size, 1, MPI_INTEGER8, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST( psi_det_size, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then stop 'Unable to read psi_det_size with MPI' endif diff --git a/src/Determinants/zmq.irp.f b/src/Determinants/zmq.irp.f index 0686be59..97af6210 100644 --- a/src/Determinants/zmq.irp.f +++ b/src/Determinants/zmq.irp.f @@ -11,8 +11,8 @@ integer function zmq_put_psi(zmq_to_qp_run_socket,worker_id) integer, external :: zmq_put_N_states integer, external :: zmq_put_N_det integer, external :: zmq_put_psi_det_size - integer, external :: zmq_put_psi_det - integer, external :: zmq_put_psi_coef + integer*8, external :: zmq_put_psi_det + integer*8, external :: zmq_put_psi_coef zmq_put_psi = 0 if (zmq_put_N_states(zmq_to_qp_run_socket, worker_id) == -1) then From 502fd9d1fd7475fb719bd20cc98a11c7604578e7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 26 Sep 2018 15:53:54 +0200 Subject: [PATCH 26/28] Fixed sBK --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 3 +-- plugins/dress_zmq/dress_stoch_routines.irp.f | 22 +++++++++++++------- plugins/dress_zmq/run_dress_slave.irp.f | 22 ++++++++++---------- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 3978cd2a..f12eb719 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -15,8 +15,7 @@ END_PROVIDER integer :: i integer :: e e = elec_num - n_core_orb * 2 - pt2_n_tasks_max = 1+min((e*(e-1))/2, int(dsqrt(dble(N_det_generators)))/10) - pt2_n_tasks_max = 1 + pt2_n_tasks_max = 1+min((e*(e-1))/2, int(dsqrt(dble(N_det_selectors)))/10) do i=1,N_det_generators if (maxval(dabs(psi_coef_sorted_gen(i,1:N_states))) > 0.001d0) then pt2_F(i) = pt2_n_tasks_max diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 9280b688..dddfaf9c 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -30,7 +30,7 @@ END_PROVIDER pt2_N_teeth = 1 else pt2_minDetInFirstTeeth = min(5, N_det_generators) - do pt2_N_teeth=20,2,-1 + do pt2_N_teeth=100,2,-1 if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit end do end if @@ -89,7 +89,7 @@ logical function testTeethBuilding(minF, N) end function BEGIN_PROVIDER[ integer, dress_N_cp_max ] - dress_N_cp_max = 64 + dress_N_cp_max = 28 END_PROVIDER BEGIN_PROVIDER[integer, pt2_J, (N_det_generators)] @@ -102,6 +102,7 @@ END_PROVIDER pt2_J = pt2_J_ dress_R1 = dress_R1_ +!return do m=1,dress_N_cp nmov = 0 @@ -209,6 +210,11 @@ END_PROVIDER enddo dress_N_cp = m-1 + if (dress_N_cp == 0) then + print *, irp_here, 'dress_N_cp = 0' + stop -1 + endif + dress_R1_(dress_N_cp) = N_j dress_M_m(dress_N_cp) = N_c !!!!!!!!!!!!!! @@ -510,6 +516,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, more = 1 do while (.not. found) +print *, 'm, dotfm', m, dot_f(m) if(dot_f(m) == 0) then E0 = 0 do i=dress_dot_n_0(m),1,-1 @@ -527,16 +534,17 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, end do end do t = dress_dot_t(m) +!print *, 'm dressncp', m, dress_N_cp avg = E0 + S(t) / dble(c) if (c > 2) then eqt = dabs((S2(t) / c) - (S(t)/c)**2) - eqt = sqrt(eqt / (dble(c)-1.5d0)) - error = eqt + error = sqrt(eqt / (dble(c)-1.5d0)) time = omp_get_wtime() - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E(istate), eqt, time-time0, '' + print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E(istate), error, time-time0, '' + else if ( m==dress_N_cp ) then + error = 0.d0 else - eqt = 1.d0 - error = eqt + error =1.d0 endif m += 1 if(dabs(error / avg) <= relative_error) then diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 1fd414ab..4480ef1a 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -36,7 +36,7 @@ subroutine run_dress_slave(thread,iproce,energy) ! integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1) ! integer(kind=OMP_LOCK_KIND) :: lck_sto(dress_N_cp) double precision :: fac - integer :: ending + integer :: ending, ending_tmp integer, external :: zmq_get_dvector, zmq_get_int ! double precision, external :: omp_get_wtime double precision :: time, time0 @@ -96,7 +96,8 @@ subroutine run_dress_slave(thread,iproce,energy) call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) end if - !$OMP FLUSH + m=0 + !$OMP BARRIER do while( (cp_done > cp_sent) .or. (m /= dress_N_cp+1) ) !$OMP CRITICAL (send) if(ntask_tbd == 0) then @@ -116,13 +117,13 @@ subroutine run_dress_slave(thread,iproce,energy) ntask_tbd -= 1 else m = dress_N_cp + 1 - i= zmq_get_int(zmq_to_qp_run_socket, worker_id, "ending", ending) + if (zmq_get_int(zmq_to_qp_run_socket, worker_id, "ending", ending_tmp) /= -1) then + ending = ending_tmp + endif end if will_send = 0 cp_max(iproc) = m -! print *, cp_max(:) -! print *, '' cp_done = minval(cp_max)-1 if(cp_done > cp_sent) then will_send = cp_sent + 1 @@ -147,6 +148,7 @@ subroutine run_dress_slave(thread,iproce,energy) edI_index(n_tasks) = i end if end do +write(0,*) 'will send', will_send, n_tasks call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, & breve_delta_m, task_buf, n_tasks) end if @@ -160,7 +162,6 @@ subroutine run_dress_slave(thread,iproce,energy) time0 = omp_get_wtime() call alpha_callback(breve_delta_m, i_generator, subset, pt2_F(i_generator), iproc) time = omp_get_wtime() -!print '(I0.11, I4, A12, F12.3)', i_generator, subset, "GREPMETIME", time-time0 t = dress_T(i_generator) !$OMP CRITICAL(t_crit) @@ -200,7 +201,6 @@ subroutine run_dress_slave(thread,iproce,energy) ntask_buf = 0 end if end if - !$OMP FLUSH end do !$OMP BARRIER @@ -208,11 +208,11 @@ subroutine run_dress_slave(thread,iproce,energy) call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) ntask_buf = 0 end if - !$OMP SINGLE - if(purge_task_id /= 0) then - do while(ending == dress_N_cp+1) + + !$OMP SINGLE + if(purge_task_id /= 0) then + do while (zmq_get_int(zmq_to_qp_run_socket, worker_id, "ending", ending) == -1) call sleep(1) - i= zmq_get_int(zmq_to_qp_run_socket, worker_id, "ending", ending) end do will_send = ending From 118bdba3f94f99d057b0d5b4318a3a9f5208ef82 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 26 Sep 2018 16:15:53 +0200 Subject: [PATCH 27/28] Removed prints --- plugins/dress_zmq/dress_stoch_routines.irp.f | 2 -- 1 file changed, 2 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index dddfaf9c..72e00076 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -516,7 +516,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, more = 1 do while (.not. found) -print *, 'm, dotfm', m, dot_f(m) if(dot_f(m) == 0) then E0 = 0 do i=dress_dot_n_0(m),1,-1 @@ -534,7 +533,6 @@ print *, 'm, dotfm', m, dot_f(m) end do end do t = dress_dot_t(m) -!print *, 'm dressncp', m, dress_N_cp avg = E0 + S(t) / dble(c) if (c > 2) then eqt = dabs((S2(t) / c) - (S(t)/c)**2) From d0a5e7d253c283c1f7adaf9491cba228f3171219 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 26 Sep 2018 16:39:57 +0200 Subject: [PATCH 28/28] Removed prints --- plugins/dress_zmq/dress_stoch_routines.irp.f | 3 +- plugins/dress_zmq/run_dress_slave.irp.f | 1 - plugins/shiftedbk/shifted_bk_routines.irp.f | 260 +------------------ 3 files changed, 15 insertions(+), 249 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 72e00076..580f2a25 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -211,8 +211,7 @@ END_PROVIDER dress_N_cp = m-1 if (dress_N_cp == 0) then - print *, irp_here, 'dress_N_cp = 0' - stop -1 + dress_N_cp = 1 endif dress_R1_(dress_N_cp) = N_j diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 4480ef1a..951b5d43 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -148,7 +148,6 @@ subroutine run_dress_slave(thread,iproce,energy) edI_index(n_tasks) = i end if end do -write(0,*) 'will send', will_send, n_tasks call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, & breve_delta_m, task_buf, n_tasks) end if diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 216f9ec3..99a66d45 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -1,85 +1,15 @@ -use selection_types - - - BEGIN_PROVIDER [ double precision, global_sum_alpha2, (N_states) ] -&BEGIN_PROVIDER [ double precision, slave_sum_alpha2, (N_states, Nproc) ] - global_sum_alpha2 = 0d0 - slave_sum_alpha2 = 0d0 -END_PROVIDER - - BEGIN_PROVIDER [ double precision, fock_diag_tmp_, (2,mo_tot_num+1,Nproc) ] -&BEGIN_PROVIDER [ integer, n_det_add ] &BEGIN_PROVIDER [ double precision, a_h_i, (N_det, Nproc) ] &BEGIN_PROVIDER [ double precision, a_s2_i, (N_det, Nproc) ] -&BEGIN_PROVIDER [ type(selection_buffer), sb, (Nproc) ] -&BEGIN_PROVIDER [ type(selection_buffer), global_sb ] -&BEGIN_PROVIDER [ type(selection_buffer), mini_sb ] -&BEGIN_PROVIDER [ double precision, N_det_increase_factor ] implicit none - fock_diag_tmp_(:,:,:) = 0.d0 integer :: i - N_det_increase_factor = dble(N_states) - - - n_det_add = max(1, int(float(N_det) * N_det_increase_factor)) - call create_selection_buffer(n_det_add, n_det_add*2, global_sb) - call create_selection_buffer(n_det_add, n_det_add*2, mini_sb) - do i=1,Nproc - call create_selection_buffer(n_det_add, n_det_add*2, sb(i)) - end do + fock_diag_tmp_(:,:,:) = 0.d0 a_h_i = 0d0 a_s2_i = 0d0 END_PROVIDER - BEGIN_PROVIDER [ integer, N_dress_int_buffer ] -&BEGIN_PROVIDER [ integer, N_dress_double_buffer ] -&BEGIN_PROVIDER [ integer, N_dress_det_buffer ] - implicit none - N_dress_int_buffer = 1 - N_dress_double_buffer = n_det_add+N_states - N_dress_det_buffer = n_det_add -END_PROVIDER - - -subroutine generator_done(i_gen, int_buf, double_buf, det_buf, N_buf, iproc) - implicit none - integer, intent(in) :: i_gen, iproc - integer, intent(out) :: int_buf(N_dress_int_buffer), N_buf(3) - double precision, intent(out) :: double_buf(N_dress_double_buffer) - integer(bit_kind), intent(out) :: det_buf(N_int, 2, N_dress_det_buffer) - integer :: i - int_buf(:) = 0 - - call sort_selection_buffer(sb(iproc)) - - if(sb(iproc)%cur > 0) then - !$OMP CRITICAL - call merge_selection_buffers(sb(iproc), mini_sb) - !call sort_selection_buffer(mini_sb) - do i=1,Nproc - mini_sb%mini = min(sb(i)%mini, mini_sb%mini) - end do - do i=1,Nproc - sb(i)%mini = mini_sb%mini - end do - !$OMP END CRITICAL - end if - call truncate_to_mini(sb(iproc)) - det_buf(:,:,:sb(iproc)%cur) = sb(iproc)%det(:,:,:sb(iproc)%cur) - double_buf(:sb(iproc)%cur) = sb(iproc)%val(:sb(iproc)%cur) - double_buf(sb(iproc)%cur+1:sb(iproc)%cur+N_states) = slave_sum_alpha2(:,iproc) - N_buf(1) = 1 - N_buf(2) = sb(iproc)%cur+N_states - N_buf(3) = sb(iproc)%cur - - sb(iproc)%cur = 0 - slave_sum_alpha2(:,iproc) = 0d0 -end subroutine - - subroutine generator_start(i_gen, iproc) implicit none integer, intent(in) :: i_gen, iproc @@ -89,158 +19,30 @@ subroutine generator_start(i_gen, iproc) end subroutine -subroutine dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) - use bitmasks - implicit none - - integer, intent(in) :: ind, N_buf(3) - integer, intent(in) :: int_buf(*) - double precision, intent(in) :: double_buf(*) - integer(bit_kind), intent(in) :: det_buf(N_int,2,*) - integer :: i - - do i=1,N_buf(3) - call add_to_selection_buffer(global_sb, det_buf(1,1,i), double_buf(i)) - end do - if(N_buf(3) + N_states /= N_buf(2)) stop "buf size" - !$OMP CRITICAL - global_sum_alpha2(:) += double_buf(N_buf(3)+1:N_buf(2)) - !$OMP END CRITICAL -end subroutine - - -subroutine delta_ij_done() - use bitmasks - implicit none - integer :: i, old_det_gen - integer(bit_kind), allocatable :: old_generators(:,:,:) - - allocate(old_generators(N_int, 2, N_det_generators)) - old_generators(:,:,:) = psi_det_generators(:,:,:N_det_generators) - old_det_gen = N_det_generators - - - ! Add buffer only when the last state is computed - call unique_selection_buffer(global_sb) - call sort_selection_buffer(global_sb) - call fill_H_apply_buffer_no_selection(global_sb%cur,global_sb%det,N_int,0) - call copy_H_apply_buffer_to_wf() - if (s2_eig.or.(N_states > 1) ) then - call make_s2_eigenfunction - endif - call undress_with_alpha(old_generators, old_det_gen, psi_det(1,1,N_det_delta_ij+1), N_det-N_det_delta_ij) - call save_wavefunction - -end subroutine - - -subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: alpha(N_int,2,n_alpha) - integer, intent(in) :: n_alpha - integer, allocatable :: minilist(:) - integer(bit_kind), allocatable :: det_minilist(:,:,:) - double precision, allocatable :: delta_ij_loc(:,:,:,:) - integer :: exc(0:2,2,2), h1, h2, p1, p2, s1, s2 - integer :: i, j, k, ex, n_minilist, iproc, degree - double precision :: haa, contrib, phase, c_alpha(N_states,Nproc), s_c_alpha(N_states) - logical :: ok - integer, external :: omp_get_thread_num - - integer,intent(in) :: old_det_gen - integer(bit_kind), intent(in) :: old_generators(N_int, 2, old_det_gen) - - allocate(minilist(N_det_delta_ij), det_minilist(N_int, 2, N_det_delta_ij), delta_ij_loc(N_states, N_det_delta_ij, 2, Nproc)) - - c_alpha = 0d0 - delta_ij_loc = 0d0 - - !$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC) PRIVATE(i, j, iproc, n_minilist, ex) & - !$OMP PRIVATE(det_minilist, minilist, haa, contrib, s_c_alpha) & - !$OMP PRIVATE(exc, h1, h2, p1, p2, s1, s2, phase, degree, ok) - do i=n_alpha,1,-1 - iproc = omp_get_thread_num()+1 - if(mod(i,10000) == 0) print *, "UNDRESSING", i, "/", n_alpha, iproc - n_minilist = 0 - ok = .false. - - do j=1, old_det_gen - call get_excitation_degree(alpha(1,1,i), old_generators(1,1,j), ex, N_int) - if(ex <= 2) then - call get_excitation(old_generators(1,1,j), alpha(1,1,i), exc,degree,phase,N_int) - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) - ok = (mo_class(h1)(1:1) == 'A' .or. mo_class(h1)(1:1) == 'I') .and. & - (mo_class(p1)(1:1) == 'A' .or. mo_class(p1)(1:1) == 'V') - if(ok .and. degree == 2) then - ok = (mo_class(h2)(1:1) == 'A' .or. mo_class(h2)(1:1) == 'I') .and. & - (mo_class(p2)(1:1) == 'A' .or. mo_class(p2)(1:1) == 'V') - end if - if(ok) exit - end if - end do - - if(.not. ok) cycle - - do j=1, N_det_delta_ij - call get_excitation_degree(alpha(1,1,i), psi_det(1,1,j), ex, N_int) - if(ex <= 2) then - n_minilist += 1 - det_minilist(:,:,n_minilist) = psi_det(:,:,j) - minilist(n_minilist) = j - end if - end do - call i_h_j(alpha(1,1,i), alpha(1,1,i), N_int, haa) - call dress_with_alpha_(N_states, N_det_delta_ij, N_int, delta_ij_loc(1,1,1,iproc), & - minilist, det_minilist, n_minilist, alpha(1,1,i), haa, contrib, s_c_alpha, iproc) - - c_alpha(:,iproc) += s_c_alpha(:)**2 - end do - !$OMP END PARALLEL DO - - do i=2,Nproc - delta_ij_loc(:,:,:,1) += delta_ij_loc(:,:,:,i) - c_alpha(:,1) += c_alpha(:,i) - end do - - - delta_ij_tmp(:,:,1) -= delta_ij_loc(:,:,1,1) - delta_ij_tmp(:,:,2) -= delta_ij_loc(:,:,2,1) - - !print *, "SUM ALPHA2 PRE", global_sum_alpha2 - !global_sum_alpha2(:) -= c_alpha(:,1) - print *, "SUM C_ALPHA^2 =", global_sum_alpha2(:) - !print *, "*** DRESSINS DIVIDED BY 1+SUM C_ALPHA^2 ***" - !do i=1,N_states - ! delta_ij_tmp(i,:,:) = delta_ij_tmp(i,:,:) / (1d0 + global_sum_alpha2(i)) - !end do - global_sum_alpha2 = 0d0 -end subroutine - - -subroutine dress_with_alpha_(Nstates,Ndet,Nint,delta_ij_loc,minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc) +subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minilist, det_minilist, n_minilist, alpha, iproc) use bitmasks implicit none BEGIN_DOC !delta_ij_loc(:,:,1) : dressing column for H !delta_ij_loc(:,:,2) : dressing column for S2 + !i_gen : generator index in psi_det_generators !minilist : indices of determinants connected to alpha ( in psi_det ) !n_minilist : size of minilist !alpha : alpha determinant END_DOC - integer, intent(in) :: Nint, Ndet, Nstates, n_minilist, iproc + integer, intent(in) :: Nint, Ndet, Nstates, n_minilist, iproc, i_gen integer(bit_kind), intent(in) :: alpha(Nint,2), det_minilist(Nint, 2, n_minilist) integer,intent(in) :: minilist(n_minilist) double precision, intent(inout) :: delta_ij_loc(Nstates,N_det,2) - double precision, intent(out) :: contrib, c_alpha(N_states) - double precision,intent(in) :: haa double precision :: hij, sij double precision, external :: diag_H_mat_elem_fock - integer :: i,j,k,l,m, l_sd + double precision :: haa, contrib, c_alpha(N_states) + double precision :: de, a_h_psi(N_states) double precision :: hdress, sdress - double precision :: de, a_h_psi(Nstates) + integer :: i, l_sd + + haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) a_h_psi = 0d0 @@ -266,54 +68,20 @@ subroutine dress_with_alpha_(Nstates,Ndet,Nint,delta_ij_loc,minilist, det_minili do l_sd=1,n_minilist hdress = c_alpha(i) * a_h_i(l_sd, iproc) sdress = c_alpha(i) * a_s2_i(l_sd, iproc) - !if(c_alpha(i) * a_s2_i(l_sd, iproc) > 1d-1) then - ! call debug_det(det_minilist(1,1,l_sd), N_int) - ! call debug_det(alpha,N_int) - !end if delta_ij_loc(i, minilist(l_sd), 1) += hdress delta_ij_loc(i, minilist(l_sd), 2) += sdress end do end do -end subroutine - -subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minilist, det_minilist, n_minilist, alpha, iproc) - use bitmasks - implicit none - BEGIN_DOC - !delta_ij_loc(:,:,1) : dressing column for H - !delta_ij_loc(:,:,2) : dressing column for S2 - !i_gen : generator index in psi_det_generators - !minilist : indices of determinants connected to alpha ( in psi_det ) - !n_minilist : size of minilist - !alpha : alpha determinant - END_DOC - integer, intent(in) :: Nint, Ndet, Nstates, n_minilist, iproc, i_gen - integer(bit_kind), intent(in) :: alpha(Nint,2), det_minilist(Nint, 2, n_minilist) - integer,intent(in) :: minilist(n_minilist) - double precision, intent(inout) :: delta_ij_loc(Nstates,N_det,2) - double precision, external :: diag_H_mat_elem_fock - double precision :: haa, contrib, c_alpha(N_states) - - - - haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) - - call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc) - slave_sum_alpha2(:,iproc) += c_alpha(:)**2 - if(contrib < sb(iproc)%mini) then - call add_to_selection_buffer(sb(iproc), alpha, contrib) - end if end subroutine BEGIN_PROVIDER [ logical, initialize_E0_denominator ] - implicit none - BEGIN_DOC - ! If true, initialize pt2_E0_denominator - END_DOC - initialize_E0_denominator = .True. + implicit none + BEGIN_DOC + ! If true, initialize pt2_E0_denominator + END_DOC + initialize_E0_denominator = .True. END_PROVIDER -