From 6df982f2d010c14607a52d87493c26f1ebb17edc Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 26 Oct 2016 12:28:41 +0200 Subject: [PATCH 001/106] wrongly dimensioned s2_out in davidson_diag_hs2 --- src/Davidson/diagonalization_hs2.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index c8ac3733..b156bd09 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -22,7 +22,7 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint, iunit integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) double precision, intent(inout) :: u_in(dim_in,N_st_diag) - double precision, intent(out) :: energies(N_st), s2_out(N_st) + double precision, intent(out) :: energies(N_st), s2_out(N_st_diag) double precision, allocatable :: H_jj(:), S2_jj(:) double precision :: diag_h_mat_elem From 5b5e45e1ca8b60f26625343b0c9fbc1158113298 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 26 Oct 2016 14:56:37 +0200 Subject: [PATCH 002/106] Ndet sized arrays on stack in slater_rules --- src/Determinants/slater_rules.irp.f | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index 6acae282..ce3708df 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -1059,13 +1059,15 @@ subroutine i_H_psi(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array) double precision :: phase integer :: exc(0:2,2,2) double precision :: hij - integer :: idx(0:Ndet) + integer, allocatable :: idx(:) ASSERT (Nint > 0) ASSERT (N_int == Nint) ASSERT (Nstate > 0) ASSERT (Ndet > 0) ASSERT (Ndet_max >= Ndet) + allocate(idx(0:Ndet)) + i_H_psi_array = 0.d0 call filter_connected_i_H_psi0(keys,key,Nint,Ndet,idx) @@ -1107,7 +1109,7 @@ subroutine i_H_psi_minilist(key,keys,idx_key,N_minilist,coef,Nint,Ndet,Ndet_max, double precision :: phase integer :: exc(0:2,2,2) double precision :: hij - integer :: idx(0:Ndet) + integer, allocatable :: idx(:) BEGIN_DOC ! Computes = \sum_J c_J . ! @@ -1120,6 +1122,7 @@ subroutine i_H_psi_minilist(key,keys,idx_key,N_minilist,coef,Nint,Ndet,Ndet_max, ASSERT (Nstate > 0) ASSERT (Ndet > 0) ASSERT (Ndet_max >= Ndet) + allocate(idx(0:Ndet)) i_H_psi_array = 0.d0 call filter_connected_i_H_psi0(keys,key,Nint,N_minilist,idx) @@ -1165,7 +1168,8 @@ subroutine i_H_psi_sec_ord(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array double precision :: phase integer :: exc(0:2,2,2) double precision :: hij - integer :: idx(0:Ndet),n_interact + integer,allocatable :: idx(:) + integer :: n_interact BEGIN_DOC ! for the various Nstates END_DOC @@ -1175,6 +1179,7 @@ subroutine i_H_psi_sec_ord(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array ASSERT (Nstate > 0) ASSERT (Ndet > 0) ASSERT (Ndet_max >= Ndet) + allocate(idx(0:Ndet)) i_H_psi_array = 0.d0 call filter_connected_i_H_psi0(keys,key,Nint,Ndet,idx) n_interact = 0 @@ -1224,7 +1229,7 @@ subroutine i_H_psi_SC2(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array,idx double precision :: phase integer :: exc(0:2,2,2) double precision :: hij - integer :: idx(0:Ndet) + integer,allocatable :: idx(:) ASSERT (Nint > 0) ASSERT (N_int == Nint) @@ -1232,6 +1237,7 @@ subroutine i_H_psi_SC2(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array,idx ASSERT (Ndet > 0) ASSERT (Ndet_max >= Ndet) i_H_psi_array = 0.d0 + allocate(idx(0:Ndet)) call filter_connected_i_H_psi0_SC2(keys,key,Nint,Ndet,idx,idx_repeat) do ii=1,idx(0) i = idx(ii) @@ -1271,7 +1277,7 @@ subroutine i_H_psi_SC2_verbose(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_a double precision :: phase integer :: exc(0:2,2,2) double precision :: hij - integer :: idx(0:Ndet) + integer,allocatable :: idx(:) ASSERT (Nint > 0) ASSERT (N_int == Nint) @@ -1279,6 +1285,7 @@ subroutine i_H_psi_SC2_verbose(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_a ASSERT (Ndet > 0) ASSERT (Ndet_max >= Ndet) i_H_psi_array = 0.d0 + allocate(idx(0:Ndet)) call filter_connected_i_H_psi0_SC2(keys,key,Nint,Ndet,idx,idx_repeat) print*,'--------' do ii=1,idx(0) From aebc386a3ef76fef72d7b1a3636343b6d7181fdc Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 21 Nov 2016 13:45:32 +0100 Subject: [PATCH 003/106] removed selection_single --- plugins/Full_CI_ZMQ/selection.irp.f | 1 - plugins/Full_CI_ZMQ/selection_double.irp.f | 24 +- plugins/Full_CI_ZMQ/selection_single.irp.f | 354 --------------------- 3 files changed, 23 insertions(+), 356 deletions(-) delete mode 100644 plugins/Full_CI_ZMQ/selection_single.irp.f diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index a0209cc5..96b45774 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -79,7 +79,6 @@ subroutine select_connected(i_generator,E0,pt2,b) enddo call select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b) - call select_singles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b) enddo end subroutine diff --git a/plugins/Full_CI_ZMQ/selection_double.irp.f b/plugins/Full_CI_ZMQ/selection_double.irp.f index 977622fd..e177c494 100644 --- a/plugins/Full_CI_ZMQ/selection_double.irp.f +++ b/plugins/Full_CI_ZMQ/selection_double.irp.f @@ -20,6 +20,11 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p integer,allocatable :: preinteresting(:), prefullinteresting(:), interesting(:), fullinteresting(:) integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) + logical :: monoAdo, monoBdo; + + monoAdo = .true. + monoBdo = .true. + allocate(minilist(N_int, 2, N_det_selectors), fullminilist(N_int, 2, N_det)) allocate(preinteresting(0:N_det_selectors), prefullinteresting(0:N_det), interesting(0:N_det_selectors), fullinteresting(0:N_det)) @@ -116,12 +121,16 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p end if end do + + do s2=s1,2 sp = s1 + if(s1 /= s2) sp = 3 ib = 1 if(s1 == s2) ib = i1+1 + monoAdo = .true. do i2=N_holes(s2),ib,-1 ! Generate low excitations first h2 = hole_list(i2,s2) @@ -142,11 +151,24 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p bannedOrb(particle_list(i,s3), s3) = .false. enddo enddo - + + if(s1 /= s2) then + if(monoBdo) then + bannedOrb(h1,s1) = .false. + end if + if(monoAdo) then + bannedOrb(h2,s2) = .false. + monoAdo = .false. + end if + end if + + mat = 0d0 call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) + enddo + if(s1 /= s2) monoBdo = .false. enddo enddo enddo diff --git a/plugins/Full_CI_ZMQ/selection_single.irp.f b/plugins/Full_CI_ZMQ/selection_single.irp.f deleted file mode 100644 index f107db11..00000000 --- a/plugins/Full_CI_ZMQ/selection_single.irp.f +++ /dev/null @@ -1,354 +0,0 @@ - - -subroutine select_singles(i_gen,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf) - use bitmasks - use selection_types - implicit none - BEGIN_DOC -! Select determinants connected to i_det by H - END_DOC - integer, intent(in) :: i_gen - integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) - double precision, intent(in) :: fock_diag_tmp(mo_tot_num) - double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - type(selection_buffer), intent(inout) :: buf - - double precision :: vect(N_states, mo_tot_num) - logical :: bannedOrb(mo_tot_num) - integer :: i, j, k - integer :: h1,h2,s1,s2,i1,i2,ib,sp - integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2) - logical :: fullMatch, ok - - - do k=1,N_int - hole (k,1) = iand(psi_det_generators(k,1,i_gen), hole_mask(k,1)) - hole (k,2) = iand(psi_det_generators(k,2,i_gen), hole_mask(k,2)) - particle(k,1) = iand(not(psi_det_generators(k,1,i_gen)), particle_mask(k,1)) - particle(k,2) = iand(not(psi_det_generators(k,2,i_gen)), particle_mask(k,2)) - enddo - - ! Create lists of holes and particles - ! ----------------------------------- - - integer :: N_holes(2), N_particles(2) - integer :: hole_list(N_int*bit_kind_size,2) - integer :: particle_list(N_int*bit_kind_size,2) - - call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) - call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) - - do sp=1,2 - do i=1, N_holes(sp) - h1 = hole_list(i,sp) - call apply_hole(psi_det_generators(1,1,i_gen), sp, h1, mask, ok, N_int) - bannedOrb = .true. - do j=1,N_particles(sp) - bannedOrb(particle_list(j, sp)) = .false. - end do - call spot_hasBeen(mask, sp, psi_det_sorted, i_gen, N_det, bannedOrb, fullMatch) - if(fullMatch) cycle - vect = 0d0 - call splash_p(mask, sp, psi_selectors(1,1,i_gen), psi_phasemask(1,1,i_gen), psi_selectors_coef_transp(1,i_gen), N_det_selectors - i_gen + 1, bannedOrb, vect) - call fill_buffer_single(i_gen, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) - end do - enddo -end subroutine - - -subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) - use bitmasks - use selection_types - implicit none - - integer, intent(in) :: i_generator, sp, h1 - double precision, intent(in) :: vect(N_states, mo_tot_num) - logical, intent(in) :: bannedOrb(mo_tot_num) - double precision, intent(in) :: fock_diag_tmp(mo_tot_num) - double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - type(selection_buffer), intent(inout) :: buf - logical :: ok - integer :: s1, s2, p1, p2, ib, istate - integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) - double precision :: e_pert, delta_E, val, Hii, max_e_pert - double precision, external :: diag_H_mat_elem_fock - - - call apply_hole(psi_det_generators(1,1,i_generator), sp, h1, mask, ok, N_int) - - do p1=1,mo_tot_num - if(bannedOrb(p1)) cycle - if(vect(1, p1) == 0d0) cycle - call apply_particle(mask, sp, p1, det, ok, N_int) - - - Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) - max_e_pert = 0d0 - - do istate=1,N_states - val = vect(istate, p1) - delta_E = E0(istate) - Hii - if (delta_E < 0.d0) then - e_pert = 0.5d0 * (-dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) - else - e_pert = 0.5d0 * ( dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) - endif - pt2(istate) += e_pert - if(dabs(e_pert) > dabs(max_e_pert)) max_e_pert = e_pert - end do - - if(dabs(max_e_pert) > buf%mini) call add_to_selection_buffer(buf, det, max_e_pert) - end do -end subroutine - - -subroutine splash_p(mask, sp, det, phasemask, coefs, N_sel, bannedOrb, vect) - use bitmasks - implicit none - - integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int,2,N_sel) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2, N_sel) - double precision, intent(in) :: coefs(N_states, N_sel) - integer, intent(in) :: sp, N_sel - logical, intent(inout) :: bannedOrb(mo_tot_num) - double precision, intent(inout) :: vect(N_states, mo_tot_num) - - integer :: i, j, h(0:2,2), p(0:3,2), nt - integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) - - do i=1,N_int - negMask(i,1) = not(mask(i,1)) - negMask(i,2) = not(mask(i,2)) - end do - - do i=1, N_sel - nt = 0 - do j=1,N_int - mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) - nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) - end do - - if(nt > 3) cycle - - do j=1,N_int - perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) - perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) - end do - - call bitstring_to_list(perMask(1,1), h(1,1), h(0,1), N_int) - call bitstring_to_list(perMask(1,2), h(1,2), h(0,2), N_int) - - call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) - call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) - - if(nt == 3) then - call get_m2(det(1,1,i), phasemask(1,1,i), bannedOrb, vect, mask, h, p, sp, coefs(1, i)) - else if(nt == 2) then - call get_m1(det(1,1,i), phasemask(1,1,i), bannedOrb, vect, mask, h, p, sp, coefs(1, i)) - else - call get_m0(det(1,1,i), phasemask(1,1,i), bannedOrb, vect, mask, h, p, sp, coefs(1, i)) - end if - end do -end subroutine - - -subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) - logical, intent(in) :: bannedOrb(mo_tot_num) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: vect(N_states, mo_tot_num) - integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) - integer :: i, j, h1, h2, p1, p2, sfix, hfix, pfix, hmob, pmob, puti - double precision :: hij - double precision, external :: get_phase_bi, integral8 - - integer, parameter :: turn3_2(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) - integer, parameter :: turn2(2) = (/2,1/) - - if(h(0,sp) == 2) then - h1 = h(1, sp) - h2 = h(2, sp) - do i=1,3 - puti = p(i, sp) - if(bannedOrb(puti)) cycle - p1 = p(turn3_2(1,i), sp) - p2 = p(turn3_2(2,i), sp) - hij = integral8(p1, p2, h1, h2) - integral8(p2, p1, h1, h2) - hij *= get_phase_bi(phasemask, sp, sp, h1, p1, h2, p2) - vect(:, puti) += hij * coefs - end do - else if(h(0,sp) == 1) then - sfix = turn2(sp) - hfix = h(1,sfix) - pfix = p(1,sfix) - hmob = h(1,sp) - do j=1,2 - puti = p(j, sp) - if(bannedOrb(puti)) cycle - pmob = p(turn2(j), sp) - hij = integral8(pfix, pmob, hfix, hmob) - hij *= get_phase_bi(phasemask, sp, sfix, hmob, pmob, hfix, pfix) - vect(:, puti) += hij * coefs - end do - else - puti = p(1,sp) - if(.not. bannedOrb(puti)) then - sfix = turn2(sp) - p1 = p(1,sfix) - p2 = p(2,sfix) - h1 = h(1,sfix) - h2 = h(2,sfix) - hij = (integral8(p1,p2,h1,h2) - integral8(p2,p1,h1,h2)) - hij *= get_phase_bi(phasemask, sfix, sfix, h1, p1, h2, p2) - vect(:, puti) += hij * coefs - end if - end if -end subroutine - - - -subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) - logical, intent(in) :: bannedOrb(mo_tot_num) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: vect(N_states, mo_tot_num) - integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) - integer :: i, hole, p1, p2, sh - logical :: ok, lbanned(mo_tot_num) - integer(bit_kind) :: det(N_int, 2) - double precision :: hij - double precision, external :: get_phase_bi, integral8 - - lbanned = bannedOrb - sh = 1 - if(h(0,2) == 1) sh = 2 - hole = h(1, sh) - lbanned(p(1,sp)) = .true. - if(p(0,sp) == 2) lbanned(p(2,sp)) = .true. - !print *, "SPm1", sp, sh - - p1 = p(1, sp) - - if(sp == sh) then - p2 = p(2, sp) - lbanned(p2) = .true. - - do i=1,hole-1 - if(lbanned(i)) cycle - hij = (integral8(p1, p2, i, hole) - integral8(p2, p1, i, hole)) - hij *= get_phase_bi(phasemask, sp, sp, i, p1, hole, p2) - vect(:,i) += hij * coefs - end do - do i=hole+1,mo_tot_num - if(lbanned(i)) cycle - hij = (integral8(p1, p2, hole, i) - integral8(p2, p1, hole, i)) - hij *= get_phase_bi(phasemask, sp, sp, hole, p1, i, p2) - vect(:,i) += hij * coefs - end do - - call apply_particle(mask, sp, p2, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - vect(:, p2) += hij * coefs - else - p2 = p(1, sh) - do i=1,mo_tot_num - if(lbanned(i)) cycle - hij = integral8(p1, p2, i, hole) - hij *= get_phase_bi(phasemask, sp, sh, i, p1, hole, p2) - vect(:,i) += hij * coefs - end do - end if - - call apply_particle(mask, sp, p1, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - vect(:, p1) += hij * coefs -end subroutine - - -subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) - logical, intent(in) :: bannedOrb(mo_tot_num) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: vect(N_states, mo_tot_num) - integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) - integer :: i - logical :: ok, lbanned(mo_tot_num) - integer(bit_kind) :: det(N_int, 2) - double precision :: hij - - lbanned = bannedOrb - lbanned(p(1,sp)) = .true. - do i=1,mo_tot_num - if(lbanned(i)) cycle - call apply_particle(mask, sp, i, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - vect(:, i) += hij * coefs - end do -end subroutine - - -subroutine spot_hasBeen(mask, sp, det, i_gen, N, banned, fullMatch) - use bitmasks - implicit none - - integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) - integer, intent(in) :: i_gen, N, sp - logical, intent(inout) :: banned(mo_tot_num) - logical, intent(out) :: fullMatch - - - integer :: i, j, na, nb, list(3), nt - integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) - - fullMatch = .false. - - do i=1,N_int - negMask(i,1) = not(mask(i,1)) - negMask(i,2) = not(mask(i,2)) - end do - - genl : do i=1, N - nt = 0 - - do j=1, N_int - myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) - myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) - nt += popcnt(myMask(j, 1)) + popcnt(myMask(j, 2)) - end do - - if(nt > 3) cycle - - if(nt <= 2 .and. i < i_gen) then - fullMatch = .true. - return - end if - - call bitstring_to_list(myMask(1,sp), list(1), na, N_int) - - if(nt == 3 .and. i < i_gen) then - do j=1,na - banned(list(j)) = .true. - end do - else if(nt == 1 .and. na == 1) then - banned(list(1)) = .true. - end if - end do genl -end subroutine - - - From bbc6065f25a4647f136d8a5ed327d283696431e1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 15 Dec 2016 12:10:43 +0100 Subject: [PATCH 004/106] Improved parallelism in davidson --- src/Davidson/u0Hu0.irp.f | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 117e704e..9c097c49 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -324,7 +324,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) PROVIDE ref_bitmask_energy allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2)) - allocate(ut(N_st_8,n)) + allocate( ut(N_st_8,n)) v_0 = 0.d0 s_0 = 0.d0 @@ -347,7 +347,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo !$OMP END DO - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0,2) do i=shortcut(sh,2),shortcut(sh+1,2)-1 org_i = sort_idx(i,2) @@ -380,7 +380,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo !$OMP END DO - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(static,1) COLLAPSE(2) do sh=1,shortcut(0,1) do sh2=1,shortcut(0,1) if (sh==sh2) cycle From 3e37fcd12bd475a01aac1eb6057993d5a323ab7d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 19 Dec 2016 13:27:16 +0100 Subject: [PATCH 005/106] Wf analyzis --- ocaml/Basis.ml | 14 +++-- ocaml/Basis.mli | 2 +- plugins/analyze_wf/NEEDED_CHILDREN_MODULES | 1 + plugins/analyze_wf/README.rst | 12 ++++ plugins/analyze_wf/analyze_wf.irp.f | 70 ++++++++++++++++++++++ plugins/analyze_wf/occupation.irp.f | 23 +++++++ src/Determinants/density_matrix.irp.f | 1 - 7 files changed, 115 insertions(+), 8 deletions(-) create mode 100644 plugins/analyze_wf/NEEDED_CHILDREN_MODULES create mode 100644 plugins/analyze_wf/README.rst create mode 100644 plugins/analyze_wf/analyze_wf.irp.f create mode 100644 plugins/analyze_wf/occupation.irp.f diff --git a/ocaml/Basis.ml b/ocaml/Basis.ml index 869fb132..797d53f2 100644 --- a/ocaml/Basis.ml +++ b/ocaml/Basis.ml @@ -36,9 +36,11 @@ let read_element in_channel at_number element = -let to_string_general ~fmt ~atom_sep b = +let to_string_general ~fmt ~atom_sep ?ele_array b = let new_nucleus n = - Printf.sprintf "Atom %d" n + match ele_array with + | None -> Printf.sprintf "Atom %d" n + | Some x -> Printf.sprintf "%s" (Element.to_string x.(n-1)) in let rec do_work accu current_nucleus = function | [] -> List.rev accu @@ -56,12 +58,12 @@ let to_string_general ~fmt ~atom_sep b = do_work [new_nucleus 1] 1 b |> String.concat ~sep:"\n" -let to_string_gamess = - to_string_general ~fmt:Gto.Gamess ~atom_sep:"" +let to_string_gamess ?ele_array = + to_string_general ?ele_array ~fmt:Gto.Gamess ~atom_sep:"" -let to_string_gaussian b = +let to_string_gaussian ?ele_array b = String.concat ~sep:"\n" - [ to_string_general ~fmt:Gto.Gaussian ~atom_sep:"****" b ; "****" ] + [ to_string_general ?ele_array ~fmt:Gto.Gaussian ~atom_sep:"****" b ; "****" ] let to_string ?(fmt=Gto.Gamess) = match fmt with diff --git a/ocaml/Basis.mli b/ocaml/Basis.mli index 249c14f9..41ddc184 100644 --- a/ocaml/Basis.mli +++ b/ocaml/Basis.mli @@ -14,7 +14,7 @@ val read_element : in_channel -> Nucl_number.t -> Element.t -> (Gto.t * Nucl_number.t) list (** Convert the basis to a string *) -val to_string : ?fmt:Gto.fmt -> (Gto.t * Nucl_number.t) list -> string +val to_string : ?fmt:Gto.fmt -> ?ele_array:Element.t array -> (Gto.t * Nucl_number.t) list -> string (** Convert the basis to an MD5 hash *) val to_md5 : (Gto.t * Nucl_number.t) list -> MD5.t diff --git a/plugins/analyze_wf/NEEDED_CHILDREN_MODULES b/plugins/analyze_wf/NEEDED_CHILDREN_MODULES new file mode 100644 index 00000000..aae89501 --- /dev/null +++ b/plugins/analyze_wf/NEEDED_CHILDREN_MODULES @@ -0,0 +1 @@ +Determinants diff --git a/plugins/analyze_wf/README.rst b/plugins/analyze_wf/README.rst new file mode 100644 index 00000000..179e407d --- /dev/null +++ b/plugins/analyze_wf/README.rst @@ -0,0 +1,12 @@ +========== +analyze_wf +========== + +Needed Modules +============== +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. +Documentation +============= +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. diff --git a/plugins/analyze_wf/analyze_wf.irp.f b/plugins/analyze_wf/analyze_wf.irp.f new file mode 100644 index 00000000..6d8bffcf --- /dev/null +++ b/plugins/analyze_wf/analyze_wf.irp.f @@ -0,0 +1,70 @@ +program analyze_wf + implicit none + BEGIN_DOC +! Wave function analyzis + END_DOC + read_wf = .True. + SOFT_TOUCH read_wf + call run() +end + +subroutine run + implicit none + integer :: istate, i + integer :: class(0:mo_tot_num,5) + double precision :: occupation(mo_tot_num) + + write(*,'(A)') 'MO Occupation' + write(*,'(A)') '=============' + write(*,'(A)') '' + do istate=1,N_states + call get_occupation_from_dets(occupation,1) + write(*,'(A)') '' + write(*,'(A,I3)'), 'State ', istate + write(*,'(A)') '---------------' + write(*,'(A)') '' + write (*,'(A)') '======== ================' + class = 0 + do i=1,mo_tot_num + write (*,'(I8,X,F16.10)') i, occupation(i) + if (occupation(i) > 1.999d0) then + class(0,1) += 1 + class( class(0,1), 1) = i + else if (occupation(i) > 1.95d0) then + class(0,2) += 1 + class( class(0,2), 2) = i + else if (occupation(i) < 0.001d0) then + class(0,5) += 1 + class( class(0,5), 5) = i + else if (occupation(i) < 0.01d0) then + class(0,4) += 1 + class( class(0,4), 4) = i + else + class(0,3) += 1 + class( class(0,3), 3) = i + endif + enddo + write (*,'(A)') '======== ================' + write (*,'(A)') '' + + write (*,'(A)') 'Suggested classes' + write (*,'(A)') '-----------------' + write (*,'(A)') '' + write (*,'(A)') 'Core :' + write (*,*) (class(i,1), ',', i=1,class(0,1)) + write (*,*) '' + write (*,'(A)') 'Inactive :' + write (*,*) (class(i,2), ',', i=1,class(0,2)) + write (*,'(A)') '' + write (*,'(A)') 'Active :' + write (*,*) (class(i,3), ',', i=1,class(0,3)) + write (*,'(A)') '' + write (*,'(A)') 'Virtual :' + write (*,*) (class(i,4), ',', i=1,class(0,4)) + write (*,'(A)') '' + write (*,'(A)') 'Deleted :' + write (*,*) (class(i,5), ',', i=1,class(0,5)) + write (*,'(A)') '' + enddo + +end diff --git a/plugins/analyze_wf/occupation.irp.f b/plugins/analyze_wf/occupation.irp.f new file mode 100644 index 00000000..d426dc14 --- /dev/null +++ b/plugins/analyze_wf/occupation.irp.f @@ -0,0 +1,23 @@ +subroutine get_occupation_from_dets(occupation, istate) + implicit none + double precision, intent(out) :: occupation(mo_tot_num) + integer, intent(in) :: istate + BEGIN_DOC + ! Returns the average occupation of the MOs + END_DOC + integer :: i,j, ispin + integer :: list(N_int*bit_kind_size,2) + integer :: n_elements(2) + double precision :: c + + occupation = 0.d0 + do i=1,N_det + c = psi_coef(i,istate)*psi_coef(i,istate) + call bitstring_to_list_ab(psi_det(1,1,i), list, n_elements, N_int) + do ispin=1,2 + do j=1,n_elements(ispin) + occupation( list(j,ispin) ) += c + enddo + enddo + enddo +end diff --git a/src/Determinants/density_matrix.irp.f b/src/Determinants/density_matrix.irp.f index 118bbdf7..ed2f49bd 100644 --- a/src/Determinants/density_matrix.irp.f +++ b/src/Determinants/density_matrix.irp.f @@ -194,7 +194,6 @@ subroutine set_natural_mos double precision, allocatable :: tmp(:,:) label = "Natural" -! call mo_as_eigvectors_of_mo_matrix(one_body_dm_mo,size(one_body_dm_mo,1),mo_tot_num,label,-1) call mo_as_svd_vectors_of_mo_matrix(one_body_dm_mo,size(one_body_dm_mo,1),mo_tot_num,mo_tot_num,label) end From d5eb7a1963c61a57b28454028fc87f3a8ba4facf Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 19 Dec 2016 14:13:26 +0100 Subject: [PATCH 006/106] Corrected tests --- tests/bats/pseudo.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/bats/pseudo.bats b/tests/bats/pseudo.bats index 4b374d76..919d50ce 100644 --- a/tests/bats/pseudo.bats +++ b/tests/bats/pseudo.bats @@ -48,6 +48,6 @@ function run_FCI_ZMQ() { @test "FCI H2O VDZ pseudo" { qp_set_mo_class h2o_pseudo.ezfio -core "[1]" -act "[2-12]" -del "[13-23]" - run_FCI_ZMQ h2o_pseudo.ezfio 2000 -17.0399584106077 -17.0400170044515 + run_FCI_ZMQ h2o_pseudo.ezfio 2000 -17.035547572687399 -17.035583407558221 } From 0653c435cd573994b192016d8b184329a0d6ae66 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 19 Dec 2016 14:24:43 +0100 Subject: [PATCH 007/106] Travis --- src/Davidson/u0Hu0.irp.f | 2 +- tests/bats/pseudo.bats | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 9c097c49..6e20f0d0 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -380,7 +380,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo !$OMP END DO - !$OMP DO SCHEDULE(static,1) COLLAPSE(2) + !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0,1) do sh2=1,shortcut(0,1) if (sh==sh2) cycle diff --git a/tests/bats/pseudo.bats b/tests/bats/pseudo.bats index 919d50ce..4b374d76 100644 --- a/tests/bats/pseudo.bats +++ b/tests/bats/pseudo.bats @@ -48,6 +48,6 @@ function run_FCI_ZMQ() { @test "FCI H2O VDZ pseudo" { qp_set_mo_class h2o_pseudo.ezfio -core "[1]" -act "[2-12]" -del "[13-23]" - run_FCI_ZMQ h2o_pseudo.ezfio 2000 -17.035547572687399 -17.035583407558221 + run_FCI_ZMQ h2o_pseudo.ezfio 2000 -17.0399584106077 -17.0400170044515 } From 8cbe460f177c4d991de4f315585028d97c0eb421 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 26 Dec 2016 17:11:44 +0100 Subject: [PATCH 008/106] Introduces PT2max ZMQ --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 121 ----------------------- plugins/Full_CI_ZMQ/target_pt2_zmq.irp.f | 95 ++++++++++++++++++ plugins/Full_CI_ZMQ/zmq_selection.irp.f | 117 ++++++++++++++++++++++ 3 files changed, 212 insertions(+), 121 deletions(-) create mode 100644 plugins/Full_CI_ZMQ/target_pt2_zmq.irp.f create mode 100644 plugins/Full_CI_ZMQ/zmq_selection.irp.f diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index ae0d7989..ee86a8f7 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -117,124 +117,3 @@ program fci_zmq call ezfio_set_full_ci_zmq_energy(CI_energy(1)) call ezfio_set_full_ci_zmq_energy_pt2(E_CI_before(1)+pt2(1)) end - - - - -subroutine ZMQ_selection(N_in, pt2) - use f77_zmq - use selection_types - - implicit none - - character*(512) :: task - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - integer, intent(in) :: N_in - type(selection_buffer) :: b - integer :: i, N - integer, external :: omp_get_thread_num - double precision, intent(out) :: pt2(N_states) - - - if (.True.) then - PROVIDE pt2_e0_denominator - N = max(N_in,1) - provide nproc - call new_parallel_job(zmq_to_qp_run_socket,"selection") - call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator)) - call zmq_set_running(zmq_to_qp_run_socket) - call create_selection_buffer(N, N*2, b) - endif - - integer :: i_generator, i_generator_start, i_generator_max, step -! step = int(max(1.,10*elec_num/mo_tot_num) - - step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) - step = max(1,step) - do i= 1, N_det_generators,step - i_generator_start = i - i_generator_max = min(i+step-1,N_det_generators) - write(task,*) i_generator_start, i_generator_max, 1, N - call add_task_to_taskserver(zmq_to_qp_run_socket,task) - end do - - !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) - i = omp_get_thread_num() - if (i==0) then - call selection_collector(b, pt2) - else - call selection_slave_inproc(i) - endif - !$OMP END PARALLEL - call end_parallel_job(zmq_to_qp_run_socket, 'selection') - if (N_in > 0) then - call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN - call copy_H_apply_buffer_to_wf() - if (s2_eig) then - call make_s2_eigenfunction - endif - endif -end subroutine - - -subroutine selection_slave_inproc(i) - implicit none - integer, intent(in) :: i - - call run_selection_slave(1,i,pt2_e0_denominator) -end - -subroutine selection_collector(b, pt2) - use f77_zmq - use selection_types - use bitmasks - implicit none - - - type(selection_buffer), intent(inout) :: b - double precision, intent(out) :: pt2(N_states) - double precision :: pt2_mwen(N_states) - integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - - integer(ZMQ_PTR), external :: new_zmq_pull_socket - integer(ZMQ_PTR) :: zmq_socket_pull - - integer :: msg_size, rc, more - integer :: acc, i, j, robin, N, ntask - double precision, allocatable :: val(:) - integer(bit_kind), allocatable :: det(:,:,:) - integer, allocatable :: task_id(:) - integer :: done - real :: time, time0 - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - zmq_socket_pull = new_zmq_pull_socket() - allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det)) - done = 0 - more = 1 - pt2(:) = 0d0 - call CPU_TIME(time0) - do while (more == 1) - call pull_selection_results(zmq_socket_pull, pt2_mwen, val(1), det(1,1,1), N, task_id, ntask) - pt2 += pt2_mwen - do i=1, N - call add_to_selection_buffer(b, det(1,1,i), val(i)) - end do - - do i=1, ntask - if(task_id(i) == 0) then - print *, "Error in collector" - endif - call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) - end do - done += ntask - call CPU_TIME(time) -! print *, "DONE" , done, time - time0 - end do - - - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - call end_zmq_pull_socket(zmq_socket_pull) - call sort_selection_buffer(b) -end subroutine - diff --git a/plugins/Full_CI_ZMQ/target_pt2_zmq.irp.f b/plugins/Full_CI_ZMQ/target_pt2_zmq.irp.f new file mode 100644 index 00000000..52f825f1 --- /dev/null +++ b/plugins/Full_CI_ZMQ/target_pt2_zmq.irp.f @@ -0,0 +1,95 @@ +program fci_zmq + implicit none + integer :: i,j,k + logical, external :: detEq + + double precision, allocatable :: pt2(:) + integer :: Nmin, Nmax + integer :: n_det_before, to_select + double precision :: threshold_davidson_in, ratio, E_ref, pt2_ratio + + allocate (pt2(N_states)) + + pt2 = 1.d0 + threshold_davidson_in = threshold_davidson + threshold_davidson = threshold_davidson_in * 100.d0 + SOFT_TOUCH threshold_davidson + + double precision :: E_CI_before(N_states) + do while (dabs(pt2(1)) > pt2_max) + print *, 'N_det = ', N_det + print *, 'N_states = ', N_states + do k=1, N_states + print*,'State ',k + print *, 'PT2 = ', pt2(k) + print *, 'E = ', CI_energy(k) + print *, 'E(before)+PT2 = ', E_CI_before(k)+pt2(k) + enddo + print *, '-----' + E_CI_before(1:N_states) = CI_energy(1:N_states) + call ezfio_set_full_ci_zmq_energy(CI_energy(1)) + + n_det_before = N_det + to_select = N_det + to_select = max(64-to_select, to_select) + call ZMQ_selection(to_select, pt2) + + PROVIDE psi_coef + PROVIDE psi_det + PROVIDE psi_det_sorted + + call diagonalize_CI + call save_wavefunction + call ezfio_set_full_ci_zmq_energy(CI_energy(1)) + enddo + + threshold_selectors = max(threshold_selectors,threshold_selectors_pt2) + threshold_generators = max(threshold_generators,threshold_generators_pt2) + threshold_davidson = threshold_davidson_in + TOUCH threshold_selectors threshold_generators threshold_davidson + call diagonalize_CI + call ZMQ_selection(0, pt2) + + E_ref = CI_energy(1) + pt2(1) + pt2_ratio = (E_ref + pt2_max - HF_energy) / (E_ref - HF_energy) + print *, 'Est FCI = ', E_ref + + Nmax = N_det + Nmin = N_det/8 + do while (Nmax-Nmin > 1) + call diagonalize_CI + ratio = (CI_energy(1) - HF_energy) / (E_ref - HF_energy) + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + TOUCH psi_coef psi_det + if (ratio < pt2_ratio) then + Nmin = N_det + to_select = (Nmax-Nmin)/2 + call ZMQ_selection(to_select, pt2) + else + Nmax = N_det + N_det = Nmin + (Nmax-Nmin)/2 + endif + print *, '-----' + print *, 'Det min, Det max: ', Nmin, Nmax + print *, 'Ratio : ', ratio, ' ~ ', pt2_ratio + print *, 'HF_energy = ', HF_energy + print *, 'Est FCI = ', E_ref + print *, 'N_det = ', N_det + print *, 'E = ', CI_energy(1) + print *, 'PT2 = ', pt2(1) + enddo + call ZMQ_selection(0, pt2) + print *, '------' + print *, 'E = ', CI_energy(1) + print *, 'PT2 = ', pt2(1) + + E_CI_before(1:N_states) = CI_energy(1:N_states) + call save_wavefunction + call ezfio_set_full_ci_zmq_energy(CI_energy(1)) + call ezfio_set_full_ci_zmq_energy_pt2(E_CI_before(1)+pt2(1)) +end + + + + diff --git a/plugins/Full_CI_ZMQ/zmq_selection.irp.f b/plugins/Full_CI_ZMQ/zmq_selection.irp.f new file mode 100644 index 00000000..75992273 --- /dev/null +++ b/plugins/Full_CI_ZMQ/zmq_selection.irp.f @@ -0,0 +1,117 @@ +subroutine ZMQ_selection(N_in, pt2) + use f77_zmq + use selection_types + + implicit none + + character*(512) :: task + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + integer, intent(in) :: N_in + type(selection_buffer) :: b + integer :: i, N + integer, external :: omp_get_thread_num + double precision, intent(out) :: pt2(N_states) + + + if (.True.) then + PROVIDE pt2_e0_denominator + N = max(N_in,1) + provide nproc + call new_parallel_job(zmq_to_qp_run_socket,"selection") + call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator)) + call zmq_set_running(zmq_to_qp_run_socket) + call create_selection_buffer(N, N*2, b) + endif + + integer :: i_generator, i_generator_start, i_generator_max, step +! step = int(max(1.,10*elec_num/mo_tot_num) + + step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) + step = max(1,step) + do i= 1, N_det_generators,step + i_generator_start = i + i_generator_max = min(i+step-1,N_det_generators) + write(task,*) i_generator_start, i_generator_max, 1, N + call add_task_to_taskserver(zmq_to_qp_run_socket,task) + end do + + !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) + i = omp_get_thread_num() + if (i==0) then + call selection_collector(b, pt2) + else + call selection_slave_inproc(i) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, 'selection') + if (N_in > 0) then + call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN + call copy_H_apply_buffer_to_wf() + if (s2_eig) then + call make_s2_eigenfunction + endif + endif +end subroutine + + +subroutine selection_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call run_selection_slave(1,i,pt2_e0_denominator) +end + +subroutine selection_collector(b, pt2) + use f77_zmq + use selection_types + use bitmasks + implicit none + + + type(selection_buffer), intent(inout) :: b + double precision, intent(out) :: pt2(N_states) + double precision :: pt2_mwen(N_states) + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_pull_socket + integer(ZMQ_PTR) :: zmq_socket_pull + + integer :: msg_size, rc, more + integer :: acc, i, j, robin, N, ntask + double precision, allocatable :: val(:) + integer(bit_kind), allocatable :: det(:,:,:) + integer, allocatable :: task_id(:) + integer :: done + real :: time, time0 + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_pull = new_zmq_pull_socket() + allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det)) + done = 0 + more = 1 + pt2(:) = 0d0 + call CPU_TIME(time0) + do while (more == 1) + call pull_selection_results(zmq_socket_pull, pt2_mwen, val(1), det(1,1,1), N, task_id, ntask) + pt2 += pt2_mwen + do i=1, N + call add_to_selection_buffer(b, det(1,1,i), val(i)) + end do + + do i=1, ntask + if(task_id(i) == 0) then + print *, "Error in collector" + endif + call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) + end do + done += ntask + call CPU_TIME(time) +! print *, "DONE" , done, time - time0 + end do + + + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_pull_socket(zmq_socket_pull) + call sort_selection_buffer(b) +end subroutine + From 956c1e46807c8452714ef3f25e7599aaba6e6808 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 26 Dec 2016 17:38:26 +0100 Subject: [PATCH 009/106] target_pt2_ratio_zmq --- .../Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f diff --git a/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f b/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f new file mode 100644 index 00000000..10ef4777 --- /dev/null +++ b/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f @@ -0,0 +1,96 @@ +program fci_zmq + implicit none + integer :: i,j,k + logical, external :: detEq + + double precision, allocatable :: pt2(:) + integer :: Nmin, Nmax + integer :: n_det_before, to_select + double precision :: threshold_davidson_in, ratio, E_ref + + allocate (pt2(N_states)) + + pt2 = 1.d0 + threshold_davidson_in = threshold_davidson + threshold_davidson = threshold_davidson_in * 100.d0 + SOFT_TOUCH threshold_davidson + + ! Stopping criterion is the PT2max + + double precision :: E_CI_before(N_states) + do while (dabs(pt2(1)) > pt2_max) + print *, 'N_det = ', N_det + print *, 'N_states = ', N_states + do k=1, N_states + print*,'State ',k + print *, 'PT2 = ', pt2(k) + print *, 'E = ', CI_energy(k) + print *, 'E(before)+PT2 = ', E_CI_before(k)+pt2(k) + enddo + print *, '-----' + E_CI_before(1:N_states) = CI_energy(1:N_states) + call ezfio_set_full_ci_zmq_energy(CI_energy(1)) + + n_det_before = N_det + to_select = N_det + to_select = max(64-to_select, to_select) + call ZMQ_selection(to_select, pt2) + + PROVIDE psi_coef + PROVIDE psi_det + PROVIDE psi_det_sorted + + call diagonalize_CI + call save_wavefunction + call ezfio_set_full_ci_zmq_energy(CI_energy(1)) + enddo + + threshold_selectors = max(threshold_selectors,threshold_selectors_pt2) + threshold_generators = max(threshold_generators,threshold_generators_pt2) + threshold_davidson = threshold_davidson_in + TOUCH threshold_selectors threshold_generators threshold_davidson + call diagonalize_CI + call ZMQ_selection(0, pt2) + + E_ref = CI_energy(1) + pt2(1) + print *, 'Est FCI = ', E_ref + + Nmax = N_det + Nmin = N_det/8 + do while (Nmax-Nmin > 1) + call diagonalize_CI + ratio = (CI_energy(1) - HF_energy) / (E_ref - HF_energy) + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + TOUCH psi_coef psi_det + if (ratio < var_pt2_ratio) then + Nmin = N_det + to_select = (Nmax-Nmin)/2 + call ZMQ_selection(to_select, pt2) + else + Nmax = N_det + N_det = Nmin + (Nmax-Nmin)/2 + endif + print *, '-----' + print *, 'Det min, Det max: ', Nmin, Nmax + print *, 'Ratio : ', ratio, ' ~ ', var_pt2_ratio + print *, 'HF_energy = ', HF_energy + print *, 'Est FCI = ', E_ref + print *, 'N_det = ', N_det + print *, 'E = ', CI_energy(1) + print *, 'PT2 = ', pt2(1) + enddo + call ZMQ_selection(0, pt2) + print *, '------' + print *, 'E = ', CI_energy(1) + print *, 'PT2 = ', pt2(1) + + E_CI_before(1:N_states) = CI_energy(1:N_states) + call save_wavefunction + call ezfio_set_full_ci_zmq_energy(CI_energy(1)) + call ezfio_set_full_ci_zmq_energy_pt2(E_CI_before(1)+pt2(1)) +end + + + + From fb5432abe00634a17c0288641f55bcb7ff604725 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 26 Dec 2016 19:02:53 +0100 Subject: [PATCH 010/106] Better target_pt2_ratio_zmq --- .../Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f b/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f index 10ef4777..77bbab03 100644 --- a/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f @@ -7,6 +7,10 @@ program fci_zmq integer :: Nmin, Nmax integer :: n_det_before, to_select double precision :: threshold_davidson_in, ratio, E_ref + + double precision, allocatable :: psi_coef_ref(:,:) + integer(bit_kind), allocatable :: psi_det_ref(:,:,:) + allocate (pt2(N_states)) @@ -56,34 +60,39 @@ program fci_zmq print *, 'Est FCI = ', E_ref Nmax = N_det - Nmin = N_det/8 + Nmin = 2 + allocate (psi_coef_ref(size(psi_coef_sorted,1),size(psi_coef_sorted,2))) + allocate (psi_det_ref(N_int,2,size(psi_det_sorted,3))) + psi_coef_ref = psi_coef_sorted + psi_det_ref = psi_det_sorted + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + TOUCH psi_coef psi_det do while (Nmax-Nmin > 1) + psi_coef = psi_coef_ref + psi_det = psi_det_ref + TOUCH psi_det psi_coef call diagonalize_CI ratio = (CI_energy(1) - HF_energy) / (E_ref - HF_energy) - psi_det = psi_det_sorted - psi_coef = psi_coef_sorted - TOUCH psi_coef psi_det if (ratio < var_pt2_ratio) then Nmin = N_det - to_select = (Nmax-Nmin)/2 - call ZMQ_selection(to_select, pt2) else Nmax = N_det - N_det = Nmin + (Nmax-Nmin)/2 endif + N_det = Nmin + (Nmax-Nmin)/2 print *, '-----' print *, 'Det min, Det max: ', Nmin, Nmax print *, 'Ratio : ', ratio, ' ~ ', var_pt2_ratio - print *, 'HF_energy = ', HF_energy - print *, 'Est FCI = ', E_ref print *, 'N_det = ', N_det print *, 'E = ', CI_energy(1) - print *, 'PT2 = ', pt2(1) enddo call ZMQ_selection(0, pt2) print *, '------' + print *, 'HF_energy = ', HF_energy + print *, 'Est FCI = ', E_ref print *, 'E = ', CI_energy(1) print *, 'PT2 = ', pt2(1) + print *, 'E+PT2 = ', CI_energy(1)+pt2(1) E_CI_before(1:N_states) = CI_energy(1:N_states) call save_wavefunction From 82772b96c7675b263e1b0a0b13e3678d38f17ae9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 29 Dec 2016 22:00:41 +0100 Subject: [PATCH 011/106] MRCC_selected --- plugins/Psiref_threshold/psi_ref.irp.f | 71 ++++-- plugins/mrcc_selected/dressing.irp.f | 80 ++----- plugins/mrcc_selected/mrcc_selected.irp.f | 1 - plugins/mrcc_selected/mrcepa0_general.irp.f | 15 +- src/Davidson/u0Hu0.irp.f | 234 ++++++++++++++------ src/Determinants/Fock_diag.irp.f | 9 + 6 files changed, 239 insertions(+), 171 deletions(-) diff --git a/plugins/Psiref_threshold/psi_ref.irp.f b/plugins/Psiref_threshold/psi_ref.irp.f index ee69ef5c..62321140 100644 --- a/plugins/Psiref_threshold/psi_ref.irp.f +++ b/plugins/Psiref_threshold/psi_ref.irp.f @@ -1,5 +1,44 @@ use bitmasks +! BEGIN_PROVIDER [ integer(bit_kind), psi_ref, (N_int,2,psi_det_size) ] +!&BEGIN_PROVIDER [ double precision, psi_ref_coef, (psi_det_size,n_states) ] +!&BEGIN_PROVIDER [ integer, idx_ref, (psi_det_size) ] +!&BEGIN_PROVIDER [ integer, N_det_ref ] +! implicit none +! BEGIN_DOC +! ! Reference wave function, defined as determinants with amplitudes > 0.05 +! ! idx_ref gives the indice of the ref determinant in psi_det. +! END_DOC +! integer :: i, k, l +! logical :: good +! double precision, parameter :: threshold=0.01d0 +! double precision :: t(N_states) +! N_det_ref = 0 +! do l = 1, N_states +! t(l) = threshold * abs_psi_coef_max(l) +! enddo +! do i=1,N_det +! good = .False. +! do l=1, N_states +! psi_ref_coef(i,l) = 0.d0 +! good = good.or.(dabs(psi_coef(i,l)) > t(l)) +! enddo +! if (good) then +! N_det_ref = N_det_ref+1 +! do k=1,N_int +! psi_ref(k,1,N_det_ref) = psi_det(k,1,i) +! psi_ref(k,2,N_det_ref) = psi_det(k,2,i) +! enddo +! idx_ref(N_det_ref) = i +! do k=1,N_states +! psi_ref_coef(N_det_ref,k) = psi_coef(i,k) +! enddo +! endif +! enddo +! call write_int(output_determinants,N_det_ref, 'Number of determinants in the reference') +! +!END_PROVIDER + BEGIN_PROVIDER [ integer(bit_kind), psi_ref, (N_int,2,psi_det_size) ] &BEGIN_PROVIDER [ double precision, psi_ref_coef, (psi_det_size,n_states) ] &BEGIN_PROVIDER [ integer, idx_ref, (psi_det_size) ] @@ -10,30 +49,16 @@ use bitmasks ! idx_ref gives the indice of the ref determinant in psi_det. END_DOC integer :: i, k, l - logical :: good - double precision, parameter :: threshold=0.05d0 - double precision :: t(N_states) - N_det_ref = 0 - do l = 1, N_states - t(l) = threshold * abs_psi_coef_max(l) - enddo - do i=1,N_det - good = .False. - do l=1, N_states - psi_ref_coef(i,l) = 0.d0 - good = good.or.(dabs(psi_coef(i,l)) > t(l)) + double precision, parameter :: threshold=0.01d0 + + call find_reference(threshold, N_det_ref, idx_ref) + do l=1,N_states + do i=1,N_det_ref + psi_ref_coef(i,l) = psi_coef(idx_ref(i), l) enddo - if (good) then - N_det_ref = N_det_ref+1 - do k=1,N_int - psi_ref(k,1,N_det_ref) = psi_det(k,1,i) - psi_ref(k,2,N_det_ref) = psi_det(k,2,i) - enddo - idx_ref(N_det_ref) = i - do k=1,N_states - psi_ref_coef(N_det_ref,k) = psi_coef(i,k) - enddo - endif + enddo + do i=1,N_det_ref + psi_ref(:,:,i) = psi_det(:,:,idx_ref(i)) enddo call write_int(output_determinants,N_det_ref, 'Number of determinants in the reference') diff --git a/plugins/mrcc_selected/dressing.irp.f b/plugins/mrcc_selected/dressing.irp.f index c772e2aa..23fedcee 100644 --- a/plugins/mrcc_selected/dressing.irp.f +++ b/plugins/mrcc_selected/dressing.irp.f @@ -534,63 +534,9 @@ END_PROVIDER END_PROVIDER -! BEGIN_PROVIDER [ double precision, delta_cas, (N_det_ref, N_det_ref, N_states) ] -! use bitmasks -! implicit none -! integer :: i,j,k -! double precision :: Hjk, Hki, Hij, pre(N_det_ref), wall -! integer :: i_state, degree, npre, ipre(N_det_ref), npres(N_det_ref) -! -! ! provide lambda_mrcc -! npres = 0 -! delta_cas = 0d0 -! call wall_time(wall) -! print *, "dcas ", wall -! do i_state = 1, N_states -! !!$OMP PARALLEL DO default(none) schedule(dynamic) private(pre,npre,ipre,j,k,Hjk,Hki,degree) shared(npres,lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,N_det_ref) -! do k=1,N_det_non_ref -! if(lambda_mrcc(i_state, k) == 0d0) cycle -! npre = 0 -! do i=1,N_det_ref -! call i_h_j(psi_non_ref(1,1,k),psi_ref(1,1,i), N_int,Hki) -! if(Hki /= 0d0) then -! !!$OMP ATOMIC -! npres(i) += 1 -! npre += 1 -! ipre(npre) = i -! pre(npre) = Hki -! end if -! end do -! -! -! do i=1,npre -! do j=1,i -! !!$OMP ATOMIC -! delta_cas(ipre(i),ipre(j),i_state) += pre(i) * pre(j) * lambda_mrcc(i_state, k) -! end do -! end do -! end do -! !!$OMP END PARALLEL DO -! npre=0 -! do i=1,N_det_ref -! npre += npres(i) -! end do -! !stop -! do i=1,N_det_ref -! do j=1,i -! delta_cas(j,i,i_state) = delta_cas(i,j,i_state) -! end do -! end do -! end do -! -! call wall_time(wall) -! print *, "dcas", wall -! ! stop -! END_PROVIDER - - BEGIN_PROVIDER [ double precision, delta_cas, (N_det_ref, N_det_ref, N_states) ] -&BEGIN_PROVIDER [ double precision, delta_cas_s2, (N_det_ref, N_det_ref, N_states) ] + BEGIN_PROVIDER [ double precision, delta_ref, (N_det_ref, N_det_ref, N_states) ] +&BEGIN_PROVIDER [ double precision, delta_ref_s2, (N_det_ref, N_det_ref, N_states) ] use bitmasks implicit none integer :: i,j,k @@ -600,22 +546,22 @@ END_PROVIDER provide lambda_mrcc dIj do i_state = 1, N_states - !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Sjk,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,delta_cas_s2,N_det_ref,dij) + !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Sjk,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_ref,delta_ref_s2,N_det_ref,dij) do i=1,N_det_ref do j=1,i call get_excitation_degree(psi_ref(1,1,i), psi_ref(1,1,j), degree, N_int) - delta_cas(i,j,i_state) = 0d0 - delta_cas_s2(i,j,i_state) = 0d0 + delta_ref(i,j,i_state) = 0d0 + delta_ref_s2(i,j,i_state) = 0d0 do k=1,N_det_non_ref call i_h_j(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Hjk) call get_s2(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Sjk) - delta_cas(i,j,i_state) += Hjk * dij(i, k, i_state) ! * Hki * lambda_mrcc(i_state, k) - delta_cas_s2(i,j,i_state) += Sjk * dij(i, k, i_state) ! * Ski * lambda_mrcc(i_state, k) + delta_ref(i,j,i_state) += Hjk * dij(i, k, i_state) ! * Hki * lambda_mrcc(i_state, k) + delta_ref_s2(i,j,i_state) += Sjk * dij(i, k, i_state) ! * Ski * lambda_mrcc(i_state, k) end do - delta_cas(j,i,i_state) = delta_cas(i,j,i_state) - delta_cas_s2(j,i,i_state) = delta_cas_s2(i,j,i_state) + delta_ref(j,i,i_state) = delta_ref(i,j,i_state) + delta_ref_s2(j,i,i_state) = delta_ref_s2(i,j,i_state) end do end do !$OMP END PARALLEL DO @@ -739,7 +685,7 @@ end function !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii, delta_mrcepa0_ij_s2, delta_mrcepa0_ii_s2) & !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2,contrib_s2,contrib2_s2) & !$OMP shared(active_sorb, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef, cepa0_shortcut, det_cepa0_active) & - !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas, delta_cas_s2) & + !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_ref, delta_ref_s2) & !$OMP shared(notf,i_state, sortRef, sortRefIdx, dij) do blok=1,cepa0_shortcut(0) do i=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1 @@ -781,8 +727,8 @@ end function notf = notf+1 ! call i_h_j(psi_non_ref(1,1,det_cepa0_idx(k)),psi_ref(1,1,J),N_int,HJk) - contrib = delta_cas(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) - contrib_s2 = delta_cas_s2(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) + contrib = delta_ref(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) + contrib_s2 = delta_ref_s2(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) if(dabs(psi_ref_coef(J,i_state)).ge.1.d-3) then contrib2 = contrib / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state) @@ -828,7 +774,7 @@ END_PROVIDER integer :: II, blok - provide delta_cas lambda_mrcc + provide delta_ref lambda_mrcc allocate(idx_sorted_bit(N_det)) idx_sorted_bit(:) = -1 do i=1,N_det_non_ref diff --git a/plugins/mrcc_selected/mrcc_selected.irp.f b/plugins/mrcc_selected/mrcc_selected.irp.f index 91592e62..b64f968d 100644 --- a/plugins/mrcc_selected/mrcc_selected.irp.f +++ b/plugins/mrcc_selected/mrcc_selected.irp.f @@ -8,7 +8,6 @@ program mrsc2sub read_wf = .True. SOFT_TOUCH read_wf - call print_cas_coefs call set_generators_bitmasks_as_holes_and_particles call run(N_states,energy) if(do_pt2_end)then diff --git a/plugins/mrcc_selected/mrcepa0_general.irp.f b/plugins/mrcc_selected/mrcepa0_general.irp.f index e3a2d1f5..812aeef0 100644 --- a/plugins/mrcc_selected/mrcepa0_general.irp.f +++ b/plugins/mrcc_selected/mrcepa0_general.irp.f @@ -60,16 +60,17 @@ subroutine run(N_st,energy) end -subroutine print_cas_coefs +subroutine print_ref_coefs implicit none integer :: i,j - print *, 'CAS' - print *, '===' - do i=1,N_det_cas - print *, (psi_cas_coef(i,j), j=1,N_states) - call debug_det(psi_cas(1,1,i),N_int) + print *, 'Reference' + print *, '=========' + do i=1,N_det_ref + print *, (psi_ref_coef(i,j), j=1,N_states) + call debug_det(psi_ref(1,1,i),N_int) enddo + print *, '' call write_double(6,ci_energy(1),"Initial CI energy") end @@ -202,7 +203,7 @@ subroutine run_pt2(N_st,energy) print*,'Last iteration only to compute the PT2' - N_det_generators = N_det_cas + N_det_generators = N_det_ref N_det_selectors = N_det_non_ref do i=1,N_det_generators diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 6e20f0d0..9e76bc92 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -32,20 +32,20 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) use bitmasks implicit none BEGIN_DOC - ! Computes v_0 = H|u_0> + ! Computes v_0 = H|u_0> ! ! n : number of determinants ! ! H_jj : array of + ! END_DOC integer, intent(in) :: N_st,n,Nint, sze_8 double precision, intent(out) :: v_0(sze_8,N_st) double precision, intent(in) :: u_0(sze_8,N_st) double precision, intent(in) :: H_jj(n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) - double precision :: hij - double precision, allocatable :: vt(:,:) - double precision, allocatable :: ut(:,:) + double precision :: hij,s2 + double precision, allocatable :: vt(:,:), ut(:,:), st(:,:) integer :: i,j,k,l, jj,ii integer :: i0, j0 @@ -57,77 +57,41 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) integer :: N_st_8 integer, external :: align_double - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut + integer :: blockb, blockb2, istep + double precision :: ave_workload, workload, target_workload_inv + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st N_st_8 = align_double(N_st) ASSERT (Nint > 0) ASSERT (Nint == N_int) ASSERT (n>0) - PROVIDE ref_bitmask_energy + PROVIDE ref_bitmask_energy allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2)) - allocate(ut(N_st_8,n)) + allocate( ut(N_st_8,n)) v_0 = 0.d0 - do i=1,n - do istate=1,N_st - ut(istate,i) = u_0(i,istate) - enddo - enddo - call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint) call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint) - + !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& - !$OMP SHARED(n,H_jj,keys_tmp,ut,Nint,v_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) - allocate(vt(N_st_8,n)) + !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& + !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) + allocate(vt(N_st_8,n),st(N_st_8,n)) Vt = 0.d0 - - !$OMP DO SCHEDULE(dynamic) - do sh=1,shortcut(0,1) - do sh2=1,shortcut(0,1) - exa = popcnt(xor(version(1,sh,1), version(1,sh2,1))) - if(exa > 2) then - cycle - end if - do ni=2,Nint - exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1))) - end do - if(exa > 2) then - cycle - end if - - do i=shortcut(sh,1),shortcut(sh+1,1)-1 - org_i = sort_idx(i,1) - do ni=1,Nint - sorted_i(ni) = sorted(ni,i,1) - enddo - - jloop: do j=shortcut(sh2,1),shortcut(sh2+1,1)-1 - org_j = sort_idx(j,1) - ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) - if(ext > 4) then - cycle jloop - endif - do ni=2,Nint - ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) - if(ext > 4) then - cycle jloop - endif - end do - call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) - do istate=1,N_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - enddo - enddo jloop - enddo + St = 0.d0 + + !$OMP DO + do i=1,n + do istate=1,N_st + ut(istate,i) = u_0(sort_idx(i,2),istate) enddo enddo - !$OMP END DO NOWAIT - + !$OMP END DO + !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0,2) do i=shortcut(sh,2),shortcut(sh+1,2)-1 @@ -135,40 +99,164 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) do j=shortcut(sh,2),shortcut(sh+1,2)-1 org_j = sort_idx(j,2) ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2))) + if (ext > 4) cycle do ni=2,Nint ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2))) + if (ext > 4) exit end do - if(ext /= 4) then - cycle - endif - call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) - do istate=1,N_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - enddo + if(ext == 4) then + call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) + call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) + do istate=1,n_st + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) + enddo + end if end do end do enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL + !$OMP END DO + + !$OMP DO + do i=1,n + do istate=1,N_st + ut(istate,i) = u_0(sort_idx(i,1),istate) + enddo + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0,1) + do sh2=1,shortcut(0,1) + if (sh==sh2) cycle + + exa = 0 + do ni=1,Nint + exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1))) + end do + if(exa > 2) then + cycle + end if + + do i=shortcut(sh,1),shortcut(sh+1,1)-1 + org_i = sort_idx(i,1) + do ni=1,Nint + sorted_i(ni) = sorted(ni,i,1) + enddo + + do j=shortcut(sh2,1),shortcut(sh2+1,1)-1 + ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) + if (ext > 4) cycle + do ni=2,Nint + ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) + if (ext > 4) exit + end do + if(ext <= 4) then + org_j = sort_idx(j,1) + call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) + if (hij /= 0.d0) then + do istate=1,n_st + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) + enddo + endif + if (ext /= 2) then + call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) + if (s2 /= 0.d0) then + do istate=1,n_st + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) + enddo + endif + endif + endif + enddo + + enddo + enddo + + exa = 0 + + do i=shortcut(sh,1),shortcut(sh+1,1)-1 + org_i = sort_idx(i,1) + do ni=1,Nint + sorted_i(ni) = sorted(ni,i,1) + enddo + + do j=shortcut(sh,1),i-1 + ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) + if (ext > 4) cycle + do ni=2,Nint + ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) + if (ext > 4) exit + end do + if(ext <= 4) then + org_j = sort_idx(j,1) + call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) + if (hij /= 0.d0) then + do istate=1,n_st + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) + enddo + endif + if (ext /= 2) then + call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) + if (s2 /= 0.d0) then + do istate=1,n_st + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) + enddo + endif + endif + endif + enddo + + do j=i+1,shortcut(sh+1,1)-1 + if (i==j) cycle + ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) + if (ext > 4) cycle + do ni=2,Nint + ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) + if (ext > 4) exit + end do + if(ext <= 4) then + org_j = sort_idx(j,1) + call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) + if (hij /= 0.d0) then + do istate=1,n_st + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) + enddo + endif + if (ext /= 2) then + call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) + if (s2 /= 0.d0) then + do istate=1,n_st + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) + enddo + endif + endif + endif + enddo + enddo + enddo + !$OMP END DO + + !$OMP CRITICAL (u0Hu0) do istate=1,N_st - do i=n,1,-1 + do i=1,n v_0(i,istate) = v_0(i,istate) + vt(istate,i) enddo enddo - !$OMP END CRITICAL + !$OMP END CRITICAL (u0Hu0) - deallocate(vt) + deallocate(vt,st) !$OMP END PARALLEL - + do istate=1,N_st do i=1,n - v_0(i,istate) += H_jj(i) * u_0(i,istate) + v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate) enddo enddo deallocate (shortcut, sort_idx, sorted, version, ut) end + BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ] implicit none BEGIN_DOC diff --git a/src/Determinants/Fock_diag.irp.f b/src/Determinants/Fock_diag.irp.f index a99bbcad..01393fe1 100644 --- a/src/Determinants/Fock_diag.irp.f +++ b/src/Determinants/Fock_diag.irp.f @@ -19,6 +19,15 @@ subroutine build_fock_tmp(fock_diag_tmp,det_ref,Nint) fock_diag_tmp = 0.d0 E0 = 0.d0 + if (Ne(1) /= elec_alpha_num) then + print *, 'Error in build_fock_tmp (alpha)', Ne(1), Ne(2) + stop -1 + endif + if (Ne(2) /= elec_beta_num) then + print *, 'Error in build_fock_tmp (beta)', Ne(1), Ne(2) + stop -1 + endif + ! Occupied MOs do ii=1,elec_alpha_num i = occ(ii,1) From bbe9024304f67653b81bac0cfa5ff13431c26566 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 29 Dec 2016 22:01:09 +0100 Subject: [PATCH 012/106] Forgot file --- src/Davidson/find_reference.irp.f | 41 +++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 src/Davidson/find_reference.irp.f diff --git a/src/Davidson/find_reference.irp.f b/src/Davidson/find_reference.irp.f new file mode 100644 index 00000000..0cafd739 --- /dev/null +++ b/src/Davidson/find_reference.irp.f @@ -0,0 +1,41 @@ +subroutine find_reference(thresh,n_ref,result) + implicit none + double precision, intent(in) :: thresh + integer, intent(out) :: result(N_det),n_ref + integer :: i,j,istate + double precision :: i_H_psi_array(1), E0, hii, norm + double precision :: de + integer(bit_kind), allocatable :: psi_ref_(:,:,:) + double precision, allocatable :: psi_ref_coef_(:,:) + + allocate(psi_ref_coef_(N_det,1), psi_ref_(N_int,2,N_det)) + n_ref = 1 + result(1) = 1 + istate = 1 + psi_ref_coef_(1,1) = psi_coef(1,istate) + psi_ref_(:,:,1) = psi_det(:,:,1) + norm = psi_ref_coef_(1,1) * psi_ref_coef_(1,1) + call u_0_H_u_0(E0,psi_ref_coef_,n_ref,psi_ref_,N_int,1,size(psi_ref_coef_,1)) + print *, '' + print *, 'Reference determinants' + print *, '======================' + print *, '' + print *, n_ref, ': E0 = ', E0 + nuclear_repulsion + call debug_det(psi_ref_(1,1,n_ref),N_int) + do i=2,N_det + call i_h_psi(psi_det(1,1,i),psi_ref_(1,1,1),psi_ref_coef_(1,istate),N_int, & + n_ref,size(psi_ref_coef_,1),1,i_H_psi_array) + call i_H_j(psi_det(1,1,i),psi_det(1,1,i),N_int,hii) + de = i_H_psi_array(istate)**2 / (E0 - hii) + if (dabs(de) > thresh) then + n_ref += 1 + result(n_ref) = i + psi_ref_(:,:,n_ref) = psi_det(:,:,i) + psi_ref_coef_(n_ref,1) = psi_coef(i,istate) + call u_0_H_u_0(E0,psi_ref_coef_,n_ref,psi_ref_,N_int,1,size(psi_ref_coef_,1)) + print *, n_ref, ': E0 = ', E0 + nuclear_repulsion + call debug_det(psi_ref_(1,1,n_ref),N_int) + endif + enddo +end + From 0ef200d6b134d7beb41a35827e06b9f389973484 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 29 Dec 2016 22:11:11 +0100 Subject: [PATCH 013/106] Forgot files --- plugins/mrcc_selected/EZFIO.cfg | 33 +++++++++++++++++++ plugins/mrcc_selected/NEEDED_CHILDREN_MODULES | 1 + plugins/mrcc_selected/README.rst | 12 +++++++ 3 files changed, 46 insertions(+) create mode 100644 plugins/mrcc_selected/EZFIO.cfg create mode 100644 plugins/mrcc_selected/NEEDED_CHILDREN_MODULES create mode 100644 plugins/mrcc_selected/README.rst diff --git a/plugins/mrcc_selected/EZFIO.cfg b/plugins/mrcc_selected/EZFIO.cfg new file mode 100644 index 00000000..b64637e6 --- /dev/null +++ b/plugins/mrcc_selected/EZFIO.cfg @@ -0,0 +1,33 @@ +[lambda_type] +type: Positive_int +doc: lambda type +interface: ezfio,provider,ocaml +default: 0 + +[energy] +type: double precision +doc: Calculated energy +interface: ezfio + +[energy_pt2] +type: double precision +doc: Calculated energy with PT2 contribution +interface: ezfio + +[energy] +type: double precision +doc: Calculated energy +interface: ezfio + +[thresh_dressed_ci] +type: Threshold +doc: Threshold on the convergence of the dressed CI energy +interface: ezfio,provider,ocaml +default: 1.e-5 + +[n_it_max_dressed_ci] +type: Strictly_positive_int +doc: Maximum number of dressed CI iterations +interface: ezfio,provider,ocaml +default: 10 + diff --git a/plugins/mrcc_selected/NEEDED_CHILDREN_MODULES b/plugins/mrcc_selected/NEEDED_CHILDREN_MODULES new file mode 100644 index 00000000..ea28c761 --- /dev/null +++ b/plugins/mrcc_selected/NEEDED_CHILDREN_MODULES @@ -0,0 +1 @@ +Perturbation Selectors_full Generators_full Psiref_threshold MRCC_Utils ZMQ diff --git a/plugins/mrcc_selected/README.rst b/plugins/mrcc_selected/README.rst new file mode 100644 index 00000000..997d005e --- /dev/null +++ b/plugins/mrcc_selected/README.rst @@ -0,0 +1,12 @@ +======= +mrcepa0 +======= + +Needed Modules +============== +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. +Documentation +============= +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. From e1d014aa41793642942f5ed6872d09afb18af4e4 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 4 Jan 2017 10:06:41 +0100 Subject: [PATCH 014/106] init --- config/ifort.cfg | 2 +- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 412 +++++++++++++++++++++++++++++- 2 files changed, 410 insertions(+), 4 deletions(-) diff --git a/config/ifort.cfg b/config/ifort.cfg index 4b1429b8..041c302e 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -18,7 +18,7 @@ IRPF90_FLAGS : --ninja --align=32 # 0 : Deactivate # [OPTION] -MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below CACHE : 1 ; Enable cache_compile.py OPENMP : 1 ; Append OpenMP flags diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index c81b1266..676b4b0d 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -85,9 +85,10 @@ program fci_zmq if(do_pt2_end)then print*,'Last iteration only to compute the PT2' threshold_selectors = 1.d0 - threshold_generators = 0.9999d0 + threshold_generators = 1d0 ! 0.9999d0 E_CI_before(1:N_states) = CI_energy(1:N_states) - call ZMQ_selection(0, pt2) + !call ZMQ_selection(0, pt2) pour non-stochastic + call ZMQ_pt2(pt2) print *, 'Final step' print *, 'N_det = ', N_det print *, 'N_states = ', N_states @@ -103,8 +104,182 @@ program fci_zmq call save_wavefunction end +! subroutine ZMQ_pt2(pt2) +! use f77_zmq +! use selection_types +! +! implicit none +! +! character*(1000000) :: task +! integer(ZMQ_PTR) :: zmq_to_qp_run_socket +! type(selection_buffer) :: b +! integer :: i, N +! integer, external :: omp_get_thread_num +! double precision, intent(out) :: pt2(N_states) +! +! integer*8, allocatable :: bulk(:), tirage(:) +! integer, allocatable :: todo(:) +! double precision, allocatable :: pt2_detail(:,:), val(:,:), weight(:) +! double precision :: sume, sume2 +! double precision :: tot_n +! +! allocate(bulk(N_det), tirage(N_det), todo(0:N_det), pt2_detail(N_states, N_det), val(N_states, N_det)) +! +! sume = 0d0 +! sume2 = 0d0 +! tot_n = 0d0 +! bulk = 0 +! tirage = 0 +! todo = 0 +! +! +! N = 1 +! provide nproc +! provide ci_electronic_energy +! call new_parallel_job(zmq_to_qp_run_socket,"pt2") +! call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) +! call zmq_set_running(zmq_to_qp_run_socket) +! call create_selection_buffer(N, N*2, b) +! +! integer :: i_generator, i_generator_end, generator_per_task, step +! +! integer :: mergeN +! mergeN = 100 +! call get_carlo_workbatch(tirage, weight, todo, bulk, 1d-2, mergeN) +! print *, "CARLO", todo(0), mergeN +! +! generator_per_task = todo(0)/1000 + 1 +! do i=1,todo(0),generator_per_task +! i_generator_end = min(i+generator_per_task-1, todo(0)) +! print *, "TASK", (i_generator_end-i+1), todo(i:i_generator_end) +! write(task,*) (i_generator_end-i+1), todo(i:i_generator_end) +! call add_task_to_taskserver(zmq_to_qp_run_socket,task) +! end do +! print *, "tasked" +! pt2_detail = 0d0 +! !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) +! i = omp_get_thread_num() +! if (i==0) then +! call pt2_collector(b, pt2_detail) +! else +! call pt2_slave_inproc(i) +! endif +! !$OMP END PARALLEL +! call end_parallel_job(zmq_to_qp_run_socket, 'pt2') +! print *, "daune" +! val += pt2_detail +! call perform_carlo(tirage, weight, bulk, val, sume, sume2, mergeN) +! tot_n = 0 +! double precision :: sweight +! sweight = 0d0 +! do i=1,N_det +! if(weight(i) /= 0) tot_n = tot_n + dfloat(bulk(i)) +! sweight += weight(i) +! end do +! print *, "PT2_DETAIL", tot_n, sume/tot_n, sume, sume2 +! pt2 = 0d0 +! do i=1,N_det +! if(weight(i) /= 0d0) exit +! pt2(:) += pt2_detail(:,i) +! end do +! print *, "N_determinist = ", i-1 +! end subroutine +subroutine ZMQ_pt2(pt2) + use f77_zmq + use selection_types + + implicit none + + character*(1000000) :: task + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + type(selection_buffer) :: b + integer, external :: omp_get_thread_num + double precision, intent(out) :: pt2(N_states) + + + double precision :: pt2_detail(N_states, N_det_generators), comb(100000) + logical :: computed(N_det_generators) + integer :: tbc(0:N_det_generators) + integer :: i, Ncomb, generator_per_task, i_generator_end + integer, external :: pt2_find + provide nproc + + call new_parallel_job(zmq_to_qp_run_socket,"pt2") + call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) + call zmq_set_running(zmq_to_qp_run_socket) + call create_selection_buffer(1, 1*2, b) + + + call random_seed() + + computed = .false. + tbc(0) = first_det_of_comb - 1 + do i=1, tbc(0) + tbc(i) = i + computed(i) = .true. + end do + print *, "detererminist initial ", tbc(0)+1 + !LOOP? + call get_carlo_workbatch(1d-3, computed, comb, Ncomb, tbc) + generator_per_task = tbc(0)/1000 + 1 + print *, "TASK", tbc(0), tbc(1:tbc(0)) + do i=1,tbc(0),generator_per_task + i_generator_end = min(i+generator_per_task-1, tbc(0)) + !print *, "TASK", (i_generator_end-i+1), tbc(i:i_generator_end) + write(task,*) (i_generator_end-i+1), tbc(i:i_generator_end) + call add_task_to_taskserver(zmq_to_qp_run_socket,task) + end do + pt2_detail = 0d0 + + !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) + i = omp_get_thread_num() + if (i==0) then + call pt2_collector(b, pt2_detail) + else + call pt2_slave_inproc(i) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, 'pt2') + double precision :: E0, avg, eqt + double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) + call do_carlo(tbc, Ncomb, comb, pt2_detail, sumabove, sum2above, Nabove) + tbc(0) = 0 + !END LOOP? + integer :: tooth + !-8.091550677158776E-003 + call get_first_tooth(computed, tooth) + E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1)) + avg = E0 + (sumabove(tooth) / Nabove(tooth)) + eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) + print *, "PT2 ", avg, "+/-", eqt + pt2 = 0d0 +end subroutine + + +subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, sumabove, sum2above, Nabove) + integer, intent(in) :: tbc(0:N_det_generators), Ncomb + double precision, intent(in) :: comb(Ncomb), pt2_detail(N_states, N_det_generators) + double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) + integer :: i, dets(comb_teeth) + double precision :: myVal, myVal2 + + + do i=1,Ncomb + call get_comb(comb(i), dets) + myVal = 0d0 + myVal2 = 0d0 + do j=comb_teeth,1,-1 + if(pt2_detail(1, dets(j)) == -1d0) print *, "uncalculatedidified", dets(j), pt2_detail(1, dets(j)-1:dets(j)+1) + myVal += pt2_detail(1, dets(j)) / weight(dets(j)) * comb_step + sumabove(j) += myVal + sum2above(j) += myVal**2 + Nabove(j) += 1 + end do + end do +end subroutine + subroutine ZMQ_selection(N_in, pt2) use f77_zmq @@ -112,7 +287,7 @@ subroutine ZMQ_selection(N_in, pt2) implicit none - character*(512) :: task + character*(1000000) :: task integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer, intent(in) :: N_in type(selection_buffer) :: b @@ -164,6 +339,72 @@ subroutine selection_slave_inproc(i) call run_selection_slave(1,i,ci_electronic_energy) end +subroutine pt2_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call run_pt2_slave(1,i,ci_electronic_energy) +end + +subroutine pt2_collector(b, pt2_detail) + use f77_zmq + use selection_types + use bitmasks + implicit none + + + type(selection_buffer), intent(inout) :: b + double precision, intent(out) :: pt2_detail(N_states, N_det) + double precision :: pt2_mwen(N_states, N_det) + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_pull_socket + integer(ZMQ_PTR) :: zmq_socket_pull + + integer :: msg_size, rc, more + integer :: acc, i, j, robin, N, ntask + double precision, allocatable :: val(:) + integer(bit_kind), allocatable :: det(:,:,:) + integer, allocatable :: task_id(:) + integer :: done, Nindex + integer, allocatable :: index(:) + real :: time, time0 + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_pull = new_zmq_pull_socket() + allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det), index(N_det)) + done = 0 + more = 1 + pt2_detail = -1d0 + call CPU_TIME(time0) + do while (more == 1) + call pull_pt2_results(zmq_socket_pull, Nindex, index, pt2_mwen, task_id, ntask) + do i=1,Nindex + pt2_detail(:, index(i)) += pt2_mwen(:,i) + end do + + !do i=1, N + ! call add_to_selection_buffer(b, det(1,1,i), val(i)) + !end do + + do i=1, ntask + if(task_id(i) == 0) then + print *, "Error in collector" + endif + call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) + end do + done += ntask + call CPU_TIME(time) +! print *, "DONE" , done, time - time0 + end do + + + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_pull_socket(zmq_socket_pull) + call sort_selection_buffer(b) +end subroutine + + subroutine selection_collector(b, pt2) use f77_zmq use selection_types @@ -218,3 +459,168 @@ subroutine selection_collector(b, pt2) call sort_selection_buffer(b) end subroutine + + +integer function pt2_find(v, w) + implicit none + double precision :: v, w(N_det) + integer :: i,l,h + + l = 0 + h = N_det-1 + + do while(h >= l) + i = (h+l)/2 + if(w(i+1) > v) then + h = i-1 + else + l = i+1 + end if + end do + pt2_find = l+1 +end function + + +BEGIN_PROVIDER [ integer, comb_teeth ] + implicit none + comb_teeth = 10 +END_PROVIDER + + + +subroutine get_first_tooth(computed, first_teeth) + implicit none + logical, intent(in) :: computed(N_det_generators) + integer, intent(out) :: first_teeth + integer :: i + + first_teeth = 1 + do i=first_det_of_comb, N_det_generators + if(not(computed(i))) then + first_teeth = i + exit + end if + end do + + do i=comb_teeth, 1, -1 + if(first_det_of_teeth(i) < first_teeth) then + first_teeth = i + exit + end if + end do +end subroutine + +subroutine get_carlo_workbatch(maxWorkload, computed, comb, Ncomb, tbc) + implicit none + double precision, intent(in) :: maxWorkload + double precision, intent(out) :: comb(N_det_generators) + integer, intent(inout) :: tbc(0:N_det_generators) + integer, intent(out) :: Ncomb + logical, intent(inout) :: computed(N_det_generators) + integer :: i, dets(comb_teeth) + double precision :: myWorkload + + myWorkload = 0d0 + + do i=1,size(comb) + call RANDOM_NUMBER(comb(i)) + comb(i) = comb(i) * comb_step + call add_comb(comb(i), computed, tbc, myWorkload) + Ncomb = i + if(myWorkload > maxWorkload) exit + end do +end subroutine + + +subroutine get_comb(stato, dets) + implicit none + double precision, intent(in) :: stato + integer, intent(out) :: dets(comb_teeth) + double precision :: curs + integer :: j + integer, external :: pt2_find + + curs = 1d0 - stato + do j = comb_teeth, 1, -1 + dets(j) = pt2_find(curs, cweight) + curs -= comb_step + end do +end subroutine + + +subroutine add_comb(comb, computed, tbc, workload) + implicit none + double precision, intent(in) :: comb + logical, intent(inout) :: computed(N_det_generators) + double precision, intent(inout) :: workload + integer, intent(inout) :: tbc(0:N_det_generators) + integer :: i, dets(comb_teeth) + + call get_comb(comb, dets) + + do i = 1, comb_teeth + if(not(computed(dets(i)))) then + tbc(0) += 1 + tbc(tbc(0)) = dets(i) + workload += comb_workload(dets(i)) + computed(dets(i)) = .true. + end if + end do +end subroutine + + + + BEGIN_PROVIDER [ double precision, weight, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, cweight, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, comb_workload, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, comb_step ] +&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth) ] +&BEGIN_PROVIDER [ integer, first_det_of_comb ] + implicit none + integer :: i + double precision :: norm_left, stato + integer, external :: pt2_find + + weight(1) = psi_coef_generators(1,1)**2 + cweight(1) = psi_coef_generators(1,1)**2 + + do i=2,N_det_generators + weight(i) = psi_coef_generators(i,1)**2 + cweight(i) = cweight(i-1) + psi_coef_generators(i,1)**2 + end do + + weight = weight / cweight(N_det_generators) + cweight = cweight / cweight(N_det_generators) + comb_workload = 1d0 / dfloat(N_det_generators) + + norm_left = 1d0 + + comb_step = 1d0/dfloat(comb_teeth) + do i=1,N_det_generators + if(weight(i)/norm_left < comb_step/2d0) then + first_det_of_comb = i + exit + end if + norm_left -= weight(i) + end do + + comb_step = 1d0 / dfloat(comb_teeth) * (1d0 - cweight(first_det_of_comb-1)) + + stato = 1d0 - comb_step + 1d-5 + do i=comb_teeth, 1, -1 + first_det_of_teeth(i) = pt2_find(stato, cweight) + stato -= comb_step + end do +print *, first_det_of_teeth(1), first_det_of_comb + if(first_det_of_teeth(1) /= first_det_of_comb) stop "comb provider" +END_PROVIDER + + + + + + + + + + From 8832b28ac97bd588a2c8a760f9a03af45eb97bba Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 4 Jan 2017 12:21:29 +0100 Subject: [PATCH 015/106] Corrected tests --- tests/bats/mrcepa0.bats | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/bats/mrcepa0.bats b/tests/bats/mrcepa0.bats index dc9e0bb4..6bca8b7e 100644 --- a/tests/bats/mrcepa0.bats +++ b/tests/bats/mrcepa0.bats @@ -16,7 +16,7 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT energy="$(ezfio get mrcepa0 energy_pt2)" - eq $energy -76.23752746236 1.e-4 + eq $energy -76.2385617521816 1.e-4 } @test "MRCC H2O cc-pVDZ" { @@ -32,7 +32,7 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT energy="$(ezfio get mrcepa0 energy_pt2)" - eq $energy -76.237469267705 2.e-4 + eq $energy -76.2385052514433 2.e-4 } @test "MRSC2 H2O cc-pVDZ" { @@ -48,7 +48,7 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT energy="$(ezfio get mrcepa0 energy_pt2)" - eq $energy -76.2347764009137 2.e-4 + eq $energy -76.235786994991 2.e-4 } @test "MRCEPA0 H2O cc-pVDZ" { @@ -64,6 +64,6 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT energy="$(ezfio get mrcepa0 energy_pt2)" - eq $energy -76.2406942855164 2.e-4 + eq $energy -76.2417725924747 2.e-4 } From d1e52144d3d9f8658efd05fe4fb734e110b788ee Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 4 Jan 2017 13:41:37 +0100 Subject: [PATCH 016/106] bias at long runtime --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 84 ++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 25 deletions(-) diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 676b4b0d..842d5d6b 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -204,34 +204,46 @@ subroutine ZMQ_pt2(pt2) integer :: tbc(0:N_det_generators) integer :: i, Ncomb, generator_per_task, i_generator_end integer, external :: pt2_find + + double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) + double precision, external :: omp_get_wtime + double precision :: time0, time + + provide nproc - call new_parallel_job(zmq_to_qp_run_socket,"pt2") - call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) - call zmq_set_running(zmq_to_qp_run_socket) - call create_selection_buffer(1, 1*2, b) - - call random_seed() - + computed = .false. tbc(0) = first_det_of_comb - 1 do i=1, tbc(0) tbc(i) = i computed(i) = .true. end do - print *, "detererminist initial ", tbc(0)+1 - !LOOP? + pt2_detail = 0d0 + + time0 = omp_get_wtime() + print *, "grep - time - avg - err - n_combs" + do while(.true.) + + call new_parallel_job(zmq_to_qp_run_socket,"pt2") + call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) + call zmq_set_running(zmq_to_qp_run_socket) + call create_selection_buffer(1, 1*2, b) + + + + + + call get_carlo_workbatch(1d-3, computed, comb, Ncomb, tbc) generator_per_task = tbc(0)/1000 + 1 - print *, "TASK", tbc(0), tbc(1:tbc(0)) do i=1,tbc(0),generator_per_task i_generator_end = min(i+generator_per_task-1, tbc(0)) !print *, "TASK", (i_generator_end-i+1), tbc(i:i_generator_end) write(task,*) (i_generator_end-i+1), tbc(i:i_generator_end) call add_task_to_taskserver(zmq_to_qp_run_socket,task) end do - pt2_detail = 0d0 !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) i = omp_get_thread_num() @@ -243,17 +255,37 @@ subroutine ZMQ_pt2(pt2) !$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, 'pt2') double precision :: E0, avg, eqt - double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) call do_carlo(tbc, Ncomb, comb, pt2_detail, sumabove, sum2above, Nabove) - tbc(0) = 0 !END LOOP? integer :: tooth !-8.091550677158776E-003 call get_first_tooth(computed, tooth) - E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1)) - avg = E0 + (sumabove(tooth) / Nabove(tooth)) - eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) - print *, "PT2 ", avg, "+/-", eqt + print *, "TOOTH ", tooth + + !!! ASSERT + do i=1,first_det_of_teeth(tooth)-1 + if(not(computed(i))) stop "deter non calc" + end do + logical :: ok + ok = .false. + do i=first_det_of_teeth(tooth), first_det_of_teeth(tooth+1)-1 + if(not(computed(i))) ok = .true. + end do + if(not(ok)) stop "not OK..." + !!!!! + + if(Nabove(tooth) >= 30) then + E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1)) + avg = E0 + (sumabove(tooth) / Nabove(tooth)) + eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) + time = omp_get_wtime() + print "(A, 5(E15.7))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth) + else + print *, Nabove(tooth), "< 30 combs" + end if + tbc(0) = 0 + end do + pt2 = 0d0 end subroutine @@ -271,7 +303,7 @@ subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, sumabove, sum2above, Nabove) myVal = 0d0 myVal2 = 0d0 do j=comb_teeth,1,-1 - if(pt2_detail(1, dets(j)) == -1d0) print *, "uncalculatedidified", dets(j), pt2_detail(1, dets(j)-1:dets(j)+1) + !if(pt2_detail(1, dets(j)) == -1d0) print *, "uncalculatedidified", dets(j), pt2_detail(1, dets(j)-1:dets(j)+1) myVal += pt2_detail(1, dets(j)) / weight(dets(j)) * comb_step sumabove(j) += myVal sum2above(j) += myVal**2 @@ -354,7 +386,7 @@ subroutine pt2_collector(b, pt2_detail) type(selection_buffer), intent(inout) :: b - double precision, intent(out) :: pt2_detail(N_states, N_det) + double precision, intent(inout) :: pt2_detail(N_states, N_det) double precision :: pt2_mwen(N_states, N_det) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -375,7 +407,7 @@ subroutine pt2_collector(b, pt2_detail) allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det), index(N_det)) done = 0 more = 1 - pt2_detail = -1d0 + !pt2_detail = 0d0 call CPU_TIME(time0) do while (more == 1) call pull_pt2_results(zmq_socket_pull, Nindex, index, pt2_mwen, task_id, ntask) @@ -483,7 +515,7 @@ end function BEGIN_PROVIDER [ integer, comb_teeth ] implicit none - comb_teeth = 10 + comb_teeth = 20 END_PROVIDER @@ -503,13 +535,14 @@ subroutine get_first_tooth(computed, first_teeth) end do do i=comb_teeth, 1, -1 - if(first_det_of_teeth(i) < first_teeth) then + if(first_det_of_teeth(i) <= first_teeth) then first_teeth = i exit end if end do end subroutine + subroutine get_carlo_workbatch(maxWorkload, computed, comb, Ncomb, tbc) implicit none double precision, intent(in) :: maxWorkload @@ -574,7 +607,7 @@ end subroutine &BEGIN_PROVIDER [ double precision, cweight, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, comb_workload, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, comb_step ] -&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth) ] +&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] &BEGIN_PROVIDER [ integer, first_det_of_comb ] implicit none integer :: i @@ -597,7 +630,7 @@ end subroutine comb_step = 1d0/dfloat(comb_teeth) do i=1,N_det_generators - if(weight(i)/norm_left < comb_step/2d0) then + if(weight(i)/norm_left < comb_step/1d1) then first_det_of_comb = i exit end if @@ -611,8 +644,9 @@ end subroutine first_det_of_teeth(i) = pt2_find(stato, cweight) stato -= comb_step end do -print *, first_det_of_teeth(1), first_det_of_comb + first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 if(first_det_of_teeth(1) /= first_det_of_comb) stop "comb provider" + END_PROVIDER From 5f21ec66e2db865a10b52f9004cd49c94e316fa2 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 4 Jan 2017 14:44:18 +0100 Subject: [PATCH 017/106] working --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 34 ++++++++++++++++++------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 842d5d6b..079d3ea5 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -260,22 +260,27 @@ subroutine ZMQ_pt2(pt2) integer :: tooth !-8.091550677158776E-003 call get_first_tooth(computed, tooth) - print *, "TOOTH ", tooth + !print *, "TOOTH ", tooth !!! ASSERT - do i=1,first_det_of_teeth(tooth)-1 - if(not(computed(i))) stop "deter non calc" - end do - logical :: ok - ok = .false. - do i=first_det_of_teeth(tooth), first_det_of_teeth(tooth+1)-1 - if(not(computed(i))) ok = .true. - end do - if(not(ok)) stop "not OK..." + !do i=1,first_det_of_teeth(tooth) + ! if(not(computed(i))) stop "deter non calc" + !end do + !logical :: ok + !ok = .false. + !do i=first_det_of_teeth(tooth), first_det_of_teeth(tooth+1) + ! if(not(computed(i))) ok = .true. + !end do + !if(not(ok)) stop "not OK..." !!!!! - + double precision :: prop if(Nabove(tooth) >= 30) then E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1)) + prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - cweight(first_det_of_teeth(tooth)-1)) + !print *, "preprop ", prop, weight(first_det_of_teeth(tooth)) + prop = prop / weight(first_det_of_teeth(tooth)) + !print *, "prop", prop + E0 += pt2_detail(1,first_det_of_teeth(tooth)) * prop avg = E0 + (sumabove(tooth) / Nabove(tooth)) eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) time = omp_get_wtime() @@ -535,7 +540,7 @@ subroutine get_first_tooth(computed, first_teeth) end do do i=comb_teeth, 1, -1 - if(first_det_of_teeth(i) <= first_teeth) then + if(first_det_of_teeth(i) < first_teeth) then first_teeth = i exit end if @@ -630,7 +635,7 @@ end subroutine comb_step = 1d0/dfloat(comb_teeth) do i=1,N_det_generators - if(weight(i)/norm_left < comb_step/1d1) then + if(weight(i)/norm_left < comb_step/2d0) then first_det_of_comb = i exit end if @@ -639,12 +644,13 @@ end subroutine comb_step = 1d0 / dfloat(comb_teeth) * (1d0 - cweight(first_det_of_comb-1)) - stato = 1d0 - comb_step + 1d-5 + stato = 1d0 - comb_step! + 1d-5 do i=comb_teeth, 1, -1 first_det_of_teeth(i) = pt2_find(stato, cweight) stato -= comb_step end do first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 + first_det_of_teeth(1) = first_det_of_comb if(first_det_of_teeth(1) /= first_det_of_comb) stop "comb provider" END_PROVIDER From 6881056eafef466ee4e15c47888efcec1f30249c Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 5 Jan 2017 15:27:05 +0100 Subject: [PATCH 018/106] pt2 slave --- config/ifort.cfg | 2 +- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 129 ++++++++-------------------- plugins/Full_CI_ZMQ/pt2_slave.irp.f | 93 ++++++++++++++++++++ 3 files changed, 131 insertions(+), 93 deletions(-) create mode 100644 plugins/Full_CI_ZMQ/pt2_slave.irp.f diff --git a/config/ifort.cfg b/config/ifort.cfg index 041c302e..4b1429b8 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -18,7 +18,7 @@ IRPF90_FLAGS : --ninja --align=32 # 0 : Deactivate # [OPTION] -MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below CACHE : 1 ; Enable cache_compile.py OPENMP : 1 ; Append OpenMP flags diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 079d3ea5..40f4849a 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -104,87 +104,6 @@ program fci_zmq call save_wavefunction end -! subroutine ZMQ_pt2(pt2) -! use f77_zmq -! use selection_types -! -! implicit none -! -! character*(1000000) :: task -! integer(ZMQ_PTR) :: zmq_to_qp_run_socket -! type(selection_buffer) :: b -! integer :: i, N -! integer, external :: omp_get_thread_num -! double precision, intent(out) :: pt2(N_states) -! -! integer*8, allocatable :: bulk(:), tirage(:) -! integer, allocatable :: todo(:) -! double precision, allocatable :: pt2_detail(:,:), val(:,:), weight(:) -! double precision :: sume, sume2 -! double precision :: tot_n -! -! allocate(bulk(N_det), tirage(N_det), todo(0:N_det), pt2_detail(N_states, N_det), val(N_states, N_det)) -! -! sume = 0d0 -! sume2 = 0d0 -! tot_n = 0d0 -! bulk = 0 -! tirage = 0 -! todo = 0 -! -! -! N = 1 -! provide nproc -! provide ci_electronic_energy -! call new_parallel_job(zmq_to_qp_run_socket,"pt2") -! call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) -! call zmq_set_running(zmq_to_qp_run_socket) -! call create_selection_buffer(N, N*2, b) -! -! integer :: i_generator, i_generator_end, generator_per_task, step -! -! integer :: mergeN -! mergeN = 100 -! call get_carlo_workbatch(tirage, weight, todo, bulk, 1d-2, mergeN) -! print *, "CARLO", todo(0), mergeN -! -! generator_per_task = todo(0)/1000 + 1 -! do i=1,todo(0),generator_per_task -! i_generator_end = min(i+generator_per_task-1, todo(0)) -! print *, "TASK", (i_generator_end-i+1), todo(i:i_generator_end) -! write(task,*) (i_generator_end-i+1), todo(i:i_generator_end) -! call add_task_to_taskserver(zmq_to_qp_run_socket,task) -! end do -! print *, "tasked" -! pt2_detail = 0d0 -! !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) -! i = omp_get_thread_num() -! if (i==0) then -! call pt2_collector(b, pt2_detail) -! else -! call pt2_slave_inproc(i) -! endif -! !$OMP END PARALLEL -! call end_parallel_job(zmq_to_qp_run_socket, 'pt2') -! print *, "daune" -! val += pt2_detail -! call perform_carlo(tirage, weight, bulk, val, sume, sume2, mergeN) -! tot_n = 0 -! double precision :: sweight -! sweight = 0d0 -! do i=1,N_det -! if(weight(i) /= 0) tot_n = tot_n + dfloat(bulk(i)) -! sweight += weight(i) -! end do -! print *, "PT2_DETAIL", tot_n, sume/tot_n, sume, sume2 -! pt2 = 0d0 -! do i=1,N_det -! if(weight(i) /= 0d0) exit -! pt2(:) += pt2_detail(:,i) -! end do -! print *, "N_determinist = ", i-1 -! end subroutine - subroutine ZMQ_pt2(pt2) use f77_zmq @@ -192,16 +111,16 @@ subroutine ZMQ_pt2(pt2) implicit none - character*(1000000) :: task + character*(512) :: task integer(ZMQ_PTR) :: zmq_to_qp_run_socket type(selection_buffer) :: b integer, external :: omp_get_thread_num double precision, intent(out) :: pt2(N_states) - double precision :: pt2_detail(N_states, N_det_generators), comb(100000) - logical :: computed(N_det_generators) - integer :: tbc(0:N_det_generators) + double precision, allocatable :: pt2_detail(:,:), comb(:) + logical, allocatable :: computed(:) + integer, allocatable :: tbc(:) integer :: i, Ncomb, generator_per_task, i_generator_end integer, external :: pt2_find @@ -209,7 +128,7 @@ subroutine ZMQ_pt2(pt2) double precision, external :: omp_get_wtime double precision :: time0, time - + allocate(pt2_detail(N_states, N_det_generators), comb(100000), computed(N_det_generators), tbc(0:N_det_generators)) provide nproc call random_seed() @@ -220,6 +139,7 @@ subroutine ZMQ_pt2(pt2) tbc(i) = i computed(i) = .true. end do + pt2_detail = 0d0 time0 = omp_get_wtime() @@ -237,14 +157,16 @@ subroutine ZMQ_pt2(pt2) call get_carlo_workbatch(1d-3, computed, comb, Ncomb, tbc) - generator_per_task = tbc(0)/1000 + 1 - do i=1,tbc(0),generator_per_task + generator_per_task = 1 ! tbc(0)/300 + 1 + print *, 'TASKS REVERSED' + !do i=1,tbc(0),generator_per_task + do i=tbc(0),1,-1 ! generator_per_task i_generator_end = min(i+generator_per_task-1, tbc(0)) !print *, "TASK", (i_generator_end-i+1), tbc(i:i_generator_end) write(task,*) (i_generator_end-i+1), tbc(i:i_generator_end) call add_task_to_taskserver(zmq_to_qp_run_socket,task) end do - + print *, "tasked" !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) i = omp_get_thread_num() if (i==0) then @@ -324,7 +246,7 @@ subroutine ZMQ_selection(N_in, pt2) implicit none - character*(1000000) :: task + character*(512) :: task integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer, intent(in) :: N_in type(selection_buffer) :: b @@ -520,7 +442,7 @@ end function BEGIN_PROVIDER [ integer, comb_teeth ] implicit none - comb_teeth = 20 + comb_teeth = 100 END_PROVIDER @@ -565,7 +487,30 @@ subroutine get_carlo_workbatch(maxWorkload, computed, comb, Ncomb, tbc) comb(i) = comb(i) * comb_step call add_comb(comb(i), computed, tbc, myWorkload) Ncomb = i - if(myWorkload > maxWorkload) exit + if(myWorkload > maxWorkload .and. i >= 30) exit + end do + call reorder_tbc(tbc) +end subroutine + + +subroutine reorder_tbc(tbc) + implicit none + integer, intent(inout) :: tbc(0:N_det_generators) + logical, allocatable :: ltbc(:) + integer :: i, ci + + allocate(ltbc(N_det_generators)) + ltbc = .false. + do i=1,tbc(0) + ltbc(tbc(i)) = .true. + end do + + ci = 0 + do i=1,N_det_generators + if(ltbc(i)) then + ci += 1 + tbc(ci) = i + end if end do end subroutine diff --git a/plugins/Full_CI_ZMQ/pt2_slave.irp.f b/plugins/Full_CI_ZMQ/pt2_slave.irp.f new file mode 100644 index 00000000..91c3db63 --- /dev/null +++ b/plugins/Full_CI_ZMQ/pt2_slave.irp.f @@ -0,0 +1,93 @@ +program pt2_slave + implicit none + BEGIN_DOC +! Helper program to compute the PT2 in distributed mode. + END_DOC + + read_wf = .False. + SOFT_TOUCH read_wf + call provide_everything + call switch_qp_run_to_master + call run_wf +end + +subroutine provide_everything + PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context +! PROVIDE ci_electronic_energy mo_tot_num N_int +end + +subroutine run_wf + use f77_zmq + implicit none + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + double precision :: energy(N_states_diag) + character*(64) :: states(1) + integer :: rc, i + + call provide_everything + + zmq_context = f77_zmq_ctx_new () + states(1) = 'pt2' + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + do + + call wait_for_states(states,zmq_state,1) + + if(trim(zmq_state) == 'Stopped') then + + exit + + else if (trim(zmq_state) == 'pt2') then + + ! Selection + ! --------- + + print *, 'PT2' + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states_diag) + + !$OMP PARALLEL PRIVATE(i) + i = omp_get_thread_num() + call pt2_slave_tcp(i, energy) + !$OMP END PARALLEL + print *, 'PT2 done' + + endif + + end do +end + +subroutine update_energy(energy) + implicit none + double precision, intent(in) :: energy(N_states_diag) + BEGIN_DOC +! Update energy when it is received from ZMQ + END_DOC + integer :: j,k + do j=1,N_states + do k=1,N_det + CI_eigenvectors(k,j) = psi_coef(k,j) + enddo + enddo + call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int) + if (.True.) then + do k=1,size(ci_electronic_energy) + ci_electronic_energy(k) = energy(k) + enddo + TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors + endif + + call write_double(6,ci_energy,'Energy') +end + +subroutine pt2_slave_tcp(i,energy) + implicit none + double precision, intent(in) :: energy(N_states_diag) + integer, intent(in) :: i + + call run_pt2_slave(0,i,energy) +end + From 3979677a82b1008a30938d3b61e09e9e06b22f21 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 11 Jan 2017 15:04:00 +0100 Subject: [PATCH 019/106] MRSC2 no amplitudes --- plugins/Psiref_CAS/psi_ref.irp.f | 33 +++++++ plugins/mrsc2_no_amp/NEEDED_CHILDREN_MODULES | 1 + plugins/mrsc2_no_amp/README.rst | 12 +++ plugins/mrsc2_no_amp/mrsc2_no_amp.irp.f | 78 ++++++++++++++++ plugins/mrsc2_no_amp/sc2_no_amp.irp.f | 9 ++ src/Determinants/filter_connected.irp.f | 98 ++++++++++++++++++++ 6 files changed, 231 insertions(+) create mode 100644 plugins/mrsc2_no_amp/NEEDED_CHILDREN_MODULES create mode 100644 plugins/mrsc2_no_amp/README.rst create mode 100644 plugins/mrsc2_no_amp/mrsc2_no_amp.irp.f create mode 100644 plugins/mrsc2_no_amp/sc2_no_amp.irp.f diff --git a/plugins/Psiref_CAS/psi_ref.irp.f b/plugins/Psiref_CAS/psi_ref.irp.f index d3b6c28f..ab9e6943 100644 --- a/plugins/Psiref_CAS/psi_ref.irp.f +++ b/plugins/Psiref_CAS/psi_ref.irp.f @@ -67,3 +67,36 @@ END_PROVIDER END_PROVIDER + BEGIN_PROVIDER [double precision, norm_psi_ref, (N_states)] +&BEGIN_PROVIDER [double precision, inv_norm_psi_ref, (N_states)] + implicit none + integer :: i,j + norm_psi_ref = 0.d0 + do j = 1, N_states + do i = 1, N_det_ref + norm_psi_ref(j) += psi_ref_coef(i,j) * psi_ref_coef(i,j) + enddo + inv_norm_psi_ref(j) = 1.d0/(dsqrt(norm_psi_Ref(j))) + enddo + + END_PROVIDER + + BEGIN_PROVIDER [double precision, psi_ref_coef_interm_norm, (N_det_ref,N_states)] + implicit none + integer :: i,j + do j = 1, N_states + do i = 1, N_det_ref + psi_ref_coef_interm_norm(i,j) = inv_norm_psi_ref(j) * psi_ref_coef(i,j) + enddo + enddo + END_PROVIDER + + BEGIN_PROVIDER [double precision, psi_non_ref_coef_interm_norm, (N_det_non_ref,N_states)] + implicit none + integer :: i,j + do j = 1, N_states + do i = 1, N_det_non_ref + psi_non_ref_coef_interm_norm(i,j) = psi_non_ref_coef(i,j) * inv_norm_psi_ref(j) + enddo + enddo + END_PROVIDER diff --git a/plugins/mrsc2_no_amp/NEEDED_CHILDREN_MODULES b/plugins/mrsc2_no_amp/NEEDED_CHILDREN_MODULES new file mode 100644 index 00000000..f04fe3b0 --- /dev/null +++ b/plugins/mrsc2_no_amp/NEEDED_CHILDREN_MODULES @@ -0,0 +1 @@ +Psiref_CAS Determinants Davidson diff --git a/plugins/mrsc2_no_amp/README.rst b/plugins/mrsc2_no_amp/README.rst new file mode 100644 index 00000000..b24848f7 --- /dev/null +++ b/plugins/mrsc2_no_amp/README.rst @@ -0,0 +1,12 @@ +============ +mrsc2_no_amp +============ + +Needed Modules +============== +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. +Documentation +============= +.. Do not edit this section It was auto-generated +.. by the `update_README.py` script. diff --git a/plugins/mrsc2_no_amp/mrsc2_no_amp.irp.f b/plugins/mrsc2_no_amp/mrsc2_no_amp.irp.f new file mode 100644 index 00000000..b8b021e8 --- /dev/null +++ b/plugins/mrsc2_no_amp/mrsc2_no_amp.irp.f @@ -0,0 +1,78 @@ + BEGIN_PROVIDER [double precision, CI_eigenvectors_sc2_no_amp, (N_det,N_states_diag)] +&BEGIN_PROVIDER [double precision, CI_eigenvectors_s2_sc2_no_amp, (N_states_diag)] +&BEGIN_PROVIDER [double precision, CI_electronic_energy_sc2_no_amp, (N_states_diag)] + implicit none + integer :: i,j,k,l + integer, allocatable :: idx(:) + double precision, allocatable :: e_corr(:,:) + double precision, allocatable :: accu(:) + double precision, allocatable :: ihpsi_current(:) + double precision, allocatable :: H_jj(:),H_jj_total(:),S2_jj(:) + allocate(e_corr(N_det_non_ref,N_states),ihpsi_current(N_states),accu(N_states),H_jj(N_det_non_ref),idx(0:N_det_non_ref)) + allocate(H_jj_total(N_det),S2_jj(N_det)) + accu = 0.d0 + do i = 1, N_det_non_ref + call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef_interm_norm, N_int, N_det_ref,& + size(psi_ref_coef_interm_norm,1), N_states,ihpsi_current) + do j = 1, N_states + e_corr(i,j) = psi_non_ref_coef_interm_norm(i,j) * ihpsi_current(j) + accu(j) += e_corr(i,j) + enddo + enddo + double precision :: hjj,diag_h_mat_elem + do i = 1, N_det_non_ref + call filter_not_connected(psi_non_ref,psi_non_ref(1,1,i),N_int,N_det_non_ref,idx) + H_jj(i) = 0.d0 + do j = 1, idx(0) + H_jj(i) += e_corr(idx(j),1) + enddo + enddo + do i=1,N_Det + H_jj_total(i) = diag_h_mat_elem(psi_det(1,1,i),N_int) + call get_s2(psi_det(1,1,i),psi_det(1,1,i),N_int,S2_jj(i)) + enddo + do i=1, N_det_non_ref + H_jj_total(idx_non_ref(i)) += H_jj(i) + enddo + + + call davidson_diag_hjj_sjj(psi_det,CI_eigenvectors_sc2_no_amp,H_jj_total,S2_jj,CI_electronic_energy_sc2_no_amp,size(CI_eigenvectors_sc2_no_amp,1),N_Det,N_states,N_states_diag,N_int,6) + do i=1,N_states_diag + CI_eigenvectors_s2_sc2_no_amp(i) = S2_jj(i) + enddo + + deallocate(e_corr,ihpsi_current,accu,H_jj,idx,H_jj_total,s2_jj) +END_PROVIDER + +BEGIN_PROVIDER [ double precision, CI_energy_sc2_no_amp, (N_states_diag) ] + implicit none + BEGIN_DOC + ! N_states lowest eigenvalues of the CI matrix + END_DOC + + integer :: j + character*(8) :: st + call write_time(output_determinants) + do j=1,min(N_det,N_states_diag) + CI_energy_sc2_no_amp(j) = CI_electronic_energy_sc2_no_amp(j) + nuclear_repulsion + enddo + do j=1,min(N_det,N_states) + write(st,'(I4)') j + call write_double(output_determinants,CI_energy_sc2_no_amp(j),'Energy of state '//trim(st)) + call write_double(output_determinants,CI_eigenvectors_s2_sc2_no_amp(j),'S^2 of state '//trim(st)) + enddo + +END_PROVIDER + +subroutine diagonalize_CI_sc2_no_amp + implicit none + integer :: i,j + do j=1,N_states + do i=1,N_det + psi_coef(i,j) = CI_eigenvectors_sc2_no_amp(i,j) + enddo + enddo + SOFT_TOUCH ci_eigenvectors_s2_sc2_no_amp ci_eigenvectors_sc2_no_amp ci_electronic_energy_sc2_no_amp ci_energy_sc2_no_amp psi_coef + +end + diff --git a/plugins/mrsc2_no_amp/sc2_no_amp.irp.f b/plugins/mrsc2_no_amp/sc2_no_amp.irp.f new file mode 100644 index 00000000..622d7236 --- /dev/null +++ b/plugins/mrsc2_no_amp/sc2_no_amp.irp.f @@ -0,0 +1,9 @@ +program pouet + implicit none + integer :: i + do i = 1, 10 + call diagonalize_CI_sc2_no_amp + TOUCH psi_coef + enddo + +end diff --git a/src/Determinants/filter_connected.irp.f b/src/Determinants/filter_connected.irp.f index da333b1e..b76540f7 100644 --- a/src/Determinants/filter_connected.irp.f +++ b/src/Determinants/filter_connected.irp.f @@ -1,4 +1,102 @@ +subroutine filter_not_connected(key1,key2,Nint,sze,idx) + use bitmasks + implicit none + BEGIN_DOC + ! Returns the array idx which contains the index of the + ! + ! determinants in the array key1 that DO NOT interact + ! + ! via the H operator with key2. + ! + ! idx(0) is the number of determinants that DO NOT interact with key1 + END_DOC + integer, intent(in) :: Nint, sze + integer(bit_kind), intent(in) :: key1(Nint,2,sze) + integer(bit_kind), intent(in) :: key2(Nint,2) + integer, intent(out) :: idx(0:sze) + + integer :: i,j,l + integer :: degree_x2 + + + ASSERT (Nint > 0) + ASSERT (sze >= 0) + + l=1 + + if (Nint==1) then + + !DIR$ LOOP COUNT (1000) + do i=1,sze + degree_x2 = popcnt( xor( key1(1,1,i), key2(1,1))) & + + popcnt( xor( key1(1,2,i), key2(1,2))) + if (degree_x2 > 4) then + idx(l) = i + l = l+1 + else + cycle + endif + enddo + + else if (Nint==2) then + + !DIR$ LOOP COUNT (1000) + do i=1,sze + degree_x2 = popcnt(xor( key1(1,1,i), key2(1,1))) + & + popcnt(xor( key1(2,1,i), key2(2,1))) + & + popcnt(xor( key1(1,2,i), key2(1,2))) + & + popcnt(xor( key1(2,2,i), key2(2,2))) + if (degree_x2 > 4) then + idx(l) = i + l = l+1 + else + cycle + endif + enddo + + else if (Nint==3) then + + !DIR$ LOOP COUNT (1000) + do i=1,sze + degree_x2 = popcnt(xor( key1(1,1,i), key2(1,1))) + & + popcnt(xor( key1(1,2,i), key2(1,2))) + & + popcnt(xor( key1(2,1,i), key2(2,1))) + & + popcnt(xor( key1(2,2,i), key2(2,2))) + & + popcnt(xor( key1(3,1,i), key2(3,1))) + & + popcnt(xor( key1(3,2,i), key2(3,2))) + if (degree_x2 > 4) then + idx(l) = i + l = l+1 + else + cycle + endif + enddo + + else + + !DIR$ LOOP COUNT (1000) + do i=1,sze + degree_x2 = 0 + !DEC$ LOOP COUNT MIN(4) + do j=1,Nint + degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +& + popcnt(xor( key1(j,2,i), key2(j,2))) + if (degree_x2 > 4) then + idx(l) = i + l = l+1 + endif + enddo + if (degree_x2 <= 5) then + exit + endif + enddo + + endif + idx(0) = l-1 +end + + subroutine filter_connected(key1,key2,Nint,sze,idx) use bitmasks implicit none From fb1fa1af38f72f2a6fe4b2b6368ccb8dc81a77f6 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 12 Jan 2017 15:09:27 +0100 Subject: [PATCH 020/106] hard-coded fragmentation of first generators --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 26 +++--- plugins/Full_CI_ZMQ/run_selection_slave.irp.f | 2 +- plugins/Full_CI_ZMQ/selection.irp.f | 6 +- plugins/Full_CI_ZMQ/selection_double.irp.f | 85 ++++++++++++------- 4 files changed, 72 insertions(+), 47 deletions(-) diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 40f4849a..43a64583 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -87,7 +87,7 @@ program fci_zmq threshold_selectors = 1.d0 threshold_generators = 1d0 ! 0.9999d0 E_CI_before(1:N_states) = CI_energy(1:N_states) - !call ZMQ_selection(0, pt2) pour non-stochastic + !call ZMQ_selection(0, pt2)! pour non-stochastic call ZMQ_pt2(pt2) print *, 'Final step' print *, 'N_det = ', N_det @@ -121,7 +121,7 @@ subroutine ZMQ_pt2(pt2) double precision, allocatable :: pt2_detail(:,:), comb(:) logical, allocatable :: computed(:) integer, allocatable :: tbc(:) - integer :: i, Ncomb, generator_per_task, i_generator_end + integer :: i, j, Ncomb, generator_per_task, i_generator_end integer, external :: pt2_find double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) @@ -131,7 +131,7 @@ subroutine ZMQ_pt2(pt2) allocate(pt2_detail(N_states, N_det_generators), comb(100000), computed(N_det_generators), tbc(0:N_det_generators)) provide nproc - call random_seed() + !call random_seed() computed = .false. tbc(0) = first_det_of_comb - 1 @@ -163,9 +163,19 @@ subroutine ZMQ_pt2(pt2) do i=tbc(0),1,-1 ! generator_per_task i_generator_end = min(i+generator_per_task-1, tbc(0)) !print *, "TASK", (i_generator_end-i+1), tbc(i:i_generator_end) - write(task,*) (i_generator_end-i+1), tbc(i:i_generator_end) - call add_task_to_taskserver(zmq_to_qp_run_socket,task) + if(i > 10) then + integer :: zero + zero = 0 + write(task,*) (i_generator_end-i+1), zero, tbc(i:i_generator_end) + call add_task_to_taskserver(zmq_to_qp_run_socket,task) + else + do j=1,8 + write(task,*) (i_generator_end-i+1), j, tbc(i:i_generator_end) + call add_task_to_taskserver(zmq_to_qp_run_socket,task) + end do + end if end do + print *, "tasked" !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) i = omp_get_thread_num() @@ -180,7 +190,6 @@ subroutine ZMQ_pt2(pt2) call do_carlo(tbc, Ncomb, comb, pt2_detail, sumabove, sum2above, Nabove) !END LOOP? integer :: tooth - !-8.091550677158776E-003 call get_first_tooth(computed, tooth) !print *, "TOOTH ", tooth @@ -199,9 +208,7 @@ subroutine ZMQ_pt2(pt2) if(Nabove(tooth) >= 30) then E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1)) prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - cweight(first_det_of_teeth(tooth)-1)) - !print *, "preprop ", prop, weight(first_det_of_teeth(tooth)) prop = prop / weight(first_det_of_teeth(tooth)) - !print *, "prop", prop E0 += pt2_detail(1,first_det_of_teeth(tooth)) * prop avg = E0 + (sumabove(tooth) / Nabove(tooth)) eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) @@ -230,7 +237,6 @@ subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, sumabove, sum2above, Nabove) myVal = 0d0 myVal2 = 0d0 do j=comb_teeth,1,-1 - !if(pt2_detail(1, dets(j)) == -1d0) print *, "uncalculatedidified", dets(j), pt2_detail(1, dets(j)-1:dets(j)+1) myVal += pt2_detail(1, dets(j)) / weight(dets(j)) * comb_step sumabove(j) += myVal sum2above(j) += myVal**2 @@ -487,7 +493,7 @@ subroutine get_carlo_workbatch(maxWorkload, computed, comb, Ncomb, tbc) comb(i) = comb(i) * comb_step call add_comb(comb(i), computed, tbc, myWorkload) Ncomb = i - if(myWorkload > maxWorkload .and. i >= 30) exit + if(myWorkload > maxWorkload .and. i >= 50) exit end do call reorder_tbc(tbc) end subroutine diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index 36550116..a4ae5816 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -53,7 +53,7 @@ subroutine run_selection_slave(thread,iproc,energy) !print *, "psi_selectors_coef ", psi_selectors_coef(N_det_selectors-5:N_det_selectors, 1) !call debug_det(psi_selectors(1,1,N_det_selectors), N_int) do i_generator=i_generator_start,i_generator_max,step - call select_connected(i_generator,energy,pt2,buf) + call select_connected(i_generator,energy,pt2,buf,0) enddo endif diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 96b45774..b6b737ca 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -55,11 +55,11 @@ subroutine get_mask_phase(det, phasemask) end subroutine -subroutine select_connected(i_generator,E0,pt2,b) +subroutine select_connected(i_generator,E0,pt2,b,subset) use bitmasks use selection_types implicit none - integer, intent(in) :: i_generator + integer, intent(in) :: i_generator, subset type(selection_buffer), intent(inout) :: b double precision, intent(inout) :: pt2(N_states) integer :: k,l @@ -78,7 +78,7 @@ subroutine select_connected(i_generator,E0,pt2,b) particle_mask(k,2) = iand(generators_bitmask(k,2,s_part,l), not(psi_det_generators(k,2,i_generator)) ) enddo - call select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b) + call select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b,subset) enddo end subroutine diff --git a/plugins/Full_CI_ZMQ/selection_double.irp.f b/plugins/Full_CI_ZMQ/selection_double.irp.f index e177c494..0da47d68 100644 --- a/plugins/Full_CI_ZMQ/selection_double.irp.f +++ b/plugins/Full_CI_ZMQ/selection_double.irp.f @@ -1,10 +1,10 @@ -subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf) +subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset) use bitmasks use selection_types implicit none - integer, intent(in) :: i_generator + integer, intent(in) :: i_generator, subset integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) double precision, intent(in) :: fock_diag_tmp(mo_tot_num) double precision, intent(in) :: E0(N_states) @@ -21,7 +21,9 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) logical :: monoAdo, monoBdo; - + integer :: maskInd + maskInd = -1 + monoAdo = .true. monoBdo = .true. @@ -73,6 +75,19 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first + !if(subset /= 0 .and. mod(maskInd, 10) /= (subset-1)) then + ! maskInd += 1 + ! cycle + !end if + maskInd += 1 + + + + + if(subset == 0 .or. mod(maskInd, 8) == (subset-1)) then + + + h1 = hole_list(i1,s1) call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) @@ -121,8 +136,14 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p end if end do - - + + + + + end if + + + do s2=s1,2 sp = s1 @@ -132,41 +153,39 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p if(s1 == s2) ib = i1+1 monoAdo = .true. do i2=N_holes(s2),ib,-1 ! Generate low excitations first - - h2 = hole_list(i2,s2) - call apply_hole(pmask, s2,h2, mask, ok, N_int) - logical :: banned(mo_tot_num, mo_tot_num,2) logical :: bannedOrb(mo_tot_num, 2) - banned = .false. - - call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) - - if(fullMatch) cycle - - bannedOrb(1:mo_tot_num, 1:2) = .true. - do s3=1,2 - do i=1,N_particles(s3) - bannedOrb(particle_list(i,s3), s3) = .false. + if(subset == 0 .or. mod(maskInd, 8) == (subset-1)) then + h2 = hole_list(i2,s2) + call apply_hole(pmask, s2,h2, mask, ok, N_int) + banned = .false. + bannedOrb(1:mo_tot_num, 1:2) = .true. + do s3=1,2 + do i=1,N_particles(s3) + bannedOrb(particle_list(i,s3), s3) = .false. + enddo enddo - enddo - if(s1 /= s2) then - if(monoBdo) then - bannedOrb(h1,s1) = .false. - end if - if(monoAdo) then - bannedOrb(h2,s2) = .false. - monoAdo = .false. + if(s1 /= s2) then + if(monoBdo) then + bannedOrb(h1,s1) = .false. + end if + if(monoAdo) then + bannedOrb(h2,s2) = .false. + monoAdo = .false. + end if end if + + + call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) + if(fullMatch) cycle + + mat = 0d0 + call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) + + call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) end if - - - mat = 0d0 - call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) - call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) - enddo if(s1 /= s2) monoBdo = .false. enddo From c525fddf449a1e7a9a4fedbc49a2a953fb2c18e8 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 12 Jan 2017 15:46:10 +0100 Subject: [PATCH 021/106] forgot file --- plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 168 ++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 plugins/Full_CI_ZMQ/run_pt2_slave.irp.f diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f new file mode 100644 index 00000000..949a6d28 --- /dev/null +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -0,0 +1,168 @@ + +subroutine run_pt2_slave(thread,iproc,energy) + use f77_zmq + use selection_types + implicit none + + double precision, intent(in) :: energy(N_states_diag) + integer, intent(in) :: thread, iproc + integer :: rc, i + + integer :: worker_id, task_id(1), ctask, ltask + character*(1000000) :: task + + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_push_socket + integer(ZMQ_PTR) :: zmq_socket_push + + type(selection_buffer) :: buf, buf2 + logical :: done + + double precision :: pt2(N_states) + double precision,allocatable :: pt2_detail(:,:) + integer,allocatable :: index(:) + integer :: Nindex + + allocate(pt2_detail(N_states, N_det), index(N_det)) + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_push = new_zmq_push_socket(thread) + call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) + if(worker_id == -1) then + print *, "WORKER -1" + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) + return + end if + buf%N = 0 + ctask = 1 + pt2 = 0d0 + pt2_detail = 0d0 + do + call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task) + + done = task_id(ctask) == 0 + if (done) then + ctask = ctask - 1 + else + integer :: i_generator, i_i_generator, N, subset + read (task,*) Nindex + read (task,*) Nindex, subset, index(:Nindex) + + !!!!! + N=1 + !!!!! + if(buf%N == 0) then + ! Only first time + call create_selection_buffer(N, N*2, buf) + call create_selection_buffer(N, N*3, buf2) + else + if(N /= buf%N) stop "N changed... wtf man??" + end if + do i_i_generator=1, Nindex + i_generator = index(i_i_generator) + call select_connected(i_generator,energy,pt2_detail(1, i_i_generator),buf,subset) + pt2(:) += pt2_detail(:, i_generator) + enddo + endif + + if(done .or. ctask == size(task_id)) then + if(buf%N == 0 .and. ctask > 0) stop "uninitialized selection_buffer" + do i=1, ctask + call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) + end do + if(ctask > 0) then + call push_pt2_results(zmq_socket_push, Nindex, index, pt2_detail, task_id(1), ctask) + !print *, "pushed ", index(:Nindex) + do i=1,buf%cur + call add_to_selection_buffer(buf2, buf%det(1,1,i), buf%val(i)) + enddo + call sort_selection_buffer(buf2) + buf%mini = buf2%mini + pt2 = 0d0 + pt2_detail(:,:Nindex) = 0d0 + buf%cur = 0 + end if + ctask = 0 + end if + + if(done) exit + ctask = ctask + 1 + end do + call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) +end subroutine + + +subroutine push_pt2_results(zmq_socket_push, N, index, pt2_detail, task_id, ntask) + use f77_zmq + use selection_types + implicit none + + integer(ZMQ_PTR), intent(in) :: zmq_socket_push + double precision, intent(in) :: pt2_detail(N_states, N_det) + integer, intent(in) :: ntask, N, index(N), task_id(*) + integer :: rc + + + rc = f77_zmq_send( zmq_socket_push, N, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, index, 4*N, ZMQ_SNDMORE) + if(rc /= 4*N) stop "push" + + + rc = f77_zmq_send( zmq_socket_push, pt2_detail, 8*N_states*N, ZMQ_SNDMORE) + if(rc /= 8*N_states*N) stop "push" + + rc = f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, task_id(1), ntask*4, 0) + if(rc /= 4*ntask) stop "push" + +! Activate is zmq_socket_push is a REQ +! rc = f77_zmq_recv( zmq_socket_push, task_id(1), ntask*4, 0) +end subroutine + + +subroutine pull_pt2_results(zmq_socket_pull, N, index, pt2_detail, task_id, ntask) + use f77_zmq + use selection_types + implicit none + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + double precision, intent(inout) :: pt2_detail(N_states, N_det) + integer, intent(out) :: index(N_det) + integer, intent(out) :: N, ntask, task_id(*) + integer :: rc, rn, i + + rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) + if(rc /= 4) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, index, 4*N, 0) + if(rc /= 4*N) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, pt2_detail, N_states*8*N, 0) + if(rc /= 8*N_states*N) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, ntask, 4, 0) + if(rc /= 4) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntask*4, 0) + if(rc /= 4*ntask) stop "pull" + +! Activate is zmq_socket_pull is a REP +! rc = f77_zmq_send( zmq_socket_pull, task_id(1), ntask*4, 0) +end subroutine + + +BEGIN_PROVIDER [ double precision, pt2_workload, (N_det) ] + integer :: i + do i=1,N_det + pt2_workload(:) = dfloat(N_det - i + 1)**2 + end do + pt2_workload = pt2_workload / sum(pt2_workload) +END_PROVIDER + From 28f1c576979dec4457ad57f799a93d215be0756c Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 12 Jan 2017 17:34:42 +0100 Subject: [PATCH 022/106] removed single selection --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 2 +- plugins/Full_CI_ZMQ/selection.irp.f | 209 ---------------------------- 2 files changed, 1 insertion(+), 210 deletions(-) diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index d651f77f..087a5579 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -177,7 +177,7 @@ subroutine ZMQ_pt2(pt2) generator_per_task = 1 ! tbc(0)/300 + 1 print *, 'TASKS REVERSED' !do i=1,tbc(0),generator_per_task - do i=tbc(0),1,-1 ! generator_per_task + do i=1,tbc(0) ! generator_per_task i_generator_end = min(i+generator_per_task-1, tbc(0)) !print *, "TASK", (i_generator_end-i+1), tbc(i:i_generator_end) if(i > 10) then diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 60979329..f6ecdaa7 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -115,164 +115,6 @@ end subroutine -! Selection single -! ---------------- - -subroutine select_singles(i_gen,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf) - use bitmasks - use selection_types - implicit none - BEGIN_DOC -! Select determinants connected to i_det by H - END_DOC - integer, intent(in) :: i_gen - integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) - double precision, intent(in) :: fock_diag_tmp(mo_tot_num) - double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - type(selection_buffer), intent(inout) :: buf - - double precision :: vect(N_states, mo_tot_num) - logical :: bannedOrb(mo_tot_num) - integer :: i, j, k - integer :: h1,h2,s1,s2,i1,i2,ib,sp - integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2) - logical :: fullMatch, ok - - - do k=1,N_int - hole (k,1) = iand(psi_det_generators(k,1,i_gen), hole_mask(k,1)) - hole (k,2) = iand(psi_det_generators(k,2,i_gen), hole_mask(k,2)) - particle(k,1) = iand(not(psi_det_generators(k,1,i_gen)), particle_mask(k,1)) - particle(k,2) = iand(not(psi_det_generators(k,2,i_gen)), particle_mask(k,2)) - enddo - - ! Create lists of holes and particles - ! ----------------------------------- - - integer :: N_holes(2), N_particles(2) - integer :: hole_list(N_int*bit_kind_size,2) - integer :: particle_list(N_int*bit_kind_size,2) - - call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) - call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) - - do sp=1,2 - do i=1, N_holes(sp) - h1 = hole_list(i,sp) - call apply_hole(psi_det_generators(1,1,i_gen), sp, h1, mask, ok, N_int) - bannedOrb = .true. - do j=1,N_particles(sp) - bannedOrb(particle_list(j, sp)) = .false. - end do - call spot_hasBeen(mask, sp, psi_det_sorted, i_gen, N_det, bannedOrb, fullMatch) - if(fullMatch) cycle - vect = 0d0 - call splash_p(mask, sp, psi_selectors(1,1,i_gen), psi_phasemask(1,1,i_gen), psi_selectors_coef_transp(1,i_gen), N_det_selectors - i_gen + 1, bannedOrb, vect) - call fill_buffer_single(i_gen, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) - end do - enddo -end subroutine - - -subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) - use bitmasks - use selection_types - implicit none - - integer, intent(in) :: i_generator, sp, h1 - double precision, intent(in) :: vect(N_states, mo_tot_num) - logical, intent(in) :: bannedOrb(mo_tot_num) - double precision, intent(in) :: fock_diag_tmp(mo_tot_num) - double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - type(selection_buffer), intent(inout) :: buf - logical :: ok - integer :: s1, s2, p1, p2, ib, istate - integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) - double precision :: e_pert, delta_E, val, Hii, max_e_pert, tmp - double precision, external :: diag_H_mat_elem_fock - - - call apply_hole(psi_det_generators(1,1,i_generator), sp, h1, mask, ok, N_int) - - do p1=1,mo_tot_num - if(bannedOrb(p1)) cycle - if(vect(1, p1) == 0d0) cycle - call apply_particle(mask, sp, p1, det, ok, N_int) - - - Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) - max_e_pert = 0d0 - - do istate=1,N_states - val = vect(istate, p1) + vect(istate, p1) - delta_E = E0(istate) - Hii - tmp = dsqrt(delta_E * delta_E + val * val) - if (delta_E < 0.d0) then - tmp = -tmp - endif - e_pert = 0.5d0 * ( tmp - delta_E) - pt2(istate) += e_pert - if(dabs(e_pert) > dabs(max_e_pert)) max_e_pert = e_pert - end do - - if(dabs(max_e_pert) > buf%mini) call add_to_selection_buffer(buf, det, max_e_pert) - end do -end subroutine - - -subroutine splash_p(mask, sp, det, phasemask, coefs, N_sel, bannedOrb, vect) - use bitmasks - implicit none - - integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int,2,N_sel) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2, N_sel) - double precision, intent(in) :: coefs(N_states, N_sel) - integer, intent(in) :: sp, N_sel - logical, intent(inout) :: bannedOrb(mo_tot_num) - double precision, intent(inout) :: vect(N_states, mo_tot_num) - - integer :: i, j, h(0:2,2), p(0:3,2), nt - integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) - - do i=1,N_int - negMask(i,1) = not(mask(i,1)) - negMask(i,2) = not(mask(i,2)) - end do - - do i=1, N_sel - nt = 0 - do j=1,N_int - mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) - nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) - end do - - if(nt > 3) cycle - - do j=1,N_int - perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) - perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) - end do - - call bitstring_to_list(perMask(1,1), h(1,1), h(0,1), N_int) - call bitstring_to_list(perMask(1,2), h(1,2), h(0,2), N_int) - - call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) - call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) - - if(nt == 3) then - call get_m2(det(1,1,i), phasemask(1,1,i), bannedOrb, vect, mask, h, p, sp, coefs(1, i)) - else if(nt == 2) then - call get_m1(det(1,1,i), phasemask(1,1,i), bannedOrb, vect, mask, h, p, sp, coefs(1, i)) - else - call get_m0(det(1,1,i), phasemask(1,1,i), bannedOrb, vect, mask, h, p, sp, coefs(1, i)) - end if - end do -end subroutine - - subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) use bitmasks implicit none @@ -420,57 +262,6 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) end subroutine -subroutine spot_hasBeen(mask, sp, det, i_gen, N, banned, fullMatch) - use bitmasks - implicit none - - integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) - integer, intent(in) :: i_gen, N, sp - logical, intent(inout) :: banned(mo_tot_num) - logical, intent(out) :: fullMatch - - - integer :: i, j, na, nb, list(3), nt - integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) - - fullMatch = .false. - - do i=1,N_int - negMask(i,1) = not(mask(i,1)) - negMask(i,2) = not(mask(i,2)) - end do - - genl : do i=1, N - nt = 0 - - do j=1, N_int - myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) - myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) - nt += popcnt(myMask(j, 1)) + popcnt(myMask(j, 2)) - end do - - if(nt > 3) cycle - - if(nt <= 2 .and. i < i_gen) then - fullMatch = .true. - return - end if - - call bitstring_to_list(myMask(1,sp), list(1), na, N_int) - - if(nt == 3 .and. i < i_gen) then - do j=1,na - banned(list(j)) = .true. - end do - else if(nt == 1 .and. na == 1) then - banned(list(1)) = .true. - end if - end do genl -end subroutine - - - - ! Selection double ! ---------------- From 4908a6898319668c2be5b0839093e45013836e6b Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 13 Jan 2017 08:15:36 +0100 Subject: [PATCH 023/106] fixed fragmentation --- plugins/Full_CI_ZMQ/selection.irp.f | 88 ++++++++++++++++++----------- 1 file changed, 54 insertions(+), 34 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index f6ecdaa7..2f524022 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -261,16 +261,12 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) end do end subroutine - -! Selection double -! ---------------- - -subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf) +subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset) use bitmasks use selection_types implicit none - integer, intent(in) :: i_generator + integer, intent(in) :: i_generator, subset integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) double precision, intent(in) :: fock_diag_tmp(mo_tot_num) double precision, intent(in) :: E0(N_states) @@ -287,7 +283,9 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) logical :: monoAdo, monoBdo; - + integer :: maskInd + maskInd = -1 + monoAdo = .true. monoBdo = .true. @@ -339,6 +337,19 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first + !if(subset /= 0 .and. mod(maskInd, 10) /= (subset-1)) then + ! maskInd += 1 + ! cycle + !end if + maskInd += 1 + + + + + if(subset == 0 .or. mod(maskInd, 8) == (subset-1)) then + + + h1 = hole_list(i1,s1) call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) @@ -387,48 +398,56 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p end if end do + + + + + end if + + + do s2=s1,2 sp = s1 + if(s1 /= s2) sp = 3 ib = 1 if(s1 == s2) ib = i1+1 monoAdo = .true. do i2=N_holes(s2),ib,-1 ! Generate low excitations first - - h2 = hole_list(i2,s2) - call apply_hole(pmask, s2,h2, mask, ok, N_int) - logical :: banned(mo_tot_num, mo_tot_num,2) logical :: bannedOrb(mo_tot_num, 2) - banned = .false. - - call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) - - if(fullMatch) cycle - - bannedOrb(1:mo_tot_num, 1:2) = .true. - do s3=1,2 - do i=1,N_particles(s3) - bannedOrb(particle_list(i,s3), s3) = .false. + if(subset == 0 .or. mod(maskInd, 8) == (subset-1)) then + h2 = hole_list(i2,s2) + call apply_hole(pmask, s2,h2, mask, ok, N_int) + banned = .false. + bannedOrb(1:mo_tot_num, 1:2) = .true. + do s3=1,2 + do i=1,N_particles(s3) + bannedOrb(particle_list(i,s3), s3) = .false. + enddo enddo - enddo - if(s1 /= s2) then - if(monoBdo) then - bannedOrb(h1,s1) = .false. - end if - if(monoAdo) then - bannedOrb(h2,s2) = .false. - monoAdo = .false. + if(s1 /= s2) then + if(monoBdo) then + bannedOrb(h1,s1) = .false. + end if + if(monoAdo) then + bannedOrb(h2,s2) = .false. + monoAdo = .false. + end if end if + + + call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) + if(fullMatch) cycle + + mat = 0d0 + call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) + + call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) end if - - - mat = 0d0 - call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) - call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) enddo if(s1 /= s2) monoBdo = .false. enddo @@ -437,6 +456,7 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p end subroutine + subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) use bitmasks use selection_types From 4b6d2f0d03c8b66a24f1b244ae6876ee2910e449 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 13 Jan 2017 13:15:51 +0100 Subject: [PATCH 024/106] Added more elements to print_mo --- src/Nuclei/nuclei.irp.f | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/Nuclei/nuclei.irp.f b/src/Nuclei/nuclei.irp.f index a8def602..c4729713 100644 --- a/src/Nuclei/nuclei.irp.f +++ b/src/Nuclei/nuclei.irp.f @@ -169,7 +169,7 @@ END_PROVIDER 'Nuclear repulsion energy') END_PROVIDER -BEGIN_PROVIDER [ character*(128), element_name, (36)] +BEGIN_PROVIDER [ character*(128), element_name, (54)] BEGIN_DOC ! Array of the name of element, sorted by nuclear charge (integer) END_DOC @@ -209,4 +209,22 @@ BEGIN_PROVIDER [ character*(128), element_name, (36)] element_name(34) = 'Se' element_name(35) = 'Br' element_name(36) = 'Kr' + element_name(37) = 'Rb' + element_name(38) = 'Sr' + element_name(39) = 'Y' + element_name(40) = 'Zr' + element_name(41) = 'Nb' + element_name(42) = 'Mo' + element_name(43) = 'Tc' + element_name(44) = 'Ru' + element_name(45) = 'Rh' + element_name(46) = 'Pd' + element_name(47) = 'Ag' + element_name(48) = 'Cd' + element_name(49) = 'In' + element_name(50) = 'Sn' + element_name(51) = 'Sb' + element_name(52) = 'Te' + element_name(53) = 'I' + element_name(54) = 'Xe' END_PROVIDER From 252e604e7bc2e1d7617c38818a77853ece7c9615 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 13 Jan 2017 13:18:59 +0100 Subject: [PATCH 025/106] Dos2Unix --- plugins/CAS_SD_ZMQ/selection.irp.f | 2414 ++++++++++++++-------------- 1 file changed, 1207 insertions(+), 1207 deletions(-) diff --git a/plugins/CAS_SD_ZMQ/selection.irp.f b/plugins/CAS_SD_ZMQ/selection.irp.f index f90ee488..db8ebbf0 100644 --- a/plugins/CAS_SD_ZMQ/selection.irp.f +++ b/plugins/CAS_SD_ZMQ/selection.irp.f @@ -1,1207 +1,1207 @@ -use bitmasks - - -double precision function integral8(i,j,k,l) - implicit none - - integer, intent(in) :: i,j,k,l - double precision, external :: get_mo_bielec_integral - integer :: ii - ii = l-mo_integrals_cache_min - ii = ior(ii, k-mo_integrals_cache_min) - ii = ior(ii, j-mo_integrals_cache_min) - ii = ior(ii, i-mo_integrals_cache_min) - if (iand(ii, -64) /= 0) then - integral8 = get_mo_bielec_integral(i,j,k,l,mo_integrals_map) - else - ii = l-mo_integrals_cache_min - ii = ior( ishft(ii,6), k-mo_integrals_cache_min) - ii = ior( ishft(ii,6), j-mo_integrals_cache_min) - ii = ior( ishft(ii,6), i-mo_integrals_cache_min) - integral8 = mo_integrals_cache(ii) - endif -end function - - -BEGIN_PROVIDER [ integer(1), psi_phasemask, (N_int*bit_kind_size, 2, N_det)] - use bitmasks - implicit none - - integer :: i - do i=1, N_det - call get_mask_phase(psi_selectors(1,1,i), psi_phasemask(1,1,i)) - end do -END_PROVIDER - - -subroutine assert(cond, msg) - character(*), intent(in) :: msg - logical, intent(in) :: cond - - if(.not. cond) then - print *, "assert fail: "//msg - stop - end if -end subroutine - - -subroutine get_mask_phase(det, phasemask) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: det(N_int, 2) - integer(1), intent(out) :: phasemask(N_int*bit_kind_size, 2) - integer :: s, ni, i - logical :: change - - phasemask = 0_1 - do s=1,2 - change = .false. - do ni=1,N_int - do i=0,bit_kind_size-1 - if(BTEST(det(ni, s), i)) change = .not. change - if(change) phasemask((ni-1)*bit_kind_size + i + 1, s) = 1_1 - end do - end do - end do -end subroutine - - -subroutine select_connected(i_generator,E0,pt2,b) - use bitmasks - use selection_types - implicit none - integer, intent(in) :: i_generator - type(selection_buffer), intent(inout) :: b - double precision, intent(inout) :: pt2(N_states) - integer :: k,l - double precision, intent(in) :: E0(N_states) - - integer(bit_kind) :: hole_mask(N_int,2), particle_mask(N_int,2) - double precision :: fock_diag_tmp(2,mo_tot_num+1) - - call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int) - - do l=1,N_generators_bitmask - do k=1,N_int - hole_mask(k,1) = iand(generators_bitmask(k,1,s_hole,l), psi_det_generators(k,1,i_generator)) - hole_mask(k,2) = iand(generators_bitmask(k,2,s_hole,l), psi_det_generators(k,2,i_generator)) - particle_mask(k,1) = iand(generators_bitmask(k,1,s_part,l), not(psi_det_generators(k,1,i_generator)) ) - particle_mask(k,2) = iand(generators_bitmask(k,2,s_part,l), not(psi_det_generators(k,2,i_generator)) ) - - enddo - call select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b) - call select_singles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b) - enddo -end subroutine - - -double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2) - use bitmasks - implicit none - - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) - integer, intent(in) :: s1, s2, h1, h2, p1, p2 - logical :: change - integer(1) :: np - double precision, parameter :: res(0:1) = (/1d0, -1d0/) - - np = phasemask(h1,s1) + phasemask(p1,s1) + phasemask(h2,s2) + phasemask(p2,s2) - if(p1 < h1) np = np + 1_1 - if(p2 < h2) np = np + 1_1 - - if(s1 == s2 .and. max(h1, p1) > min(h2, p2)) np = np + 1_1 - get_phase_bi = res(iand(np,1_1)) -end subroutine - - - -! Selection single -! ---------------- - -subroutine select_singles(i_gen,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf) - use bitmasks - use selection_types - implicit none - BEGIN_DOC -! Select determinants connected to i_det by H - END_DOC - integer, intent(in) :: i_gen - integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) - double precision, intent(in) :: fock_diag_tmp(mo_tot_num) - double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - type(selection_buffer), intent(inout) :: buf - - double precision :: vect(N_states, mo_tot_num) - logical :: bannedOrb(mo_tot_num) - integer :: i, j, k - integer :: h1,h2,s1,s2,i1,i2,ib,sp - integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2) - logical :: fullMatch, ok - - - do k=1,N_int - hole (k,1) = iand(psi_det_generators(k,1,i_gen), hole_mask(k,1)) - hole (k,2) = iand(psi_det_generators(k,2,i_gen), hole_mask(k,2)) - particle(k,1) = iand(not(psi_det_generators(k,1,i_gen)), particle_mask(k,1)) - particle(k,2) = iand(not(psi_det_generators(k,2,i_gen)), particle_mask(k,2)) - enddo - - ! Create lists of holes and particles - ! ----------------------------------- - - integer :: N_holes(2), N_particles(2) - integer :: hole_list(N_int*bit_kind_size,2) - integer :: particle_list(N_int*bit_kind_size,2) - - call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) - call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) - - do sp=1,2 - do i=1, N_holes(sp) - h1 = hole_list(i,sp) - call apply_hole(psi_det_generators(1,1,i_gen), sp, h1, mask, ok, N_int) - bannedOrb = .true. - do j=1,N_particles(sp) - bannedOrb(particle_list(j, sp)) = .false. - end do - call spot_hasBeen(mask, sp, psi_selectors, i_gen, N_det, bannedOrb, fullMatch) - if(fullMatch) cycle - vect = 0d0 - call splash_p(mask, sp, psi_selectors(1,1,i_gen), psi_phasemask(1,1,i_gen), psi_selectors_coef_transp(1,i_gen), N_det_selectors - i_gen + 1, bannedOrb, vect) - call fill_buffer_single(i_gen, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) - end do - enddo -end subroutine - - -subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) - use bitmasks - use selection_types - implicit none - - integer, intent(in) :: i_generator, sp, h1 - double precision, intent(in) :: vect(N_states, mo_tot_num) - logical, intent(in) :: bannedOrb(mo_tot_num) - double precision, intent(in) :: fock_diag_tmp(mo_tot_num) - double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - type(selection_buffer), intent(inout) :: buf - logical :: ok - integer :: s1, s2, p1, p2, ib, istate - integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) - double precision :: e_pert, delta_E, val, Hii, max_e_pert, tmp - double precision, external :: diag_H_mat_elem_fock - - - call apply_hole(psi_det_generators(1,1,i_generator), sp, h1, mask, ok, N_int) - - do p1=1,mo_tot_num - if(bannedOrb(p1)) cycle - if(vect(1, p1) == 0d0) cycle - call apply_particle(mask, sp, p1, det, ok, N_int) - - Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) - max_e_pert = 0d0 - - do istate=1,N_states - val = vect(istate, p1) + vect(istate, p1) - delta_E = E0(istate) - Hii - tmp = dsqrt(delta_E * delta_E + val * val) - if (delta_E < 0.d0) then - tmp = -tmp - endif - e_pert = 0.5d0 * ( tmp - delta_E) - pt2(istate) += e_pert - if(dabs(e_pert) > dabs(max_e_pert)) max_e_pert = e_pert - end do - - if(dabs(max_e_pert) > buf%mini) then - call add_to_selection_buffer(buf, det, max_e_pert) - endif - end do -end subroutine - - -subroutine splash_p(mask, sp, det, phasemask, coefs, N_sel, bannedOrb, vect) - use bitmasks - implicit none - - integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int,2,N_sel) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2, N_sel) - double precision, intent(in) :: coefs(N_states, N_sel) - integer, intent(in) :: sp, N_sel - logical, intent(inout) :: bannedOrb(mo_tot_num) - double precision, intent(inout) :: vect(N_states, mo_tot_num) - - integer :: i, j, h(0:2,2), p(0:3,2), nt - integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) - - do i=1,N_int - negMask(i,1) = not(mask(i,1)) - negMask(i,2) = not(mask(i,2)) - end do - - do i=1, N_sel - nt = 0 - do j=1,N_int - mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) - nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) - end do - - if(nt > 3) cycle - - do j=1,N_int - perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) - perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) - end do - - call bitstring_to_list(perMask(1,1), h(1,1), h(0,1), N_int) - call bitstring_to_list(perMask(1,2), h(1,2), h(0,2), N_int) - - call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) - call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) - - if(nt == 3) then - call get_m2(det(1,1,i), phasemask(1,1,i), bannedOrb, vect, mask, h, p, sp, coefs(1, i)) - else if(nt == 2) then - call get_m1(det(1,1,i), phasemask(1,1,i), bannedOrb, vect, mask, h, p, sp, coefs(1, i)) - else - call get_m0(det(1,1,i), phasemask(1,1,i), bannedOrb, vect, mask, h, p, sp, coefs(1, i)) - end if - end do -end subroutine - - -subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) - logical, intent(in) :: bannedOrb(mo_tot_num) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: vect(N_states, mo_tot_num) - integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) - integer :: i, j, h1, h2, p1, p2, sfix, hfix, pfix, hmob, pmob, puti - double precision :: hij - double precision, external :: get_phase_bi, integral8 - - integer, parameter :: turn3_2(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) - integer, parameter :: turn2(2) = (/2,1/) - - if(h(0,sp) == 2) then - h1 = h(1, sp) - h2 = h(2, sp) - do i=1,3 - puti = p(i, sp) - if(bannedOrb(puti)) cycle - p1 = p(turn3_2(1,i), sp) - p2 = p(turn3_2(2,i), sp) - hij = integral8(p1, p2, h1, h2) - integral8(p2, p1, h1, h2) - hij *= get_phase_bi(phasemask, sp, sp, h1, p1, h2, p2) - vect(:, puti) += hij * coefs - end do - else if(h(0,sp) == 1) then - sfix = turn2(sp) - hfix = h(1,sfix) - pfix = p(1,sfix) - hmob = h(1,sp) - do j=1,2 - puti = p(j, sp) - if(bannedOrb(puti)) cycle - pmob = p(turn2(j), sp) - hij = integral8(pfix, pmob, hfix, hmob) - hij *= get_phase_bi(phasemask, sp, sfix, hmob, pmob, hfix, pfix) - vect(:, puti) += hij * coefs - end do - else - puti = p(1,sp) - if(.not. bannedOrb(puti)) then - sfix = turn2(sp) - p1 = p(1,sfix) - p2 = p(2,sfix) - h1 = h(1,sfix) - h2 = h(2,sfix) - hij = (integral8(p1,p2,h1,h2) - integral8(p2,p1,h1,h2)) - hij *= get_phase_bi(phasemask, sfix, sfix, h1, p1, h2, p2) - vect(:, puti) += hij * coefs - end if - end if -end subroutine - - - -subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) - logical, intent(in) :: bannedOrb(mo_tot_num) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: vect(N_states, mo_tot_num) - integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) - integer :: i, hole, p1, p2, sh - logical :: ok, lbanned(mo_tot_num) - integer(bit_kind) :: det(N_int, 2) - double precision :: hij - double precision, external :: get_phase_bi, integral8 - - lbanned = bannedOrb - sh = 1 - if(h(0,2) == 1) sh = 2 - hole = h(1, sh) - lbanned(p(1,sp)) = .true. - if(p(0,sp) == 2) lbanned(p(2,sp)) = .true. - !print *, "SPm1", sp, sh - - p1 = p(1, sp) - - if(sp == sh) then - p2 = p(2, sp) - lbanned(p2) = .true. - - do i=1,hole-1 - if(lbanned(i)) cycle - hij = (integral8(p1, p2, i, hole) - integral8(p2, p1, i, hole)) - hij *= get_phase_bi(phasemask, sp, sp, i, p1, hole, p2) - vect(:,i) += hij * coefs - end do - do i=hole+1,mo_tot_num - if(lbanned(i)) cycle - hij = (integral8(p1, p2, hole, i) - integral8(p2, p1, hole, i)) - hij *= get_phase_bi(phasemask, sp, sp, hole, p1, i, p2) - vect(:,i) += hij * coefs - end do - - call apply_particle(mask, sp, p2, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - vect(:, p2) += hij * coefs - else - p2 = p(1, sh) - do i=1,mo_tot_num - if(lbanned(i)) cycle - hij = integral8(p1, p2, i, hole) - hij *= get_phase_bi(phasemask, sp, sh, i, p1, hole, p2) - vect(:,i) += hij * coefs - end do - end if - - call apply_particle(mask, sp, p1, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - vect(:, p1) += hij * coefs -end subroutine - - -subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) - logical, intent(in) :: bannedOrb(mo_tot_num) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: vect(N_states, mo_tot_num) - integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) - integer :: i - logical :: ok, lbanned(mo_tot_num) - integer(bit_kind) :: det(N_int, 2) - double precision :: hij - - lbanned = bannedOrb - lbanned(p(1,sp)) = .true. - do i=1,mo_tot_num - if(lbanned(i)) cycle - call apply_particle(mask, sp, i, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - vect(:, i) += hij * coefs - end do -end subroutine - - -subroutine spot_hasBeen(mask, sp, det, i_gen, N, banned, fullMatch) - use bitmasks - implicit none - - integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) - integer, intent(in) :: i_gen, N, sp - logical, intent(inout) :: banned(mo_tot_num) - logical, intent(out) :: fullMatch - - - integer :: i, j, na, nb, list(3), nt - integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) - - fullMatch = .false. - - do i=1,N_int - negMask(i,1) = not(mask(i,1)) - negMask(i,2) = not(mask(i,2)) - end do - - do i=1, N - nt = 0 - - do j=1, N_int - myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) - myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) - nt += popcnt(myMask(j, 1)) + popcnt(myMask(j, 2)) - end do - - if(nt > 3) cycle - - if(nt <= 2 .and. i < i_gen) then - fullMatch = .true. - return - end if - - call bitstring_to_list(myMask(1,sp), list(1), na, N_int) - - if(nt == 3 .and. i < i_gen) then - do j=1,na - banned(list(j)) = .true. - end do - else if(nt == 1 .and. na == 1) then - banned(list(1)) = .true. - end if - end do -end subroutine - - - - -! Selection double -! ---------------- - -subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf) - use bitmasks - use selection_types - implicit none - - integer, intent(in) :: i_generator - integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) - double precision, intent(in) :: fock_diag_tmp(mo_tot_num) - double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - type(selection_buffer), intent(inout) :: buf - - double precision :: mat(N_states, mo_tot_num, mo_tot_num) - integer :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i,j,nt,ii - integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2), pmask(N_int, 2) - logical :: fullMatch, ok - - integer(bit_kind) :: mobMask(N_int, 2), negMask(N_int, 2) - integer,allocatable :: preinteresting(:), prefullinteresting(:), interesting(:), fullinteresting(:) - integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) - - allocate(minilist(N_int, 2, N_det_selectors), fullminilist(N_int, 2, N_det)) - allocate(preinteresting(0:N_det_selectors), prefullinteresting(0:N_det), interesting(0:N_det_selectors), fullinteresting(0:N_det)) - - do k=1,N_int - hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) - hole (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2)) - particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), particle_mask(k,1)) - particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), particle_mask(k,2)) - enddo - - integer :: N_holes(2), N_particles(2) - integer :: hole_list(N_int*bit_kind_size,2) - integer :: particle_list(N_int*bit_kind_size,2) - - call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) - call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) - - - preinteresting(0) = 0 - prefullinteresting(0) = 0 - - do i=1,N_int - negMask(i,1) = not(psi_det_generators(i,1,i_generator)) - negMask(i,2) = not(psi_det_generators(i,2,i_generator)) - end do - - do i=1,N_det - nt = 0 - do j=1,N_int - mobMask(j,1) = iand(negMask(j,1), psi_selectors(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), psi_selectors(j,2,i)) - nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) - end do - - if(nt <= 4) then - if(i <= N_det_selectors) then - preinteresting(0) += 1 - preinteresting(preinteresting(0)) = i - else if(nt <= 2) then - prefullinteresting(0) += 1 - prefullinteresting(prefullinteresting(0)) = i - end if - end if - end do - - - do s1=1,2 - do i1=N_holes(s1),1,-1 ! Generate low excitations first - h1 = hole_list(i1,s1) - call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) - - do i=1,N_int - negMask(i,1) = not(pmask(i,1)) - negMask(i,2) = not(pmask(i,2)) - end do - - interesting(0) = 0 - fullinteresting(0) = 0 - - do ii=1,preinteresting(0) - i = preinteresting(ii) - nt = 0 - do j=1,N_int - mobMask(j,1) = iand(negMask(j,1), psi_selectors(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), psi_selectors(j,2,i)) - nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) - end do - - if(nt <= 4) then - interesting(0) += 1 - interesting(interesting(0)) = i - minilist(:,:,interesting(0)) = psi_selectors(:,:,i) - if(nt <= 2) then - fullinteresting(0) += 1 - fullinteresting(fullinteresting(0)) = i - fullminilist(:,:,fullinteresting(0)) = psi_selectors(:,:,i) - end if - end if - end do - - do ii=1,prefullinteresting(0) - i = prefullinteresting(ii) - nt = 0 - do j=1,N_int - mobMask(j,1) = iand(negMask(j,1), psi_selectors(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), psi_selectors(j,2,i)) - nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) - end do - - if(nt <= 2) then - fullinteresting(0) += 1 - fullinteresting(fullinteresting(0)) = i - fullminilist(:,:,fullinteresting(0)) = psi_selectors(:,:,i) - end if - end do - - do s2=s1,2 - sp = s1 - if(s1 /= s2) sp = 3 - - ib = 1 - if(s1 == s2) ib = i1+1 - do i2=N_holes(s2),ib,-1 ! Generate low excitations first - - h2 = hole_list(i2,s2) - call apply_hole(pmask, s2,h2, mask, ok, N_int) - - logical :: banned(mo_tot_num, mo_tot_num,2) - logical :: bannedOrb(mo_tot_num, 2) - - banned = .false. - - call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) - - if(fullMatch) cycle - - bannedOrb(1:mo_tot_num, 1:2) = .true. - do s3=1,2 - do i=1,N_particles(s3) - bannedOrb(particle_list(i,s3), s3) = .false. - enddo - enddo - - mat = 0d0 - call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) - call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) - enddo - enddo - enddo - enddo -end subroutine - - -subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) - use bitmasks - use selection_types - implicit none - - integer, intent(in) :: i_generator, sp, h1, h2 - double precision, intent(in) :: mat(N_states, mo_tot_num, mo_tot_num) - logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num) - double precision, intent(in) :: fock_diag_tmp(mo_tot_num) - double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) - type(selection_buffer), intent(inout) :: buf - logical :: ok - integer :: s1, s2, p1, p2, ib, j, istate - integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) - double precision :: e_pert, delta_E, val, Hii, max_e_pert,tmp - double precision, external :: diag_H_mat_elem_fock - - logical, external :: detEq - - - if(sp == 3) then - s1 = 1 - s2 = 2 - else - s1 = sp - s2 = sp - end if - - call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) - - do p1=1,mo_tot_num - if(bannedOrb(p1, s1)) cycle - ib = 1 - if(sp /= 3) ib = p1+1 - do p2=ib,mo_tot_num - if(bannedOrb(p2, s2)) cycle - if(banned(p1,p2)) cycle - if(mat(1, p1, p2) == 0d0) cycle - call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) -logical, external :: is_in_wavefunction -if (is_in_wavefunction(det,N_int)) then - cycle -endif - - - Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) - max_e_pert = 0d0 - - do istate=1,N_states - delta_E = E0(istate) - Hii - val = mat(istate, p1, p2) + mat(istate, p1, p2) - tmp = dsqrt(delta_E * delta_E + val * val) - if (delta_E < 0.d0) then - tmp = -tmp - endif - e_pert = 0.5d0 * ( tmp - delta_E) - pt2(istate) = pt2(istate) + e_pert - max_e_pert = min(e_pert,max_e_pert) - end do - - if(dabs(max_e_pert) > buf%mini) then - call add_to_selection_buffer(buf, det, max_e_pert) - end if - end do - end do -end subroutine - - -subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) - use bitmasks - implicit none - - integer, intent(in) :: interesting(0:N_sel) - - integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) - integer, intent(in) :: sp, i_gen, N_sel - logical, intent(inout) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num, 2) - double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) - - integer :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt - integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) -! logical :: bandon -! -! bandon = .false. - mat = 0d0 - - do i=1,N_int - negMask(i,1) = not(mask(i,1)) - negMask(i,2) = not(mask(i,2)) - end do - - do i=1, N_sel ! interesting(0) - !i = interesting(ii) - - nt = 0 - do j=1,N_int - mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) - nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) - end do - - if(nt > 4) cycle - - do j=1,N_int - perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) - perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) - end do - - call bitstring_to_list(perMask(1,1), h(1,1), h(0,1), N_int) - call bitstring_to_list(perMask(1,2), h(1,2), h(0,2), N_int) - - call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) - call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) - - if(interesting(i) < i_gen) then - if(nt == 4) call past_d2(banned, p, sp) - if(nt == 3) call past_d1(bannedOrb, p) - else - if(interesting(i) == i_gen) then -! bandon = .true. - if(sp == 3) then - banned(:,:,2) = transpose(banned(:,:,1)) - else - do k=1,mo_tot_num - do l=k+1,mo_tot_num - banned(l,k,1) = banned(k,l,1) - end do - end do - end if - end if - if(nt == 4) then - call get_d2(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) - else if(nt == 3) then - call get_d1(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) - else - call get_d0(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) - end if - end if - end do -end subroutine - - -subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) - logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - - double precision, external :: get_phase_bi, integral8 - - integer :: i, j, tip, ma, mi, puti, putj - integer :: h1, h2, p1, p2, i1, i2 - double precision :: hij, phase - - integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) - integer, parameter :: turn2(2) = (/2, 1/) - integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) - - integer :: bant - bant = 1 - - tip = p(0,1) * p(0,2) - - ma = sp - if(p(0,1) > p(0,2)) ma = 1 - if(p(0,1) < p(0,2)) ma = 2 - mi = mod(ma, 2) + 1 - - if(sp == 3) then - if(ma == 2) bant = 2 - - if(tip == 3) then - puti = p(1, mi) - do i = 1, 3 - putj = p(i, ma) - if(banned(putj,puti,bant)) cycle - i1 = turn3(1,i) - i2 = turn3(2,i) - p1 = p(i1, ma) - p2 = p(i2, ma) - h1 = h(1, ma) - h2 = h(2, ma) - - hij = (integral8(p1, p2, h1, h2) - integral8(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2) - if(ma == 1) then - mat(:, putj, puti) += coefs * hij - else - mat(:, puti, putj) += coefs * hij - end if - end do - else - do i = 1,2 - do j = 1,2 - puti = p(i, 1) - putj = p(j, 2) - - if(banned(puti,putj,bant)) cycle - p1 = p(turn2(i), 1) - p2 = p(turn2(j), 2) - h1 = h(1,1) - h2 = h(1,2) - - hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) - mat(:, puti, putj) += coefs * hij - end do - end do - end if - - else - if(tip == 0) then - h1 = h(1, ma) - h2 = h(2, ma) - do i=1,3 - puti = p(i, ma) - do j=i+1,4 - putj = p(j, ma) - if(banned(puti,putj,1)) cycle - - i1 = turn2d(1, i, j) - i2 = turn2d(2, i, j) - p1 = p(i1, ma) - p2 = p(i2, ma) - hij = (integral8(p1, p2, h1, h2) - integral8(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2) - mat(:, puti, putj) += coefs * hij - end do - end do - else if(tip == 3) then - h1 = h(1, mi) - h2 = h(1, ma) - p1 = p(1, mi) - do i=1,3 - puti = p(turn3(1,i), ma) - putj = p(turn3(2,i), ma) - if(banned(puti,putj,1)) cycle - p2 = p(i, ma) - - hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2) - mat(:, min(puti, putj), max(puti, putj)) += coefs * hij - end do - else ! tip == 4 - puti = p(1, sp) - putj = p(2, sp) - if(.not. banned(puti,putj,1)) then - p1 = p(1, mi) - p2 = p(2, mi) - h1 = h(1, mi) - h2 = h(2, mi) - hij = (integral8(p1, p2, h1, h2) - integral8(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2) - mat(:, puti, putj) += coefs * hij - end if - end if - end if -end subroutine - - -subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer(1),intent(in) :: phasemask(N_int*bit_kind_size, 2) - logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) - integer(bit_kind) :: det(N_int, 2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) - double precision :: hij, tmp_row(N_states, mo_tot_num), tmp_row2(N_states, mo_tot_num) - double precision, external :: get_phase_bi, integral8 - - logical :: lbanned(mo_tot_num, 2), ok - integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j, hfix, pfix, h1, h2, p1, p2, ib - - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - - integer, parameter :: turn2(2) = (/2,1/) - integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) - - integer :: bant - - - lbanned = bannedOrb - - do i=1, p(0,1) - lbanned(p(i,1), 1) = .true. - end do - do i=1, p(0,2) - lbanned(p(i,2), 2) = .true. - end do - - ma = 1 - if(p(0,2) >= 2) ma = 2 - mi = turn2(ma) - - bant = 1 - - if(sp == 3) then - !move MA - if(ma == 2) bant = 2 - puti = p(1,mi) - hfix = h(1,ma) - p1 = p(1,ma) - p2 = p(2,ma) - if(.not. bannedOrb(puti, mi)) then - tmp_row = 0d0 - do putj=1, hfix-1 - if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle - hij = (integral8(p1, p2, putj, hfix)-integral8(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2) - tmp_row(1:N_states,putj) += hij * coefs(1:N_states) - end do - do putj=hfix+1, mo_tot_num - if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle - hij = (integral8(p1, p2, hfix, putj)-integral8(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2) - tmp_row(1:N_states,putj) += hij * coefs(1:N_states) - end do - - if(ma == 1) then - mat(1:N_states,1:mo_tot_num,puti) += tmp_row(1:N_states,1:mo_tot_num) - else - mat(1:N_states,puti,1:mo_tot_num) += tmp_row(1:N_states,1:mo_tot_num) - end if - end if - - !MOVE MI - pfix = p(1,mi) - tmp_row = 0d0 - tmp_row2 = 0d0 - do puti=1,mo_tot_num - if(lbanned(puti,mi)) cycle - !p1 fixed - putj = p1 - if(.not. banned(putj,puti,bant)) then - hij = integral8(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix) - tmp_row(:,puti) += hij * coefs - end if - - putj = p2 - if(.not. banned(putj,puti,bant)) then - hij = integral8(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix) - tmp_row2(:,puti) += hij * coefs - end if - end do - - if(mi == 1) then - mat(:,:,p1) += tmp_row(:,:) - mat(:,:,p2) += tmp_row2(:,:) - else - mat(:,p1,:) += tmp_row(:,:) - mat(:,p2,:) += tmp_row2(:,:) - end if - else - if(p(0,ma) == 3) then - do i=1,3 - hfix = h(1,ma) - puti = p(i, ma) - p1 = p(turn3(1,i), ma) - p2 = p(turn3(2,i), ma) - tmp_row = 0d0 - do putj=1,hfix-1 - if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle - hij = (integral8(p1, p2, putj, hfix)-integral8(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2) - tmp_row(:,putj) += hij * coefs - end do - do putj=hfix+1,mo_tot_num - if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle - hij = (integral8(p1, p2, hfix, putj)-integral8(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2) - tmp_row(:,putj) += hij * coefs - end do - - mat(:, :puti-1, puti) += tmp_row(:,:puti-1) - mat(:, puti, puti:) += tmp_row(:,puti:) - end do - else - hfix = h(1,mi) - pfix = p(1,mi) - p1 = p(1,ma) - p2 = p(2,ma) - tmp_row = 0d0 - tmp_row2 = 0d0 - do puti=1,mo_tot_num - if(lbanned(puti,ma)) cycle - putj = p2 - if(.not. banned(puti,putj,1)) then - hij = integral8(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1) - tmp_row(:,puti) += hij * coefs - end if - - putj = p1 - if(.not. banned(puti,putj,1)) then - hij = integral8(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2) - tmp_row2(:,puti) += hij * coefs - end if - end do - mat(:,:p2-1,p2) += tmp_row(:,:p2-1) - mat(:,p2,p2:) += tmp_row(:,p2:) - mat(:,:p1-1,p1) += tmp_row2(:,:p1-1) - mat(:,p1,p1:) += tmp_row2(:,p1:) - end if - end if - - !! MONO - if(sp == 3) then - s1 = 1 - s2 = 2 - else - s1 = sp - s2 = sp - end if - - do i1=1,p(0,s1) - ib = 1 - if(s1 == s2) ib = i1+1 - do i2=ib,p(0,s2) - p1 = p(i1,s1) - p2 = p(i2,s2) - if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle - call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - mat(:, p1, p2) += coefs * hij - end do - end do -end subroutine - - - - -subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) - logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) - integer(bit_kind) :: det(N_int, 2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - - integer :: i, j, s, h1, h2, p1, p2, puti, putj - double precision :: hij, phase - double precision, external :: get_phase_bi, integral8 - logical :: ok - - integer :: bant - bant = 1 - - - if(sp == 3) then ! AB - h1 = p(1,1) - h2 = p(1,2) - do p1=1, mo_tot_num - if(bannedOrb(p1, 1)) cycle - do p2=1, mo_tot_num - if(bannedOrb(p2,2)) cycle - if(banned(p1, p2, bant)) cycle ! rentable? - if(p1 == h1 .or. p2 == h2) then - call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - else - hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) - phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) - end if - mat(:, p1, p2) += coefs(:) * hij - end do - end do - else ! AA BB - p1 = p(1,sp) - p2 = p(2,sp) - do puti=1, mo_tot_num - if(bannedOrb(puti, sp)) cycle - do putj=puti+1, mo_tot_num - if(bannedOrb(putj, sp)) cycle - if(banned(puti, putj, bant)) cycle ! rentable? - if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then - call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - else - hij = (integral8(p1, p2, puti, putj) - integral8(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2) - end if - mat(:, puti, putj) += coefs(:) * hij - end do - end do - end if -end subroutine - - -subroutine past_d1(bannedOrb, p) - use bitmasks - implicit none - - logical, intent(inout) :: bannedOrb(mo_tot_num, 2) - integer, intent(in) :: p(0:4, 2) - integer :: i,s - - do s = 1, 2 - do i = 1, p(0, s) - bannedOrb(p(i, s), s) = .true. - end do - end do -end subroutine - - -subroutine past_d2(banned, p, sp) - use bitmasks - implicit none - - logical, intent(inout) :: banned(mo_tot_num, mo_tot_num) - integer, intent(in) :: p(0:4, 2), sp - integer :: i,j - - if(sp == 3) then - do i=1,p(0,1) - do j=1,p(0,2) - banned(p(i,1), p(j,2)) = .true. - end do - end do - else - do i=1,p(0, sp) - do j=1,i-1 - banned(p(j,sp), p(i,sp)) = .true. - banned(p(i,sp), p(j,sp)) = .true. - end do - end do - end if -end subroutine - - - -subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) - use bitmasks - implicit none - - integer, intent(in) :: interesting(0:N) - integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) - integer, intent(in) :: i_gen, N - logical, intent(inout) :: banned(mo_tot_num, mo_tot_num) - logical, intent(out) :: fullMatch - - - integer :: i, j, na, nb, list(3) - integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) - - fullMatch = .false. - - do i=1,N_int - negMask(i,1) = not(mask(i,1)) - negMask(i,2) = not(mask(i,2)) - end do - - genl : do i=1, N - do j=1, N_int - if(iand(det(j,1,i), mask(j,1)) /= mask(j, 1)) cycle genl - if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl - end do - - if(interesting(i) < i_gen) then - fullMatch = .true. - return - end if - - do j=1, N_int - myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) - myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) - end do - - call bitstring_to_list(myMask(1,1), list(1), na, N_int) - call bitstring_to_list(myMask(1,2), list(na+1), nb, N_int) - banned(list(1), list(2)) = .true. - end do genl -end subroutine - +use bitmasks + + +double precision function integral8(i,j,k,l) + implicit none + + integer, intent(in) :: i,j,k,l + double precision, external :: get_mo_bielec_integral + integer :: ii + ii = l-mo_integrals_cache_min + ii = ior(ii, k-mo_integrals_cache_min) + ii = ior(ii, j-mo_integrals_cache_min) + ii = ior(ii, i-mo_integrals_cache_min) + if (iand(ii, -64) /= 0) then + integral8 = get_mo_bielec_integral(i,j,k,l,mo_integrals_map) + else + ii = l-mo_integrals_cache_min + ii = ior( ishft(ii,6), k-mo_integrals_cache_min) + ii = ior( ishft(ii,6), j-mo_integrals_cache_min) + ii = ior( ishft(ii,6), i-mo_integrals_cache_min) + integral8 = mo_integrals_cache(ii) + endif +end function + + +BEGIN_PROVIDER [ integer(1), psi_phasemask, (N_int*bit_kind_size, 2, N_det)] + use bitmasks + implicit none + + integer :: i + do i=1, N_det + call get_mask_phase(psi_selectors(1,1,i), psi_phasemask(1,1,i)) + end do +END_PROVIDER + + +subroutine assert(cond, msg) + character(*), intent(in) :: msg + logical, intent(in) :: cond + + if(.not. cond) then + print *, "assert fail: "//msg + stop + end if +end subroutine + + +subroutine get_mask_phase(det, phasemask) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: det(N_int, 2) + integer(1), intent(out) :: phasemask(N_int*bit_kind_size, 2) + integer :: s, ni, i + logical :: change + + phasemask = 0_1 + do s=1,2 + change = .false. + do ni=1,N_int + do i=0,bit_kind_size-1 + if(BTEST(det(ni, s), i)) change = .not. change + if(change) phasemask((ni-1)*bit_kind_size + i + 1, s) = 1_1 + end do + end do + end do +end subroutine + + +subroutine select_connected(i_generator,E0,pt2,b) + use bitmasks + use selection_types + implicit none + integer, intent(in) :: i_generator + type(selection_buffer), intent(inout) :: b + double precision, intent(inout) :: pt2(N_states) + integer :: k,l + double precision, intent(in) :: E0(N_states) + + integer(bit_kind) :: hole_mask(N_int,2), particle_mask(N_int,2) + double precision :: fock_diag_tmp(2,mo_tot_num+1) + + call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int) + + do l=1,N_generators_bitmask + do k=1,N_int + hole_mask(k,1) = iand(generators_bitmask(k,1,s_hole,l), psi_det_generators(k,1,i_generator)) + hole_mask(k,2) = iand(generators_bitmask(k,2,s_hole,l), psi_det_generators(k,2,i_generator)) + particle_mask(k,1) = iand(generators_bitmask(k,1,s_part,l), not(psi_det_generators(k,1,i_generator)) ) + particle_mask(k,2) = iand(generators_bitmask(k,2,s_part,l), not(psi_det_generators(k,2,i_generator)) ) + + enddo + call select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b) + call select_singles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b) + enddo +end subroutine + + +double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2) + use bitmasks + implicit none + + integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) + integer, intent(in) :: s1, s2, h1, h2, p1, p2 + logical :: change + integer(1) :: np + double precision, parameter :: res(0:1) = (/1d0, -1d0/) + + np = phasemask(h1,s1) + phasemask(p1,s1) + phasemask(h2,s2) + phasemask(p2,s2) + if(p1 < h1) np = np + 1_1 + if(p2 < h2) np = np + 1_1 + + if(s1 == s2 .and. max(h1, p1) > min(h2, p2)) np = np + 1_1 + get_phase_bi = res(iand(np,1_1)) +end subroutine + + + +! Selection single +! ---------------- + +subroutine select_singles(i_gen,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf) + use bitmasks + use selection_types + implicit none + BEGIN_DOC +! Select determinants connected to i_det by H + END_DOC + integer, intent(in) :: i_gen + integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) + double precision, intent(in) :: fock_diag_tmp(mo_tot_num) + double precision, intent(in) :: E0(N_states) + double precision, intent(inout) :: pt2(N_states) + type(selection_buffer), intent(inout) :: buf + + double precision :: vect(N_states, mo_tot_num) + logical :: bannedOrb(mo_tot_num) + integer :: i, j, k + integer :: h1,h2,s1,s2,i1,i2,ib,sp + integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2) + logical :: fullMatch, ok + + + do k=1,N_int + hole (k,1) = iand(psi_det_generators(k,1,i_gen), hole_mask(k,1)) + hole (k,2) = iand(psi_det_generators(k,2,i_gen), hole_mask(k,2)) + particle(k,1) = iand(not(psi_det_generators(k,1,i_gen)), particle_mask(k,1)) + particle(k,2) = iand(not(psi_det_generators(k,2,i_gen)), particle_mask(k,2)) + enddo + + ! Create lists of holes and particles + ! ----------------------------------- + + integer :: N_holes(2), N_particles(2) + integer :: hole_list(N_int*bit_kind_size,2) + integer :: particle_list(N_int*bit_kind_size,2) + + call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) + call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) + + do sp=1,2 + do i=1, N_holes(sp) + h1 = hole_list(i,sp) + call apply_hole(psi_det_generators(1,1,i_gen), sp, h1, mask, ok, N_int) + bannedOrb = .true. + do j=1,N_particles(sp) + bannedOrb(particle_list(j, sp)) = .false. + end do + call spot_hasBeen(mask, sp, psi_selectors, i_gen, N_det, bannedOrb, fullMatch) + if(fullMatch) cycle + vect = 0d0 + call splash_p(mask, sp, psi_selectors(1,1,i_gen), psi_phasemask(1,1,i_gen), psi_selectors_coef_transp(1,i_gen), N_det_selectors - i_gen + 1, bannedOrb, vect) + call fill_buffer_single(i_gen, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) + end do + enddo +end subroutine + + +subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) + use bitmasks + use selection_types + implicit none + + integer, intent(in) :: i_generator, sp, h1 + double precision, intent(in) :: vect(N_states, mo_tot_num) + logical, intent(in) :: bannedOrb(mo_tot_num) + double precision, intent(in) :: fock_diag_tmp(mo_tot_num) + double precision, intent(in) :: E0(N_states) + double precision, intent(inout) :: pt2(N_states) + type(selection_buffer), intent(inout) :: buf + logical :: ok + integer :: s1, s2, p1, p2, ib, istate + integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) + double precision :: e_pert, delta_E, val, Hii, max_e_pert, tmp + double precision, external :: diag_H_mat_elem_fock + + + call apply_hole(psi_det_generators(1,1,i_generator), sp, h1, mask, ok, N_int) + + do p1=1,mo_tot_num + if(bannedOrb(p1)) cycle + if(vect(1, p1) == 0d0) cycle + call apply_particle(mask, sp, p1, det, ok, N_int) + + Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) + max_e_pert = 0d0 + + do istate=1,N_states + val = vect(istate, p1) + vect(istate, p1) + delta_E = E0(istate) - Hii + tmp = dsqrt(delta_E * delta_E + val * val) + if (delta_E < 0.d0) then + tmp = -tmp + endif + e_pert = 0.5d0 * ( tmp - delta_E) + pt2(istate) += e_pert + if(dabs(e_pert) > dabs(max_e_pert)) max_e_pert = e_pert + end do + + if(dabs(max_e_pert) > buf%mini) then + call add_to_selection_buffer(buf, det, max_e_pert) + endif + end do +end subroutine + + +subroutine splash_p(mask, sp, det, phasemask, coefs, N_sel, bannedOrb, vect) + use bitmasks + implicit none + + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int,2,N_sel) + integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2, N_sel) + double precision, intent(in) :: coefs(N_states, N_sel) + integer, intent(in) :: sp, N_sel + logical, intent(inout) :: bannedOrb(mo_tot_num) + double precision, intent(inout) :: vect(N_states, mo_tot_num) + + integer :: i, j, h(0:2,2), p(0:3,2), nt + integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + do i=1, N_sel + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt > 3) cycle + + do j=1,N_int + perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) + perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) + end do + + call bitstring_to_list(perMask(1,1), h(1,1), h(0,1), N_int) + call bitstring_to_list(perMask(1,2), h(1,2), h(0,2), N_int) + + call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) + + if(nt == 3) then + call get_m2(det(1,1,i), phasemask(1,1,i), bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + else if(nt == 2) then + call get_m1(det(1,1,i), phasemask(1,1,i), bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + else + call get_m0(det(1,1,i), phasemask(1,1,i), bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + end if + end do +end subroutine + + +subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) + logical, intent(in) :: bannedOrb(mo_tot_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_tot_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i, j, h1, h2, p1, p2, sfix, hfix, pfix, hmob, pmob, puti + double precision :: hij + double precision, external :: get_phase_bi, integral8 + + integer, parameter :: turn3_2(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + integer, parameter :: turn2(2) = (/2,1/) + + if(h(0,sp) == 2) then + h1 = h(1, sp) + h2 = h(2, sp) + do i=1,3 + puti = p(i, sp) + if(bannedOrb(puti)) cycle + p1 = p(turn3_2(1,i), sp) + p2 = p(turn3_2(2,i), sp) + hij = integral8(p1, p2, h1, h2) - integral8(p2, p1, h1, h2) + hij *= get_phase_bi(phasemask, sp, sp, h1, p1, h2, p2) + vect(:, puti) += hij * coefs + end do + else if(h(0,sp) == 1) then + sfix = turn2(sp) + hfix = h(1,sfix) + pfix = p(1,sfix) + hmob = h(1,sp) + do j=1,2 + puti = p(j, sp) + if(bannedOrb(puti)) cycle + pmob = p(turn2(j), sp) + hij = integral8(pfix, pmob, hfix, hmob) + hij *= get_phase_bi(phasemask, sp, sfix, hmob, pmob, hfix, pfix) + vect(:, puti) += hij * coefs + end do + else + puti = p(1,sp) + if(.not. bannedOrb(puti)) then + sfix = turn2(sp) + p1 = p(1,sfix) + p2 = p(2,sfix) + h1 = h(1,sfix) + h2 = h(2,sfix) + hij = (integral8(p1,p2,h1,h2) - integral8(p2,p1,h1,h2)) + hij *= get_phase_bi(phasemask, sfix, sfix, h1, p1, h2, p2) + vect(:, puti) += hij * coefs + end if + end if +end subroutine + + + +subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) + logical, intent(in) :: bannedOrb(mo_tot_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_tot_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i, hole, p1, p2, sh + logical :: ok, lbanned(mo_tot_num) + integer(bit_kind) :: det(N_int, 2) + double precision :: hij + double precision, external :: get_phase_bi, integral8 + + lbanned = bannedOrb + sh = 1 + if(h(0,2) == 1) sh = 2 + hole = h(1, sh) + lbanned(p(1,sp)) = .true. + if(p(0,sp) == 2) lbanned(p(2,sp)) = .true. + !print *, "SPm1", sp, sh + + p1 = p(1, sp) + + if(sp == sh) then + p2 = p(2, sp) + lbanned(p2) = .true. + + do i=1,hole-1 + if(lbanned(i)) cycle + hij = (integral8(p1, p2, i, hole) - integral8(p2, p1, i, hole)) + hij *= get_phase_bi(phasemask, sp, sp, i, p1, hole, p2) + vect(:,i) += hij * coefs + end do + do i=hole+1,mo_tot_num + if(lbanned(i)) cycle + hij = (integral8(p1, p2, hole, i) - integral8(p2, p1, hole, i)) + hij *= get_phase_bi(phasemask, sp, sp, hole, p1, i, p2) + vect(:,i) += hij * coefs + end do + + call apply_particle(mask, sp, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, p2) += hij * coefs + else + p2 = p(1, sh) + do i=1,mo_tot_num + if(lbanned(i)) cycle + hij = integral8(p1, p2, i, hole) + hij *= get_phase_bi(phasemask, sp, sh, i, p1, hole, p2) + vect(:,i) += hij * coefs + end do + end if + + call apply_particle(mask, sp, p1, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, p1) += hij * coefs +end subroutine + + +subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) + logical, intent(in) :: bannedOrb(mo_tot_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_tot_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i + logical :: ok, lbanned(mo_tot_num) + integer(bit_kind) :: det(N_int, 2) + double precision :: hij + + lbanned = bannedOrb + lbanned(p(1,sp)) = .true. + do i=1,mo_tot_num + if(lbanned(i)) cycle + call apply_particle(mask, sp, i, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, i) += hij * coefs + end do +end subroutine + + +subroutine spot_hasBeen(mask, sp, det, i_gen, N, banned, fullMatch) + use bitmasks + implicit none + + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) + integer, intent(in) :: i_gen, N, sp + logical, intent(inout) :: banned(mo_tot_num) + logical, intent(out) :: fullMatch + + + integer :: i, j, na, nb, list(3), nt + integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) + + fullMatch = .false. + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + do i=1, N + nt = 0 + + do j=1, N_int + myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) + myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) + nt += popcnt(myMask(j, 1)) + popcnt(myMask(j, 2)) + end do + + if(nt > 3) cycle + + if(nt <= 2 .and. i < i_gen) then + fullMatch = .true. + return + end if + + call bitstring_to_list(myMask(1,sp), list(1), na, N_int) + + if(nt == 3 .and. i < i_gen) then + do j=1,na + banned(list(j)) = .true. + end do + else if(nt == 1 .and. na == 1) then + banned(list(1)) = .true. + end if + end do +end subroutine + + + + +! Selection double +! ---------------- + +subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf) + use bitmasks + use selection_types + implicit none + + integer, intent(in) :: i_generator + integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) + double precision, intent(in) :: fock_diag_tmp(mo_tot_num) + double precision, intent(in) :: E0(N_states) + double precision, intent(inout) :: pt2(N_states) + type(selection_buffer), intent(inout) :: buf + + double precision :: mat(N_states, mo_tot_num, mo_tot_num) + integer :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i,j,nt,ii + integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2), pmask(N_int, 2) + logical :: fullMatch, ok + + integer(bit_kind) :: mobMask(N_int, 2), negMask(N_int, 2) + integer,allocatable :: preinteresting(:), prefullinteresting(:), interesting(:), fullinteresting(:) + integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) + + allocate(minilist(N_int, 2, N_det_selectors), fullminilist(N_int, 2, N_det)) + allocate(preinteresting(0:N_det_selectors), prefullinteresting(0:N_det), interesting(0:N_det_selectors), fullinteresting(0:N_det)) + + do k=1,N_int + hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) + hole (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2)) + particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), particle_mask(k,1)) + particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), particle_mask(k,2)) + enddo + + integer :: N_holes(2), N_particles(2) + integer :: hole_list(N_int*bit_kind_size,2) + integer :: particle_list(N_int*bit_kind_size,2) + + call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) + call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) + + + preinteresting(0) = 0 + prefullinteresting(0) = 0 + + do i=1,N_int + negMask(i,1) = not(psi_det_generators(i,1,i_generator)) + negMask(i,2) = not(psi_det_generators(i,2,i_generator)) + end do + + do i=1,N_det + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), psi_selectors(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_selectors(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt <= 4) then + if(i <= N_det_selectors) then + preinteresting(0) += 1 + preinteresting(preinteresting(0)) = i + else if(nt <= 2) then + prefullinteresting(0) += 1 + prefullinteresting(prefullinteresting(0)) = i + end if + end if + end do + + + do s1=1,2 + do i1=N_holes(s1),1,-1 ! Generate low excitations first + h1 = hole_list(i1,s1) + call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) + + do i=1,N_int + negMask(i,1) = not(pmask(i,1)) + negMask(i,2) = not(pmask(i,2)) + end do + + interesting(0) = 0 + fullinteresting(0) = 0 + + do ii=1,preinteresting(0) + i = preinteresting(ii) + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), psi_selectors(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_selectors(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt <= 4) then + interesting(0) += 1 + interesting(interesting(0)) = i + minilist(:,:,interesting(0)) = psi_selectors(:,:,i) + if(nt <= 2) then + fullinteresting(0) += 1 + fullinteresting(fullinteresting(0)) = i + fullminilist(:,:,fullinteresting(0)) = psi_selectors(:,:,i) + end if + end if + end do + + do ii=1,prefullinteresting(0) + i = prefullinteresting(ii) + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), psi_selectors(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_selectors(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt <= 2) then + fullinteresting(0) += 1 + fullinteresting(fullinteresting(0)) = i + fullminilist(:,:,fullinteresting(0)) = psi_selectors(:,:,i) + end if + end do + + do s2=s1,2 + sp = s1 + if(s1 /= s2) sp = 3 + + ib = 1 + if(s1 == s2) ib = i1+1 + do i2=N_holes(s2),ib,-1 ! Generate low excitations first + + h2 = hole_list(i2,s2) + call apply_hole(pmask, s2,h2, mask, ok, N_int) + + logical :: banned(mo_tot_num, mo_tot_num,2) + logical :: bannedOrb(mo_tot_num, 2) + + banned = .false. + + call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) + + if(fullMatch) cycle + + bannedOrb(1:mo_tot_num, 1:2) = .true. + do s3=1,2 + do i=1,N_particles(s3) + bannedOrb(particle_list(i,s3), s3) = .false. + enddo + enddo + + mat = 0d0 + call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) + call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) + enddo + enddo + enddo + enddo +end subroutine + + +subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) + use bitmasks + use selection_types + implicit none + + integer, intent(in) :: i_generator, sp, h1, h2 + double precision, intent(in) :: mat(N_states, mo_tot_num, mo_tot_num) + logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num) + double precision, intent(in) :: fock_diag_tmp(mo_tot_num) + double precision, intent(in) :: E0(N_states) + double precision, intent(inout) :: pt2(N_states) + type(selection_buffer), intent(inout) :: buf + logical :: ok + integer :: s1, s2, p1, p2, ib, j, istate + integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) + double precision :: e_pert, delta_E, val, Hii, max_e_pert,tmp + double precision, external :: diag_H_mat_elem_fock + + logical, external :: detEq + + + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) + + do p1=1,mo_tot_num + if(bannedOrb(p1, s1)) cycle + ib = 1 + if(sp /= 3) ib = p1+1 + do p2=ib,mo_tot_num + if(bannedOrb(p2, s2)) cycle + if(banned(p1,p2)) cycle + if(mat(1, p1, p2) == 0d0) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) +logical, external :: is_in_wavefunction +if (is_in_wavefunction(det,N_int)) then + cycle +endif + + + Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) + max_e_pert = 0d0 + + do istate=1,N_states + delta_E = E0(istate) - Hii + val = mat(istate, p1, p2) + mat(istate, p1, p2) + tmp = dsqrt(delta_E * delta_E + val * val) + if (delta_E < 0.d0) then + tmp = -tmp + endif + e_pert = 0.5d0 * ( tmp - delta_E) + pt2(istate) = pt2(istate) + e_pert + max_e_pert = min(e_pert,max_e_pert) + end do + + if(dabs(max_e_pert) > buf%mini) then + call add_to_selection_buffer(buf, det, max_e_pert) + end if + end do + end do +end subroutine + + +subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) + use bitmasks + implicit none + + integer, intent(in) :: interesting(0:N_sel) + + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) + integer, intent(in) :: sp, i_gen, N_sel + logical, intent(inout) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num, 2) + double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) + + integer :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt + integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) +! logical :: bandon +! +! bandon = .false. + mat = 0d0 + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + do i=1, N_sel ! interesting(0) + !i = interesting(ii) + + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt > 4) cycle + + do j=1,N_int + perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) + perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) + end do + + call bitstring_to_list(perMask(1,1), h(1,1), h(0,1), N_int) + call bitstring_to_list(perMask(1,2), h(1,2), h(0,2), N_int) + + call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) + + if(interesting(i) < i_gen) then + if(nt == 4) call past_d2(banned, p, sp) + if(nt == 3) call past_d1(bannedOrb, p) + else + if(interesting(i) == i_gen) then +! bandon = .true. + if(sp == 3) then + banned(:,:,2) = transpose(banned(:,:,1)) + else + do k=1,mo_tot_num + do l=k+1,mo_tot_num + banned(l,k,1) = banned(k,l,1) + end do + end do + end if + end if + if(nt == 4) then + call get_d2(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + else if(nt == 3) then + call get_d1(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + else + call get_d0(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + end if + end if + end do +end subroutine + + +subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) + logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + double precision, external :: get_phase_bi, integral8 + + integer :: i, j, tip, ma, mi, puti, putj + integer :: h1, h2, p1, p2, i1, i2 + double precision :: hij, phase + + integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) + integer, parameter :: turn2(2) = (/2, 1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + bant = 1 + + tip = p(0,1) * p(0,2) + + ma = sp + if(p(0,1) > p(0,2)) ma = 1 + if(p(0,1) < p(0,2)) ma = 2 + mi = mod(ma, 2) + 1 + + if(sp == 3) then + if(ma == 2) bant = 2 + + if(tip == 3) then + puti = p(1, mi) + do i = 1, 3 + putj = p(i, ma) + if(banned(putj,puti,bant)) cycle + i1 = turn3(1,i) + i2 = turn3(2,i) + p1 = p(i1, ma) + p2 = p(i2, ma) + h1 = h(1, ma) + h2 = h(2, ma) + + hij = (integral8(p1, p2, h1, h2) - integral8(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2) + if(ma == 1) then + mat(:, putj, puti) += coefs * hij + else + mat(:, puti, putj) += coefs * hij + end if + end do + else + do i = 1,2 + do j = 1,2 + puti = p(i, 1) + putj = p(j, 2) + + if(banned(puti,putj,bant)) cycle + p1 = p(turn2(i), 1) + p2 = p(turn2(j), 2) + h1 = h(1,1) + h2 = h(1,2) + + hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) + mat(:, puti, putj) += coefs * hij + end do + end do + end if + + else + if(tip == 0) then + h1 = h(1, ma) + h2 = h(2, ma) + do i=1,3 + puti = p(i, ma) + do j=i+1,4 + putj = p(j, ma) + if(banned(puti,putj,1)) cycle + + i1 = turn2d(1, i, j) + i2 = turn2d(2, i, j) + p1 = p(i1, ma) + p2 = p(i2, ma) + hij = (integral8(p1, p2, h1, h2) - integral8(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2) + mat(:, puti, putj) += coefs * hij + end do + end do + else if(tip == 3) then + h1 = h(1, mi) + h2 = h(1, ma) + p1 = p(1, mi) + do i=1,3 + puti = p(turn3(1,i), ma) + putj = p(turn3(2,i), ma) + if(banned(puti,putj,1)) cycle + p2 = p(i, ma) + + hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2) + mat(:, min(puti, putj), max(puti, putj)) += coefs * hij + end do + else ! tip == 4 + puti = p(1, sp) + putj = p(2, sp) + if(.not. banned(puti,putj,1)) then + p1 = p(1, mi) + p2 = p(2, mi) + h1 = h(1, mi) + h2 = h(2, mi) + hij = (integral8(p1, p2, h1, h2) - integral8(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2) + mat(:, puti, putj) += coefs * hij + end if + end if + end if +end subroutine + + +subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(1),intent(in) :: phasemask(N_int*bit_kind_size, 2) + logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) + double precision :: hij, tmp_row(N_states, mo_tot_num), tmp_row2(N_states, mo_tot_num) + double precision, external :: get_phase_bi, integral8 + + logical :: lbanned(mo_tot_num, 2), ok + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j, hfix, pfix, h1, h2, p1, p2, ib + + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + integer, parameter :: turn2(2) = (/2,1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + + + lbanned = bannedOrb + + do i=1, p(0,1) + lbanned(p(i,1), 1) = .true. + end do + do i=1, p(0,2) + lbanned(p(i,2), 2) = .true. + end do + + ma = 1 + if(p(0,2) >= 2) ma = 2 + mi = turn2(ma) + + bant = 1 + + if(sp == 3) then + !move MA + if(ma == 2) bant = 2 + puti = p(1,mi) + hfix = h(1,ma) + p1 = p(1,ma) + p2 = p(2,ma) + if(.not. bannedOrb(puti, mi)) then + tmp_row = 0d0 + do putj=1, hfix-1 + if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle + hij = (integral8(p1, p2, putj, hfix)-integral8(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2) + tmp_row(1:N_states,putj) += hij * coefs(1:N_states) + end do + do putj=hfix+1, mo_tot_num + if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle + hij = (integral8(p1, p2, hfix, putj)-integral8(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2) + tmp_row(1:N_states,putj) += hij * coefs(1:N_states) + end do + + if(ma == 1) then + mat(1:N_states,1:mo_tot_num,puti) += tmp_row(1:N_states,1:mo_tot_num) + else + mat(1:N_states,puti,1:mo_tot_num) += tmp_row(1:N_states,1:mo_tot_num) + end if + end if + + !MOVE MI + pfix = p(1,mi) + tmp_row = 0d0 + tmp_row2 = 0d0 + do puti=1,mo_tot_num + if(lbanned(puti,mi)) cycle + !p1 fixed + putj = p1 + if(.not. banned(putj,puti,bant)) then + hij = integral8(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix) + tmp_row(:,puti) += hij * coefs + end if + + putj = p2 + if(.not. banned(putj,puti,bant)) then + hij = integral8(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix) + tmp_row2(:,puti) += hij * coefs + end if + end do + + if(mi == 1) then + mat(:,:,p1) += tmp_row(:,:) + mat(:,:,p2) += tmp_row2(:,:) + else + mat(:,p1,:) += tmp_row(:,:) + mat(:,p2,:) += tmp_row2(:,:) + end if + else + if(p(0,ma) == 3) then + do i=1,3 + hfix = h(1,ma) + puti = p(i, ma) + p1 = p(turn3(1,i), ma) + p2 = p(turn3(2,i), ma) + tmp_row = 0d0 + do putj=1,hfix-1 + if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle + hij = (integral8(p1, p2, putj, hfix)-integral8(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2) + tmp_row(:,putj) += hij * coefs + end do + do putj=hfix+1,mo_tot_num + if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle + hij = (integral8(p1, p2, hfix, putj)-integral8(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2) + tmp_row(:,putj) += hij * coefs + end do + + mat(:, :puti-1, puti) += tmp_row(:,:puti-1) + mat(:, puti, puti:) += tmp_row(:,puti:) + end do + else + hfix = h(1,mi) + pfix = p(1,mi) + p1 = p(1,ma) + p2 = p(2,ma) + tmp_row = 0d0 + tmp_row2 = 0d0 + do puti=1,mo_tot_num + if(lbanned(puti,ma)) cycle + putj = p2 + if(.not. banned(puti,putj,1)) then + hij = integral8(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1) + tmp_row(:,puti) += hij * coefs + end if + + putj = p1 + if(.not. banned(puti,putj,1)) then + hij = integral8(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2) + tmp_row2(:,puti) += hij * coefs + end if + end do + mat(:,:p2-1,p2) += tmp_row(:,:p2-1) + mat(:,p2,p2:) += tmp_row(:,p2:) + mat(:,:p1-1,p1) += tmp_row2(:,:p1-1) + mat(:,p1,p1:) += tmp_row2(:,p1:) + end if + end if + + !! MONO + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + do i1=1,p(0,s1) + ib = 1 + if(s1 == s2) ib = i1+1 + do i2=ib,p(0,s2) + p1 = p(i1,s1) + p2 = p(i2,s2) + if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + mat(:, p1, p2) += coefs * hij + end do + end do +end subroutine + + + + +subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) + logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + integer :: i, j, s, h1, h2, p1, p2, puti, putj + double precision :: hij, phase + double precision, external :: get_phase_bi, integral8 + logical :: ok + + integer :: bant + bant = 1 + + + if(sp == 3) then ! AB + h1 = p(1,1) + h2 = p(1,2) + do p1=1, mo_tot_num + if(bannedOrb(p1, 1)) cycle + do p2=1, mo_tot_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) + end if + mat(:, p1, p2) += coefs(:) * hij + end do + end do + else ! AA BB + p1 = p(1,sp) + p2 = p(2,sp) + do puti=1, mo_tot_num + if(bannedOrb(puti, sp)) cycle + do putj=puti+1, mo_tot_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + hij = (integral8(p1, p2, puti, putj) - integral8(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2) + end if + mat(:, puti, putj) += coefs(:) * hij + end do + end do + end if +end subroutine + + +subroutine past_d1(bannedOrb, p) + use bitmasks + implicit none + + logical, intent(inout) :: bannedOrb(mo_tot_num, 2) + integer, intent(in) :: p(0:4, 2) + integer :: i,s + + do s = 1, 2 + do i = 1, p(0, s) + bannedOrb(p(i, s), s) = .true. + end do + end do +end subroutine + + +subroutine past_d2(banned, p, sp) + use bitmasks + implicit none + + logical, intent(inout) :: banned(mo_tot_num, mo_tot_num) + integer, intent(in) :: p(0:4, 2), sp + integer :: i,j + + if(sp == 3) then + do i=1,p(0,1) + do j=1,p(0,2) + banned(p(i,1), p(j,2)) = .true. + end do + end do + else + do i=1,p(0, sp) + do j=1,i-1 + banned(p(j,sp), p(i,sp)) = .true. + banned(p(i,sp), p(j,sp)) = .true. + end do + end do + end if +end subroutine + + + +subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) + use bitmasks + implicit none + + integer, intent(in) :: interesting(0:N) + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) + integer, intent(in) :: i_gen, N + logical, intent(inout) :: banned(mo_tot_num, mo_tot_num) + logical, intent(out) :: fullMatch + + + integer :: i, j, na, nb, list(3) + integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) + + fullMatch = .false. + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + genl : do i=1, N + do j=1, N_int + if(iand(det(j,1,i), mask(j,1)) /= mask(j, 1)) cycle genl + if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl + end do + + if(interesting(i) < i_gen) then + fullMatch = .true. + return + end if + + do j=1, N_int + myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) + myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) + end do + + call bitstring_to_list(myMask(1,1), list(1), na, N_int) + call bitstring_to_list(myMask(1,2), list(na+1), nb, N_int) + banned(list(1), list(2)) = .true. + end do genl +end subroutine + From e93cd9cf3216f2d77eb4531d7080c24e05c192b5 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 16 Jan 2017 14:05:24 +0100 Subject: [PATCH 026/106] results given during iteration --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 203 +++++++++++++++++------------- 1 file changed, 114 insertions(+), 89 deletions(-) diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 087a5579..a1756ca7 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -146,6 +146,10 @@ subroutine ZMQ_pt2(pt2) double precision :: time0, time allocate(pt2_detail(N_states, N_det_generators), comb(100000), computed(N_det_generators), tbc(0:N_det_generators)) + sumabove = 0d0 + sum2above = 0d0 + Nabove = 0d0 + provide nproc !call random_seed() @@ -162,95 +166,63 @@ subroutine ZMQ_pt2(pt2) time0 = omp_get_wtime() print *, "grep - time - avg - err - n_combs" do while(.true.) - - call new_parallel_job(zmq_to_qp_run_socket,"pt2") - call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) - call zmq_set_running(zmq_to_qp_run_socket) - call create_selection_buffer(1, 1*2, b) - + + call new_parallel_job(zmq_to_qp_run_socket,"pt2") + call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) + call zmq_set_running(zmq_to_qp_run_socket) + call create_selection_buffer(1, 1*2, b) + - - - - - call get_carlo_workbatch(1d-3, computed, comb, Ncomb, tbc) - generator_per_task = 1 ! tbc(0)/300 + 1 - print *, 'TASKS REVERSED' - !do i=1,tbc(0),generator_per_task - do i=1,tbc(0) ! generator_per_task - i_generator_end = min(i+generator_per_task-1, tbc(0)) - !print *, "TASK", (i_generator_end-i+1), tbc(i:i_generator_end) - if(i > 10) then - integer :: zero - zero = 0 - write(task,*) (i_generator_end-i+1), zero, tbc(i:i_generator_end) - call add_task_to_taskserver(zmq_to_qp_run_socket,task) - else - do j=1,8 - write(task,*) (i_generator_end-i+1), j, tbc(i:i_generator_end) + call get_carlo_workbatch(1d-2, computed, comb, Ncomb, tbc) + generator_per_task = 1 + do i=1,tbc(0) + i_generator_end = min(i+generator_per_task-1, tbc(0)) + if(tbc(i) > 10) then + integer :: zero + zero = 0 + write(task,*) (i_generator_end-i+1), zero, tbc(i:i_generator_end) call add_task_to_taskserver(zmq_to_qp_run_socket,task) - end do - end if - end do + else + do j=1,8 + write(task,*) (i_generator_end-i+1), j, tbc(i:i_generator_end) + call add_task_to_taskserver(zmq_to_qp_run_socket,task) + end do + end if + end do - print *, "tasked" - !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) - i = omp_get_thread_num() - if (i==0) then - call pt2_collector(b, pt2_detail) - else - call pt2_slave_inproc(i) - endif - !$OMP END PARALLEL - call end_parallel_job(zmq_to_qp_run_socket, 'pt2') - double precision :: E0, avg, eqt - call do_carlo(tbc, Ncomb, comb, pt2_detail, sumabove, sum2above, Nabove) - !END LOOP? - integer :: tooth - call get_first_tooth(computed, tooth) - !print *, "TOOTH ", tooth - - !!! ASSERT - !do i=1,first_det_of_teeth(tooth) - ! if(not(computed(i))) stop "deter non calc" - !end do - !logical :: ok - !ok = .false. - !do i=first_det_of_teeth(tooth), first_det_of_teeth(tooth+1) - ! if(not(computed(i))) ok = .true. - !end do - !if(not(ok)) stop "not OK..." - !!!!! - double precision :: prop - if(Nabove(tooth) >= 30) then - E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1)) - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - cweight(first_det_of_teeth(tooth)-1)) - prop = prop / weight(first_det_of_teeth(tooth)) - E0 += pt2_detail(1,first_det_of_teeth(tooth)) * prop - avg = E0 + (sumabove(tooth) / Nabove(tooth)) - eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) - time = omp_get_wtime() - print "(A, 5(E15.7))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth) - else - print *, Nabove(tooth), "< 30 combs" - end if - tbc(0) = 0 + print *, "tasked" + !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) + i = omp_get_thread_num() + if (i==0) then + call pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove) + else + call pt2_slave_inproc(i) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, 'pt2') + tbc(0) = 0 end do pt2 = 0d0 end subroutine -subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, sumabove, sum2above, Nabove) +subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, Nabove) integer, intent(in) :: tbc(0:N_det_generators), Ncomb + logical, intent(in) :: computed(N_det_generators) double precision, intent(in) :: comb(Ncomb), pt2_detail(N_states, N_det_generators) double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) integer :: i, dets(comb_teeth) double precision :: myVal, myVal2 - - do i=1,Ncomb + mainLoop : do i=1,Ncomb call get_comb(comb(i), dets) + do j=1,comb_teeth + if(not(computed(dets(j)))) then + exit mainLoop + end if + end do + myVal = 0d0 myVal2 = 0d0 do j=comb_teeth,1,-1 @@ -259,7 +231,7 @@ subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, sumabove, sum2above, Nabove) sum2above(j) += myVal**2 Nabove(j) += 1 end do - end do + end do mainLoop end subroutine @@ -328,15 +300,22 @@ subroutine pt2_slave_inproc(i) call run_pt2_slave(1,i,ci_electronic_energy) end -subroutine pt2_collector(b, pt2_detail) +subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove) use f77_zmq use selection_types use bitmasks implicit none + + integer, intent(in) :: Ncomb + double precision, intent(inout) :: pt2_detail(N_states, N_det_generators) + double precision, intent(in) :: comb(Ncomb) + logical, intent(inout) :: computed(N_det_generators) + integer, intent(in) :: tbc(0:N_det_generators) + double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) + type(selection_buffer), intent(inout) :: b - double precision, intent(inout) :: pt2_detail(N_states, N_det) double precision :: pt2_mwen(N_states, N_det) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -351,23 +330,43 @@ subroutine pt2_collector(b, pt2_detail) integer, allocatable :: task_id(:) integer :: done, Nindex integer, allocatable :: index(:) - real :: time, time0 + double precision :: time, time0, timeLast + double precision, external :: omp_get_wtime + integer :: tooth, firstTBDcomb, orgTBDcomb + integer, allocatable :: parts_to_get(:) + logical, allocatable :: actually_computed(:) + + allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators)) + actually_computed = computed + + parts_to_get(:) = 1 + parts_to_get(1:10) = 8 + + do i=1,tbc(0) + actually_computed(tbc(i)) = .false. + end do + + orgTBDcomb = Nabove(1) + firstTBDcomb = 1 + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_pull = new_zmq_pull_socket() allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det), index(N_det)) done = 0 more = 1 - !pt2_detail = 0d0 - call CPU_TIME(time0) - do while (more == 1) + time0 = omp_get_wtime() + timeLast = time0 + + pullLoop : do while (more == 1) call pull_pt2_results(zmq_socket_pull, Nindex, index, pt2_mwen, task_id, ntask) do i=1,Nindex pt2_detail(:, index(i)) += pt2_mwen(:,i) + parts_to_get(index(i)) -= 1 + if(parts_to_get(index(i)) < 0) then + stop "PARTS ??" + end if + if(parts_to_get(index(i)) == 0) actually_computed(index(i)) = .true. end do - - !do i=1, N - ! call add_to_selection_buffer(b, det(1,1,i), val(i)) - !end do do i=1, ntask if(task_id(i) == 0) then @@ -376,9 +375,35 @@ subroutine pt2_collector(b, pt2_detail) call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) end do done += ntask - call CPU_TIME(time) -! print *, "DONE" , done, time - time0 - end do + + time = omp_get_wtime() + + if(time - timeLast > 30.0 .or. more /= 1) then + timeLast = time + do i=1, first_det_of_teeth(1)-1 + if(not(actually_computed(i))) then + print *, "PT2 : deterministic part not finished" + cycle pullLoop + end if + end do + + + double precision :: E0, avg, eqt, prop + call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove) + firstTBDcomb = Nabove(1) - orgTBDcomb + 1 + if(Nabove(1) < 2.0) cycle + call get_first_tooth(actually_computed, tooth) + + E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1)) + prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - cweight(first_det_of_teeth(tooth)-1)) + prop = prop / weight(first_det_of_teeth(tooth)) + E0 += pt2_detail(1,first_det_of_teeth(tooth)) * prop + avg = E0 + (sumabove(tooth) / Nabove(tooth)) + eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) + time = omp_get_wtime() + print "(A, 5(E15.7))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth) + end if + end do pullLoop call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) @@ -510,9 +535,9 @@ subroutine get_carlo_workbatch(maxWorkload, computed, comb, Ncomb, tbc) comb(i) = comb(i) * comb_step call add_comb(comb(i), computed, tbc, myWorkload) Ncomb = i - if(myWorkload > maxWorkload .and. i >= 50) exit + if(myWorkload > maxWorkload .and. i >= 100) exit end do - call reorder_tbc(tbc) + !call reorder_tbc(tbc) end subroutine From 2fe656d4180d4fc466a1cec09e1a417e463f61f1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 16 Jan 2017 16:31:49 +0100 Subject: [PATCH 027/106] Cochonneries --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 8 +++----- plugins/Full_CI_ZMQ/selection_buffer.irp.f | 12 ++++++----- plugins/MRPT_Utils/ezfio_interface.irp.f | 23 ---------------------- 3 files changed, 10 insertions(+), 33 deletions(-) delete mode 100644 plugins/MRPT_Utils/ezfio_interface.irp.f diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 087a5579..6a89fa75 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -68,7 +68,7 @@ program fci_zmq call ezfio_set_full_ci_zmq_energy(CI_energy(1)) n_det_before = N_det - to_select = 2*N_det + to_select = N_det to_select = max(64-to_select, to_select) to_select = min(to_select, N_det_max-n_det_before) call ZMQ_selection(to_select, pt2) @@ -175,7 +175,6 @@ subroutine ZMQ_pt2(pt2) call get_carlo_workbatch(1d-3, computed, comb, Ncomb, tbc) generator_per_task = 1 ! tbc(0)/300 + 1 - print *, 'TASKS REVERSED' !do i=1,tbc(0),generator_per_task do i=1,tbc(0) ! generator_per_task i_generator_end = min(i+generator_per_task-1, tbc(0)) @@ -193,7 +192,6 @@ subroutine ZMQ_pt2(pt2) end if end do - print *, "tasked" !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) i = omp_get_thread_num() if (i==0) then @@ -230,7 +228,7 @@ subroutine ZMQ_pt2(pt2) avg = E0 + (sumabove(tooth) / Nabove(tooth)) eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) time = omp_get_wtime() - print "(A, 5(E15.7))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth) + print *, 'PT2stoch ', real (time - time0), real(avg), real(eqt), real(Nabove(tooth)) else print *, Nabove(tooth), "< 30 combs" end if @@ -284,7 +282,7 @@ subroutine ZMQ_selection(N_in, pt2) call new_parallel_job(zmq_to_qp_run_socket,"selection") call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) call zmq_set_running(zmq_to_qp_run_socket) - call create_selection_buffer(N, N*2, b) + call create_selection_buffer(N, N*8, b) integer :: i_generator, i_generator_start, i_generator_max, step ! step = int(max(1.,10*elec_num/mo_tot_num) diff --git a/plugins/Full_CI_ZMQ/selection_buffer.irp.f b/plugins/Full_CI_ZMQ/selection_buffer.irp.f index 2bcb11d3..f06f9726 100644 --- a/plugins/Full_CI_ZMQ/selection_buffer.irp.f +++ b/plugins/Full_CI_ZMQ/selection_buffer.irp.f @@ -57,13 +57,15 @@ subroutine sort_selection_buffer(b) call dsort(absval, iorder, b%cur) do i=1, nmwen - detmp(:,:,i) = b%det(:,:,iorder(i)) + detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i)) + detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i)) vals(i) = b%val(iorder(i)) end do - b%det(:,:,:nmwen) = detmp(:,:,:) - b%det(:,:,nmwen+1:) = 0_bit_kind - b%val(:nmwen) = vals(:) - b%val(nmwen+1:) = 0d0 + b%det = 0_bit_kind + b%val = 0d0 + b%det(1:N_int,1,1:nmwen) = detmp(1:N_int,1,1:nmwen) + b%det(1:N_int,2,1:nmwen) = detmp(1:N_int,2,1:nmwen) + b%val(1:nmwen) = vals(1:nmwen) b%mini = max(b%mini,dabs(b%val(b%N))) b%cur = nmwen end subroutine diff --git a/plugins/MRPT_Utils/ezfio_interface.irp.f b/plugins/MRPT_Utils/ezfio_interface.irp.f deleted file mode 100644 index 18fb453e..00000000 --- a/plugins/MRPT_Utils/ezfio_interface.irp.f +++ /dev/null @@ -1,23 +0,0 @@ -! DO NOT MODIFY BY HAND -! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py -! from file /home/garniron/quantum_package/src/MRPT_Utils/EZFIO.cfg - - -BEGIN_PROVIDER [ logical, do_third_order_1h1p ] - implicit none - BEGIN_DOC -! If true, compute the third order contribution for the 1h1p - END_DOC - - logical :: has - PROVIDE ezfio_filename - - call ezfio_has_mrpt_utils_do_third_order_1h1p(has) - if (has) then - call ezfio_get_mrpt_utils_do_third_order_1h1p(do_third_order_1h1p) - else - print *, 'mrpt_utils/do_third_order_1h1p not found in EZFIO file' - stop 1 - endif - -END_PROVIDER From ffe7eb1fae63069c74988a7828af46783041a7e7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 16 Jan 2017 18:51:01 +0100 Subject: [PATCH 028/106] Forgot pt2_stoch.irp.f --- plugins/Full_CI_ZMQ/fci_routines.irp.f | 6 ---- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 2 +- plugins/Full_CI_ZMQ/pt2_stoch.irp.f | 46 ++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 7 deletions(-) create mode 100644 plugins/Full_CI_ZMQ/pt2_stoch.irp.f diff --git a/plugins/Full_CI_ZMQ/fci_routines.irp.f b/plugins/Full_CI_ZMQ/fci_routines.irp.f index d285912a..6eaabbd7 100644 --- a/plugins/Full_CI_ZMQ/fci_routines.irp.f +++ b/plugins/Full_CI_ZMQ/fci_routines.irp.f @@ -56,12 +56,6 @@ subroutine selection_slave_inproc(i) call run_selection_slave(1,i,ci_electronic_energy) end -subroutine pt2_slave_inproc(i) - implicit none - integer, intent(in) :: i - - call run_pt2_slave(1,i,ci_electronic_energy) -end subroutine selection_collector(b, pt2) use f77_zmq diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 016f5ff8..f0caa44e 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -104,7 +104,7 @@ program fci_zmq E_CI_before(1:N_states) = CI_energy(1:N_states) !call ZMQ_selection(0, pt2)! pour non-stochastic double precision :: relative_error - relative_error=1.d-2 + relative_error=1.d-3 call ZMQ_pt2(pt2,relative_error) print *, 'Final step' print *, 'N_det = ', N_det diff --git a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f new file mode 100644 index 00000000..a2ee5e20 --- /dev/null +++ b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f @@ -0,0 +1,46 @@ +program pt2_stoch + implicit none + initialize_pt2_E0_denominator = .False. + read_wf = .True. + SOFT_TOUCH initialize_pt2_E0_denominator read_wf + PROVIDE mo_bielec_integrals_in_map + call run +end + +subroutine run + implicit none + integer :: i,j,k + logical, external :: detEq + + double precision, allocatable :: pt2(:) + integer :: degree + integer :: n_det_before, to_select + double precision :: threshold_davidson_in + + double precision :: E_CI_before(N_states), relative_error + + if (.true.) then + call ezfio_get_full_ci_zmq_energy(E_CI_before(1)) + pt2_e0_denominator(:) = E_CI_before(1) - nuclear_repulsion + SOFT_TOUCH pt2_e0_denominator read_wf + endif + allocate (pt2(N_states)) + + threshold_selectors = 1.d0 + threshold_generators = 1d0 + relative_error = 1.d-3 + call ZMQ_pt2(pt2, relative_error) + print *, 'Final step' + print *, 'N_det = ', N_det + print *, 'N_states = ', N_states + do k=1,N_states + print *, 'State', k + print *, 'PT2 = ', pt2 + print *, 'E = ', E_CI_before + print *, 'E+PT2 = ', E_CI_before+pt2 + print *, '-----' + enddo + call ezfio_set_full_ci_zmq_energy_pt2(E_CI_before(1)+pt2(1)) +end + + From 042024f4240d659c27f79fea8f8c55f006e5b3f0 Mon Sep 17 00:00:00 2001 From: Yann GARNIRON Date: Wed, 18 Jan 2017 08:42:17 +0100 Subject: [PATCH 029/106] teeth filling --- plugins/Full_CI_ZMQ/fci_routines.irp.f | 6 -- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 68 ++++++++++++++++---- plugins/Full_CI_ZMQ/selection.irp.f | 2 +- 3 files changed, 58 insertions(+), 18 deletions(-) diff --git a/plugins/Full_CI_ZMQ/fci_routines.irp.f b/plugins/Full_CI_ZMQ/fci_routines.irp.f index d285912a..6eaabbd7 100644 --- a/plugins/Full_CI_ZMQ/fci_routines.irp.f +++ b/plugins/Full_CI_ZMQ/fci_routines.irp.f @@ -56,12 +56,6 @@ subroutine selection_slave_inproc(i) call run_selection_slave(1,i,ci_electronic_energy) end -subroutine pt2_slave_inproc(i) - implicit none - integer, intent(in) :: i - - call run_pt2_slave(1,i,ci_electronic_energy) -end subroutine selection_collector(b, pt2) use f77_zmq diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 47651e22..186af429 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -1,3 +1,10 @@ + + BEGIN_PROVIDER [ integer, fragment_count ] +&BEGIN_PROVIDER [ integer, fragment_first ] + fragment_count = 8 + fragment_first = 4 +END_PROVIDER + subroutine ZMQ_pt2(pt2,relative_error) use f77_zmq use selection_types @@ -54,13 +61,13 @@ subroutine ZMQ_pt2(pt2,relative_error) generator_per_task = 1 do i=1,tbc(0) i_generator_end = min(i+generator_per_task-1, tbc(0)) - if(tbc(i) > 10) then + if(tbc(i) > fragment_first) then integer :: zero zero = 0 write(task,*) (i_generator_end-i+1), zero, tbc(i:i_generator_end) call add_task_to_taskserver(zmq_to_qp_run_socket,task) else - do j=1,8 + do j=1,fragment_count write(task,*) (i_generator_end-i+1), j, tbc(i:i_generator_end) call add_task_to_taskserver(zmq_to_qp_run_socket,task) end do @@ -161,7 +168,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su actually_computed = computed parts_to_get(:) = 1 - parts_to_get(1:10) = 8 + if(fragment_first > 0) parts_to_get(1:fragment_first) = fragment_count do i=1,tbc(0) actually_computed(tbc(i)) = .false. @@ -173,16 +180,17 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_pull = new_zmq_pull_socket() allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det), index(N_det)) - done = 0 more = 1 time0 = omp_get_wtime() timeLast = time0 + print *, 'N_deterministic = ', first_det_of_teeth(1)-1 pullLoop : do while (more == 1) call pull_pt2_results(zmq_socket_pull, Nindex, index, pt2_mwen, task_id, ntask) do i=1,Nindex pt2_detail(:, index(i)) += pt2_mwen(:,i) parts_to_get(index(i)) -= 1 + !print *, index(1) if(parts_to_get(index(i)) < 0) then stop "PARTS ??" end if @@ -195,7 +203,6 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su endif call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) end do - done += ntask time = omp_get_wtime() @@ -208,13 +215,17 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su end if end do - double precision :: E0, avg, eqt, prop call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove) firstTBDcomb = Nabove(1) - orgTBDcomb + 1 if(Nabove(1) < 2.0) cycle call get_first_tooth(actually_computed, tooth) - + + done = 0 + do i=first_det_of_teeth(tooth), first_det_of_teeth(tooth+1)-1 + if(actually_computed(i)) done = done + 1 + end do + E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1)) prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - cweight(first_det_of_teeth(tooth)-1)) prop = prop / weight(first_det_of_teeth(tooth)) @@ -222,7 +233,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su avg = E0 + (sumabove(tooth) / Nabove(tooth)) eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) time = omp_get_wtime() - print "(A, 5(E15.7))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth) + print "(A, 4(E15.7), 4(I9))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) if (dabs(eqt/avg) < relative_error) then relative_error = 0.d0 pt2(1) = avg @@ -288,6 +299,28 @@ subroutine get_first_tooth(computed, first_teeth) end subroutine +subroutine get_last_full_tooth(computed, last_tooth) + implicit none + logical, intent(in) :: computed(N_det_generators) + integer, intent(out) :: last_tooth + integer :: i, j, missing + + last_tooth = 0 + combLoop : do i=comb_teeth-1, 1, -1 + missing = 1 + do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 + if(not(computed(j))) then + missing -= 1 + if(missing < 0) cycle combLoop + end if + end do + last_tooth = i + exit + end do combLoop +end subroutine + + + subroutine get_carlo_workbatch(maxWorkload, computed, comb, Ncomb, tbc) implicit none double precision, intent(in) :: maxWorkload @@ -295,9 +328,9 @@ subroutine get_carlo_workbatch(maxWorkload, computed, comb, Ncomb, tbc) integer, intent(inout) :: tbc(0:N_det_generators) integer, intent(out) :: Ncomb logical, intent(inout) :: computed(N_det_generators) - integer :: i, dets(comb_teeth) + integer :: i, j, last_full, dets(comb_teeth) double precision :: myWorkload - + myWorkload = 0d0 do i=1,size(comb) @@ -305,9 +338,22 @@ subroutine get_carlo_workbatch(maxWorkload, computed, comb, Ncomb, tbc) comb(i) = comb(i) * comb_step call add_comb(comb(i), computed, tbc, myWorkload) Ncomb = i + + call get_last_full_tooth(computed, last_full) + if(Ncomb >= 30 .and. last_full /= 0) then + do j=1,first_det_of_teeth(last_full+1)-1 + if(not(computed(j))) then + tbc(0) += 1 + tbc(tbc(0)) = j + computed(j) = .true. + myWorkload += comb_workload(j) + print *, "filled ", j, "to reach tooth", last_full, "ending at", first_det_of_teeth(last_full+1) + end if + end do + end if + if(myWorkload > maxWorkload .and. i >= 100) exit end do - end subroutine diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 2f524022..4f8c7a40 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -346,7 +346,7 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p - if(subset == 0 .or. mod(maskInd, 8) == (subset-1)) then + if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then From 48adebac20181ac00b37de5211e7a11dadb5efd5 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 18 Jan 2017 15:53:51 +0100 Subject: [PATCH 030/106] bug correction - fixed zlib link --- configure | 2 +- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/configure b/configure index cf8d1e03..86fff79f 100755 --- a/configure +++ b/configure @@ -102,7 +102,7 @@ curl = Info( default_path=join(QP_ROOT_BIN, "curl")) zlib = Info( - url='http://www.zlib.net/zlib-1.2.10.tar.gz', + url='http://www.zlib.net/fossils/zlib-1.2.10.tar.gz', description=' zlib', default_path=join(QP_ROOT_LIB, "libz.a")) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 186af429..e41b504f 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -190,7 +190,6 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su do i=1,Nindex pt2_detail(:, index(i)) += pt2_mwen(:,i) parts_to_get(index(i)) -= 1 - !print *, index(1) if(parts_to_get(index(i)) < 0) then stop "PARTS ??" end if @@ -280,22 +279,24 @@ subroutine get_first_tooth(computed, first_teeth) implicit none logical, intent(in) :: computed(N_det_generators) integer, intent(out) :: first_teeth - integer :: i + integer :: i, first_det + first_det = 1 first_teeth = 1 do i=first_det_of_comb, N_det_generators if(not(computed(i))) then - first_teeth = i + first_det = i exit end if end do do i=comb_teeth, 1, -1 - if(first_det_of_teeth(i) < first_teeth) then + if(first_det_of_teeth(i) < first_det) then first_teeth = i exit end if end do + end subroutine From 1045bed51e56fb5a84772d81480c9f76fffb106e Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 18 Jan 2017 15:59:14 +0100 Subject: [PATCH 031/106] Forgot pt2_stoch --- plugins/Full_CI_ZMQ/pt2_stoch.irp.f | 46 +++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 plugins/Full_CI_ZMQ/pt2_stoch.irp.f diff --git a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f new file mode 100644 index 00000000..de756f02 --- /dev/null +++ b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f @@ -0,0 +1,46 @@ +program pt2_stoch + implicit none + initialize_pt2_E0_denominator = .False. + read_wf = .True. + SOFT_TOUCH initialize_pt2_E0_denominator read_wf + PROVIDE mo_bielec_integrals_in_map + call run +end + +subroutine run + implicit none + integer :: i,j,k + logical, external :: detEq + + double precision, allocatable :: pt2(:) + integer :: degree + integer :: n_det_before, to_select + double precision :: threshold_davidson_in + + double precision :: E_CI_before(N_states), relative_error + + if (.true.) then + call ezfio_get_full_ci_zmq_energy(E_CI_before(1)) + pt2_e0_denominator(:) = E_CI_before(1) - nuclear_repulsion + SOFT_TOUCH pt2_e0_denominator read_wf + endif + allocate (pt2(N_states)) + + threshold_selectors = 1.d0 + threshold_generators = 1d0 + relative_error = 1.d-8 + call ZMQ_pt2(pt2, relative_error) + print *, 'Final step' + print *, 'N_det = ', N_det + print *, 'N_states = ', N_states + do k=1,N_states + print *, 'State', k + print *, 'PT2 = ', pt2 + print *, 'E = ', E_CI_before + print *, 'E+PT2 = ', E_CI_before+pt2 + print *, '-----' + enddo + call ezfio_set_full_ci_zmq_energy_pt2(E_CI_before(1)+pt2(1)) +end + + From 9ef173e0585d5d08cf0839f1cc136eefa82b5374 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 18 Jan 2017 16:06:23 +0100 Subject: [PATCH 032/106] Amazing new things --- plugins/Full_CI_ZMQ/fci_routines.irp.f | 4 ++++ plugins/Full_CI_ZMQ/pt2_stoch.irp.f | 2 +- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 7 +++++-- plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f | 4 ++++ plugins/mrcc_selected/ezfio_interface.irp.f | 2 +- src/Davidson/EZFIO.cfg | 6 ++++++ src/Davidson/diagonalization_hs2.irp.f | 7 +++++-- src/Integrals_Bielec/mo_bi_integrals.irp.f | 2 ++ 8 files changed, 28 insertions(+), 6 deletions(-) diff --git a/plugins/Full_CI_ZMQ/fci_routines.irp.f b/plugins/Full_CI_ZMQ/fci_routines.irp.f index 6eaabbd7..913bb0e8 100644 --- a/plugins/Full_CI_ZMQ/fci_routines.irp.f +++ b/plugins/Full_CI_ZMQ/fci_routines.irp.f @@ -42,9 +42,13 @@ subroutine ZMQ_selection(N_in, pt2) endif !$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, 'selection') + do i=1,N_states + print *, 'E+PT2(', i, ') = ', ci_electronic_energy(i) + pt2(i) + enddo if (N_in > 0) then call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN call copy_H_apply_buffer_to_wf() + call save_wavefunction endif end subroutine diff --git a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f index a2ee5e20..71ebf357 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f @@ -28,7 +28,7 @@ subroutine run threshold_selectors = 1.d0 threshold_generators = 1d0 - relative_error = 1.d-3 + relative_error = 1.d-6 call ZMQ_pt2(pt2, relative_error) print *, 'Final step' print *, 'N_det = ', N_det diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 47651e22..6423a1b1 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -151,7 +151,8 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su integer, allocatable :: task_id(:) integer :: done, Nindex integer, allocatable :: index(:) - double precision :: time, time0, timeLast + double precision, save :: time0 = -1.d0 + double precision :: time, timeLast double precision, external :: omp_get_wtime integer :: tooth, firstTBDcomb, orgTBDcomb integer, allocatable :: parts_to_get(:) @@ -175,7 +176,9 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det), index(N_det)) done = 0 more = 1 - time0 = omp_get_wtime() + if (time0 < 0.d0) then + time0 = omp_get_wtime() + endif timeLast = time0 pullLoop : do while (more == 1) diff --git a/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f b/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f index 77bbab03..04a1d9d4 100644 --- a/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/target_pt2_ratio_zmq.irp.f @@ -78,6 +78,9 @@ program fci_zmq Nmin = N_det else Nmax = N_det + psi_coef_ref = psi_coef + psi_det_ref = psi_det + TOUCH psi_det psi_coef endif N_det = Nmin + (Nmax-Nmin)/2 print *, '-----' @@ -85,6 +88,7 @@ program fci_zmq print *, 'Ratio : ', ratio, ' ~ ', var_pt2_ratio print *, 'N_det = ', N_det print *, 'E = ', CI_energy(1) + call save_wavefunction enddo call ZMQ_selection(0, pt2) print *, '------' diff --git a/plugins/mrcc_selected/ezfio_interface.irp.f b/plugins/mrcc_selected/ezfio_interface.irp.f index 062af449..47e7cea5 100644 --- a/plugins/mrcc_selected/ezfio_interface.irp.f +++ b/plugins/mrcc_selected/ezfio_interface.irp.f @@ -1,6 +1,6 @@ ! DO NOT MODIFY BY HAND ! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py -! from file /home/scemama/quantum_package/src/mrcc_selected/EZFIO.cfg +! from file /ccc/work/cont003/gen1738/scemama/quantum_package/src/mrcc_selected/EZFIO.cfg BEGIN_PROVIDER [ double precision, thresh_dressed_ci ] diff --git a/src/Davidson/EZFIO.cfg b/src/Davidson/EZFIO.cfg index 7724400f..20113732 100644 --- a/src/Davidson/EZFIO.cfg +++ b/src/Davidson/EZFIO.cfg @@ -28,3 +28,9 @@ doc: If true, disk space is used to store the vectors default: False interface: ezfio,provider,ocaml +[distributed_davidson] +type: logical +doc: If true, use the distributed algorithm +default: False +interface: ezfio,provider,ocaml + diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index dccc8ee5..1901525b 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -223,8 +223,11 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ! ----------------------------------------- -! call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8) - call H_S2_u_0_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8) + if (distributed_davidson) then + call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8) + else + call H_S2_u_0_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8) + endif ! Compute h_kl = = diff --git a/src/Integrals_Bielec/mo_bi_integrals.irp.f b/src/Integrals_Bielec/mo_bi_integrals.irp.f index b56f3518..68c44210 100644 --- a/src/Integrals_Bielec/mo_bi_integrals.irp.f +++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f @@ -35,6 +35,8 @@ BEGIN_PROVIDER [ logical, mo_bielec_integrals_in_map ] call map_load_from_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) print*, 'MO integrals provided' return + else + PROVIDE ao_bielec_integrals_in_map endif if(no_vvvv_integrals)then From 27dd2420dd0c36d396bd3a5cf6469d9d7a1eab8a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 25 Jan 2017 16:24:02 +0100 Subject: [PATCH 033/106] Bugs+fragments --- plugins/Full_CI_ZMQ/fci_routines.irp.f | 117 ------------------- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 2 +- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 8 +- plugins/Full_CI_ZMQ/selection.irp.f | 21 +--- 4 files changed, 7 insertions(+), 141 deletions(-) delete mode 100644 plugins/Full_CI_ZMQ/fci_routines.irp.f diff --git a/plugins/Full_CI_ZMQ/fci_routines.irp.f b/plugins/Full_CI_ZMQ/fci_routines.irp.f deleted file mode 100644 index 913bb0e8..00000000 --- a/plugins/Full_CI_ZMQ/fci_routines.irp.f +++ /dev/null @@ -1,117 +0,0 @@ -subroutine ZMQ_selection(N_in, pt2) - use f77_zmq - use selection_types - - implicit none - - character*(512) :: task - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - integer, intent(in) :: N_in - type(selection_buffer) :: b - integer :: i, N - integer, external :: omp_get_thread_num - double precision, intent(out) :: pt2(N_states) - - - N = max(N_in,1) - provide nproc - provide ci_electronic_energy - call new_parallel_job(zmq_to_qp_run_socket,"selection") - call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) - call zmq_set_running(zmq_to_qp_run_socket) - call create_selection_buffer(N, N*8, b) - - integer :: i_generator, i_generator_start, i_generator_max, step -! step = int(max(1.,10*elec_num/mo_tot_num) - - step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) - step = max(1,step) - do i= N_det_generators, 1, -step - i_generator_start = max(i-step+1,1) - i_generator_max = i - write(task,*) i_generator_start, i_generator_max, 1, N - call add_task_to_taskserver(zmq_to_qp_run_socket,task) - end do - - !$OMP PARALLEL DEFAULT(none) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) shared(ci_electronic_energy_is_built, n_det_generators_is_built, n_states_is_built, n_int_is_built, nproc_is_built) - i = omp_get_thread_num() - if (i==0) then - call selection_collector(b, pt2) - else - call selection_slave_inproc(i) - endif - !$OMP END PARALLEL - call end_parallel_job(zmq_to_qp_run_socket, 'selection') - do i=1,N_states - print *, 'E+PT2(', i, ') = ', ci_electronic_energy(i) + pt2(i) - enddo - if (N_in > 0) then - call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN - call copy_H_apply_buffer_to_wf() - call save_wavefunction - endif -end subroutine - - -subroutine selection_slave_inproc(i) - implicit none - integer, intent(in) :: i - - call run_selection_slave(1,i,ci_electronic_energy) -end - - -subroutine selection_collector(b, pt2) - use f77_zmq - use selection_types - use bitmasks - implicit none - - - type(selection_buffer), intent(inout) :: b - double precision, intent(out) :: pt2(N_states) - double precision :: pt2_mwen(N_states) - integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - - integer(ZMQ_PTR), external :: new_zmq_pull_socket - integer(ZMQ_PTR) :: zmq_socket_pull - - integer :: msg_size, rc, more - integer :: acc, i, j, robin, N, ntask - double precision, allocatable :: val(:) - integer(bit_kind), allocatable :: det(:,:,:) - integer, allocatable :: task_id(:) - integer :: done - real :: time, time0 - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - zmq_socket_pull = new_zmq_pull_socket() - allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det)) - done = 0 - more = 1 - pt2(:) = 0d0 - call CPU_TIME(time0) - do while (more == 1) - call pull_selection_results(zmq_socket_pull, pt2_mwen, val(1), det(1,1,1), N, task_id, ntask) - pt2 += pt2_mwen - do i=1, N - call add_to_selection_buffer(b, det(1,1,i), val(i)) - end do - - do i=1, ntask - if(task_id(i) == 0) then - print *, "Error in collector" - endif - call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) - end do - done += ntask - call CPU_TIME(time) -! print *, "DONE" , done, time - time0 - end do - - - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - call end_zmq_pull_socket(zmq_socket_pull) - call sort_selection_buffer(b) -end subroutine - diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index f0caa44e..31d117a6 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -69,7 +69,7 @@ program fci_zmq n_det_before = N_det to_select = N_det - to_select = max(64-to_select, to_select) + to_select = max(64-N_det, to_select) to_select = min(to_select, N_det_max-n_det_before) call ZMQ_selection(to_select, pt2) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 062709a3..7d21bb56 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -1,8 +1,8 @@ BEGIN_PROVIDER [ integer, fragment_count ] &BEGIN_PROVIDER [ integer, fragment_first ] - fragment_count = 8 - fragment_first = 4 + fragment_count = 400 + fragment_first = 1000 END_PROVIDER subroutine ZMQ_pt2(pt2,relative_error) @@ -36,7 +36,7 @@ subroutine ZMQ_pt2(pt2,relative_error) provide nproc - !call random_seed() + call random_seed() computed = .false. tbc(0) = first_det_of_comb - 1 @@ -311,7 +311,7 @@ subroutine get_last_full_tooth(computed, last_tooth) last_tooth = 0 combLoop : do i=comb_teeth-1, 1, -1 - missing = 1 + missing = 1+ (first_det_of_teeth(i+1)-first_det_of_teeth(i))/100 do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 if(not(computed(j))) then missing -= 1 diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 4f8c7a40..7ca0f72f 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -337,18 +337,6 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first - !if(subset /= 0 .and. mod(maskInd, 10) /= (subset-1)) then - ! maskInd += 1 - ! cycle - !end if - maskInd += 1 - - - - - if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then - - h1 = hole_list(i1,s1) call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) @@ -400,12 +388,6 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p - - - end if - - - do s2=s1,2 sp = s1 @@ -418,7 +400,8 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p logical :: banned(mo_tot_num, mo_tot_num,2) logical :: bannedOrb(mo_tot_num, 2) - if(subset == 0 .or. mod(maskInd, 8) == (subset-1)) then + maskInd += 1 + if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then h2 = hole_list(i2,s2) call apply_hole(pmask, s2,h2, mask, ok, N_int) banned = .false. From 9e73ed6b1c59bc192bf88fc58ab479925aefd6ff Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 25 Jan 2017 17:42:25 +0100 Subject: [PATCH 034/106] Efficient fragmentation --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 8 +-- plugins/Full_CI_ZMQ/selection.irp.f | 63 +++++++++++++++----- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 7d21bb56..98ef0b49 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -1,8 +1,8 @@ BEGIN_PROVIDER [ integer, fragment_count ] &BEGIN_PROVIDER [ integer, fragment_first ] - fragment_count = 400 - fragment_first = 1000 + fragment_count = (elec_alpha_num-n_core_orb)**2 + fragment_first = first_det_of_teeth(1) END_PROVIDER subroutine ZMQ_pt2(pt2,relative_error) @@ -473,7 +473,3 @@ END_PROVIDER - - - - diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 7ca0f72f..85d5c39d 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -284,7 +284,6 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p logical :: monoAdo, monoBdo; integer :: maskInd - maskInd = -1 monoAdo = .true. monoBdo = .true. @@ -306,7 +305,36 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) + ! ====== + ! If the subset doesn't exist, return + logical :: will_compute + will_compute = subset == 0 + maskInd = -1 + + if (.not.will_compute) then + outerloop: do s1=1,2 + do i1=N_holes(s1),1,-1 ! Generate low excitations first + do s2=s1,2 + do i2=N_holes(s2),ib,-1 ! Generate low excitations first + maskInd += 1 + if(mod(maskInd, fragment_count) == (subset-1)) then + will_compute = .True. + exit outerloop + end if + enddo + enddo + enddo + enddo outerloop + if (.not.will_compute) then + return + endif + endif + ! ====== + + integer(bit_kind), allocatable:: preinteresting_det(:,:,:) + allocate (preinteresting_det(N_int,2,N_det)) + preinteresting(0) = 0 prefullinteresting(0) = 0 @@ -320,13 +348,14 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p do j=1,N_int mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) - nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) end do if(nt <= 4) then if(i <= N_det_selectors) then preinteresting(0) += 1 preinteresting(preinteresting(0)) = i + preinteresting_det(:,:,preinteresting(0)) = psi_det_sorted(:,:,i) else if(nt <= 2) then prefullinteresting(0) += 1 prefullinteresting(prefullinteresting(0)) = i @@ -334,28 +363,28 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p end if end do - + + maskInd = -1 do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first h1 = hole_list(i1,s1) call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) - do i=1,N_int - negMask(i,1) = not(pmask(i,1)) - negMask(i,2) = not(pmask(i,2)) - end do + negMask = not(pmask) interesting(0) = 0 fullinteresting(0) = 0 do ii=1,preinteresting(0) i = preinteresting(ii) - nt = 0 - do j=1,N_int - mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) - nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + mobMask(1,1) = iand(negMask(1,1), preinteresting_det(1,1,ii)) + mobMask(1,2) = iand(negMask(1,2), preinteresting_det(1,2,ii)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + do j=2,N_int + mobMask(j,1) = iand(negMask(j,1), preinteresting_det(j,1,ii)) + mobMask(j,2) = iand(negMask(j,2), preinteresting_det(j,2,ii)) + nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) end do if(nt <= 4) then @@ -373,10 +402,13 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p do ii=1,prefullinteresting(0) i = prefullinteresting(ii) nt = 0 - do j=1,N_int + mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + do j=2,N_int mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) - nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) end do if(nt <= 2) then @@ -521,6 +553,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere ! logical :: bandon ! ! bandon = .false. + PROVIDE psi_phasemask psi_selectors_coef_transp mat = 0d0 do i=1,N_int @@ -535,7 +568,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere do j=1,N_int mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) - nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) end do if(nt > 4) cycle From b08ced874114d2c69a518e53d90fdb9f32867df4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 25 Jan 2017 18:52:44 +0100 Subject: [PATCH 035/106] Simplification --- plugins/Full_CI_ZMQ/selection.irp.f | 31 ++++++++++++++++------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 85d5c39d..32c635ec 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -309,22 +309,10 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p ! If the subset doesn't exist, return logical :: will_compute will_compute = subset == 0 - maskInd = -1 if (.not.will_compute) then - outerloop: do s1=1,2 - do i1=N_holes(s1),1,-1 ! Generate low excitations first - do s2=s1,2 - do i2=N_holes(s2),ib,-1 ! Generate low excitations first - maskInd += 1 - if(mod(maskInd, fragment_count) == (subset-1)) then - will_compute = .True. - exit outerloop - end if - enddo - enddo - enddo - enddo outerloop + maskInd = N_holes(1)*N_holes(2) + N_holes(2)*((N_holes(2)-1)/2) + N_holes(1)*((N_holes(1)-1)/2) + will_compute = (maskInd >= subset) if (.not.will_compute) then return endif @@ -365,8 +353,23 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p maskInd = -1 + integer :: nb_count do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first +! will_compute = (subset == 0) +! nb_count = 0 +! if (s1==1) then +! nb_count = N_holes(1)-i1 + N_holes(2) +! else +! nb_count = N_holes(2)-i1 +! endif +! maskInd = 12345 +! fragment_count = 400 +! subset = 3 +! nb_count = 100 +! if( nb_count >= (fragment_count - mod(maskInd+1, fragment_count) + subset-1) ) then +! will_compute = .true. +! end if h1 = hole_list(i1,s1) call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) From 097083db4743c88d5cea09044ae38eb841fdf936 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 30 Jan 2017 09:38:04 +0100 Subject: [PATCH 036/106] Repaired selection --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 38 ++++++------ plugins/Full_CI_ZMQ/selection.irp.f | 19 ++---- plugins/Psiref_CAS/psi_ref.irp.f | 1 + plugins/mrsc2_no_amp/mrsc2_no_amp.irp.f | 63 ++++++++++++++++++-- plugins/mrsc2_no_amp/sc2_no_amp.irp.f | 5 ++ src/Determinants/print_wf.irp.f | 46 +++++++------- 6 files changed, 109 insertions(+), 63 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 98ef0b49..f34242ab 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -1,7 +1,5 @@ - - BEGIN_PROVIDER [ integer, fragment_count ] -&BEGIN_PROVIDER [ integer, fragment_first ] - fragment_count = (elec_alpha_num-n_core_orb)**2 +BEGIN_PROVIDER [ integer, fragment_first ] + implicit none fragment_first = first_det_of_teeth(1) END_PROVIDER @@ -111,7 +109,7 @@ subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, myVal = 0d0 myVal2 = 0d0 do j=comb_teeth,1,-1 - myVal += pt2_detail(1, dets(j)) / weight(dets(j)) * comb_step + myVal += pt2_detail(1, dets(j)) / pt2_weight(dets(j)) * comb_step sumabove(j) += myVal sum2above(j) += myVal**2 Nabove(j) += 1 @@ -229,8 +227,8 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su end do E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1)) - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - cweight(first_det_of_teeth(tooth)-1)) - prop = prop / weight(first_det_of_teeth(tooth)) + prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) + prop = prop / pt2_weight(first_det_of_teeth(tooth)) E0 += pt2_detail(1,first_det_of_teeth(tooth)) * prop avg = E0 + (sumabove(tooth) / Nabove(tooth)) eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) @@ -393,7 +391,7 @@ subroutine get_comb(stato, dets) curs = 1d0 - stato do j = comb_teeth, 1, -1 - dets(j) = pt2_find(curs, cweight) + dets(j) = pt2_find(curs, pt2_cweight) curs -= comb_step end do end subroutine @@ -421,8 +419,8 @@ end subroutine - BEGIN_PROVIDER [ double precision, weight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, cweight, (N_det_generators) ] + BEGIN_PROVIDER [ double precision, pt2_weight, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_cweight, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, comb_workload, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, comb_step ] &BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] @@ -432,34 +430,34 @@ end subroutine double precision :: norm_left, stato integer, external :: pt2_find - weight(1) = psi_coef_generators(1,1)**2 - cweight(1) = psi_coef_generators(1,1)**2 + pt2_weight(1) = psi_coef_generators(1,1)**2 + pt2_cweight(1) = psi_coef_generators(1,1)**2 do i=2,N_det_generators - weight(i) = psi_coef_generators(i,1)**2 - cweight(i) = cweight(i-1) + psi_coef_generators(i,1)**2 + pt2_weight(i) = psi_coef_generators(i,1)**2 + pt2_cweight(i) = pt2_cweight(i-1) + psi_coef_generators(i,1)**2 end do - weight = weight / cweight(N_det_generators) - cweight = cweight / cweight(N_det_generators) + pt2_weight = pt2_weight / pt2_cweight(N_det_generators) + pt2_cweight = pt2_cweight / pt2_cweight(N_det_generators) comb_workload = 1d0 / dfloat(N_det_generators) norm_left = 1d0 comb_step = 1d0/dfloat(comb_teeth) do i=1,N_det_generators - if(weight(i)/norm_left < comb_step/2d0) then + if(pt2_weight(i)/norm_left < comb_step/2d0) then first_det_of_comb = i exit end if - norm_left -= weight(i) + norm_left -= pt2_weight(i) end do - comb_step = 1d0 / dfloat(comb_teeth) * (1d0 - cweight(first_det_of_comb-1)) + comb_step = 1d0 / dfloat(comb_teeth) * (1d0 - pt2_cweight(first_det_of_comb-1)) stato = 1d0 - comb_step! + 1d-5 do i=comb_teeth, 1, -1 - first_det_of_teeth(i) = pt2_find(stato, cweight) + first_det_of_teeth(i) = pt2_find(stato, pt2_cweight) stato -= comb_step end do first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 32c635ec..86e9e9f2 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -1,5 +1,10 @@ use bitmasks +BEGIN_PROVIDER [ integer, fragment_count ] + implicit none + fragment_count = (elec_alpha_num-n_core_orb)**2 +END_PROVIDER + double precision function integral8(i,j,k,l) implicit none @@ -356,20 +361,6 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p integer :: nb_count do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first -! will_compute = (subset == 0) -! nb_count = 0 -! if (s1==1) then -! nb_count = N_holes(1)-i1 + N_holes(2) -! else -! nb_count = N_holes(2)-i1 -! endif -! maskInd = 12345 -! fragment_count = 400 -! subset = 3 -! nb_count = 100 -! if( nb_count >= (fragment_count - mod(maskInd+1, fragment_count) + subset-1) ) then -! will_compute = .true. -! end if h1 = hole_list(i1,s1) call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) diff --git a/plugins/Psiref_CAS/psi_ref.irp.f b/plugins/Psiref_CAS/psi_ref.irp.f index ab9e6943..87439764 100644 --- a/plugins/Psiref_CAS/psi_ref.irp.f +++ b/plugins/Psiref_CAS/psi_ref.irp.f @@ -77,6 +77,7 @@ END_PROVIDER norm_psi_ref(j) += psi_ref_coef(i,j) * psi_ref_coef(i,j) enddo inv_norm_psi_ref(j) = 1.d0/(dsqrt(norm_psi_Ref(j))) + print *, inv_norm_psi_ref(j) enddo END_PROVIDER diff --git a/plugins/mrsc2_no_amp/mrsc2_no_amp.irp.f b/plugins/mrsc2_no_amp/mrsc2_no_amp.irp.f index b8b021e8..e4555d8c 100644 --- a/plugins/mrsc2_no_amp/mrsc2_no_amp.irp.f +++ b/plugins/mrsc2_no_amp/mrsc2_no_amp.irp.f @@ -4,44 +4,95 @@ implicit none integer :: i,j,k,l integer, allocatable :: idx(:) + integer, allocatable :: holes_part(:,:) double precision, allocatable :: e_corr(:,:) double precision, allocatable :: accu(:) double precision, allocatable :: ihpsi_current(:) double precision, allocatable :: H_jj(:),H_jj_total(:),S2_jj(:) + integer :: number_of_particles, number_of_holes, n_h,n_p allocate(e_corr(N_det_non_ref,N_states),ihpsi_current(N_states),accu(N_states),H_jj(N_det_non_ref),idx(0:N_det_non_ref)) allocate(H_jj_total(N_det),S2_jj(N_det)) + allocate(holes_part(N_det,2)) accu = 0.d0 do i = 1, N_det_non_ref - call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef_interm_norm, N_int, N_det_ref,& + holes_part(i,1) = number_of_holes(psi_non_ref(1,1,i)) + holes_part(i,2) = number_of_particles(psi_non_ref(1,1,i)) + call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,& size(psi_ref_coef_interm_norm,1), N_states,ihpsi_current) do j = 1, N_states - e_corr(i,j) = psi_non_ref_coef_interm_norm(i,j) * ihpsi_current(j) + e_corr(i,j) = psi_non_ref_coef(i,j) * ihpsi_current(j) * inv_norm_psi_ref(j) accu(j) += e_corr(i,j) enddo enddo + print *, 'accu = ',accu double precision :: hjj,diag_h_mat_elem do i = 1, N_det_non_ref - call filter_not_connected(psi_non_ref,psi_non_ref(1,1,i),N_int,N_det_non_ref,idx) H_jj(i) = 0.d0 + n_h = holes_part(i,1) + n_p = holes_part(i,2) + integer :: degree +! do j = 1, N_det_non_ref +! call get_excitation_degree(psi_non_ref(1,1,i),psi_non_ref(1,1,j),degree,N_int) +! if(degree .gt. 2)then +! if(n_h + holes_part(j,1) .gt. 2 .or. n_p + holes_part(j,2) .gt. 2 ) then +! H_jj(i) += e_corr(j,1) +! endif +! endif +! enddo + call filter_not_connected(psi_non_ref,psi_non_ref(1,1,i),N_int,N_det_non_ref,idx) do j = 1, idx(0) - H_jj(i) += e_corr(idx(j),1) + if(n_h + holes_part(idx(j),1) .gt. 2 .or. n_p + holes_part(idx(j),2) .gt. 2 ) then + H_jj(i) += e_corr(idx(j),1) + endif enddo enddo + do i=1,N_Det H_jj_total(i) = diag_h_mat_elem(psi_det(1,1,i),N_int) call get_s2(psi_det(1,1,i),psi_det(1,1,i),N_int,S2_jj(i)) enddo - do i=1, N_det_non_ref + do i = 1, N_det_non_ref H_jj_total(idx_non_ref(i)) += H_jj(i) enddo + print *, 'coef' call davidson_diag_hjj_sjj(psi_det,CI_eigenvectors_sc2_no_amp,H_jj_total,S2_jj,CI_electronic_energy_sc2_no_amp,size(CI_eigenvectors_sc2_no_amp,1),N_Det,N_states,N_states_diag,N_int,6) + do i = 1, N_det + hjj = diag_h_mat_elem(psi_det(1,1,i),N_int) + ! if(hjj<-210.d0)then + ! call debug_det(psi_det(1,1,i),N_int) + ! print *, CI_eigenvectors_sc2_no_amp((i),1),hjj, H_jj_total(i) + ! endif + enddo + + + + + + print *, 'ref',N_det_ref + do i =1, N_det_ref + call debug_det(psi_det(1,1,idx_ref(i)),N_int) + print *, CI_eigenvectors_sc2_no_amp(idx_ref(i),1), H_jj_total(idx_ref(i)) + enddo + print *, 'non ref',N_det_non_ref + do i=1, N_det_non_ref + hjj = diag_h_mat_elem(psi_non_ref(1,1,i),N_int) +! print *, CI_eigenvectors_sc2_no_amp(idx_non_ref(i),1),H_jj_total(idx_non_ref(i)), H_jj(i) +! if(dabs(CI_eigenvectors_sc2_no_amp(idx_non_ref(i),1)).gt.1.d-1)then +! if(hjj<-210.d0)then +! call debug_det(psi_det(1,1,idx_non_ref(i)),N_int) +! write(*,'(10(F16.10,X))') CI_eigenvectors_sc2_no_amp(idx_non_ref(i),1),hjj, H_jj(i),H_jj_total(idx_non_ref(i)) +! endif + enddo +! do i = 1, N_det +! print *, CI_eigenvectors_sc2_no_amp(i,1) +! enddo do i=1,N_states_diag CI_eigenvectors_s2_sc2_no_amp(i) = S2_jj(i) enddo - deallocate(e_corr,ihpsi_current,accu,H_jj,idx,H_jj_total,s2_jj) + deallocate(e_corr,ihpsi_current,accu,H_jj,idx,H_jj_total,s2_jj,holes_part) END_PROVIDER BEGIN_PROVIDER [ double precision, CI_energy_sc2_no_amp, (N_states_diag) ] diff --git a/plugins/mrsc2_no_amp/sc2_no_amp.irp.f b/plugins/mrsc2_no_amp/sc2_no_amp.irp.f index 622d7236..f557783b 100644 --- a/plugins/mrsc2_no_amp/sc2_no_amp.irp.f +++ b/plugins/mrsc2_no_amp/sc2_no_amp.irp.f @@ -1,9 +1,14 @@ program pouet + provide ao_bielec_integrals_in_map + call bla +end +subroutine bla implicit none integer :: i do i = 1, 10 call diagonalize_CI_sc2_no_amp TOUCH psi_coef enddo + print *, "E+PT2 = ", ci_energy_sc2_no_amp(:) end diff --git a/src/Determinants/print_wf.irp.f b/src/Determinants/print_wf.irp.f index af109e2d..737e4d3e 100644 --- a/src/Determinants/print_wf.irp.f +++ b/src/Determinants/print_wf.irp.f @@ -28,32 +28,32 @@ subroutine routine if(degree == 0)then print*,'Reference determinant ' else - call i_H_j(psi_det(1,1,i),psi_det(1,1,1),N_int,hij) + call i_H_j(psi_det(1,1,i),psi_det(1,1,i),N_int,hij) call get_excitation(psi_det(1,1,1),psi_det(1,1,i),exc,degree,phase,N_int) call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) print*,'phase = ',phase - if(degree == 1)then - print*,'s1',s1 - print*,'h1,p1 = ',h1,p1 - if(s1 == 1)then - norm_mono_a += dabs(psi_coef(i,1)/psi_coef(1,1)) - else - norm_mono_b += dabs(psi_coef(i,1)/psi_coef(1,1)) - endif - print*,'< h | Ka| p > = ',get_mo_bielec_integral(h1,list_act(1),list_act(1),p1,mo_integrals_map) - double precision :: hmono,hdouble - call i_H_j_verbose(psi_det(1,1,1),psi_det(1,1,i),N_int,hij,hmono,hdouble) - print*,'hmono = ',hmono - print*,'hdouble = ',hdouble - print*,'hmono+hdouble = ',hmono+hdouble - print*,'hij = ',hij - else - print*,'s1',s1 - print*,'h1,p1 = ',h1,p1 - print*,'s2',s2 - print*,'h2,p2 = ',h2,p2 - print*,'< h | Ka| p > = ',get_mo_bielec_integral(h1,h2,p1,p2,mo_integrals_map) - endif +! if(degree == 1)then +! print*,'s1',s1 +! print*,'h1,p1 = ',h1,p1 +! if(s1 == 1)then +! norm_mono_a += dabs(psi_coef(i,1)/psi_coef(1,1)) +! else +! norm_mono_b += dabs(psi_coef(i,1)/psi_coef(1,1)) +! endif +! print*,'< h | Ka| p > = ',get_mo_bielec_integral(h1,list_act(1),list_act(1),p1,mo_integrals_map) +! double precision :: hmono,hdouble +! call i_H_j_verbose(psi_det(1,1,1),psi_det(1,1,i),N_int,hij,hmono,hdouble) +! print*,'hmono = ',hmono +! print*,'hdouble = ',hdouble +! print*,'hmono+hdouble = ',hmono+hdouble +! print*,'hij = ',hij +! else +! print*,'s1',s1 +! print*,'h1,p1 = ',h1,p1 +! print*,'s2',s2 +! print*,'h2,p2 = ',h2,p2 +! print*,'< h | Ka| p > = ',get_mo_bielec_integral(h1,h2,p1,p2,mo_integrals_map) +! endif print*,' = ',hij endif From 67fded7d18304b199080eb46eac6a072aa876c39 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 30 Jan 2017 20:15:28 +0100 Subject: [PATCH 037/106] work on pt2 stoch --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index f34242ab..48c6c155 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -192,6 +192,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su pt2_detail(:, index(i)) += pt2_mwen(:,i) parts_to_get(index(i)) -= 1 if(parts_to_get(index(i)) < 0) then + print *, "PARTS ??" stop "PARTS ??" end if if(parts_to_get(index(i)) == 0) actually_computed(index(i)) = .true. @@ -206,7 +207,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su time = omp_get_wtime() - if(time - timeLast > 30.0 .or. more /= 1) then + if(time - timeLast > 10.0 .or. more /= 1) then timeLast = time do i=1, first_det_of_teeth(1)-1 if(not(actually_computed(i))) then @@ -258,7 +259,7 @@ integer function pt2_find(v, w) h = N_det-1 do while(h >= l) - i = (h+l)/2 + i = ishft(h+l,-1) if(w(i+1) > v) then h = i-1 else @@ -374,7 +375,7 @@ subroutine reorder_tbc(tbc) ci = 0 do i=1,N_det_generators if(ltbc(i)) then - ci += 1 + ci = ci+1 tbc(ci) = i end if end do @@ -446,7 +447,7 @@ end subroutine comb_step = 1d0/dfloat(comb_teeth) do i=1,N_det_generators - if(pt2_weight(i)/norm_left < comb_step/2d0) then + if(pt2_weight(i)/norm_left < comb_step*.5d0) then first_det_of_comb = i exit end if @@ -462,7 +463,10 @@ end subroutine end do first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 first_det_of_teeth(1) = first_det_of_comb - if(first_det_of_teeth(1) /= first_det_of_comb) stop "comb provider" + if(first_det_of_teeth(1) /= first_det_of_comb) then + print *, 'Error in ', irp_here + stop "comb provider" + endif END_PROVIDER From cdd59910ffb417f1d1de3300f1aaa81e7948e5e6 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 31 Jan 2017 21:48:47 +0100 Subject: [PATCH 038/106] Corrected for use with new irpf90 --- plugins/Full_CI_ZMQ/selection.irp.f | 32 ++++++++++++++--------------- src/Utils/sort.irp.f | 4 ++-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 32c635ec..22344a7b 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -42,7 +42,7 @@ subroutine assert(cond, msg) print *, "assert fail: "//msg stop end if -end subroutine +end subroutine get_mask_phase(det, phasemask) @@ -64,7 +64,7 @@ subroutine get_mask_phase(det, phasemask) end do end do end do -end subroutine +end subroutine select_connected(i_generator,E0,pt2,b,subset) @@ -92,7 +92,7 @@ subroutine select_connected(i_generator,E0,pt2,b,subset) enddo call select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b,subset) enddo -end subroutine +end double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2) @@ -111,7 +111,7 @@ double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2) if(s1 == s2 .and. max(h1, p1) > min(h2, p2)) np = np + 1_1 get_phase_bi = res(iand(np,1_1)) -end subroutine +end @@ -170,7 +170,7 @@ subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) vect(:, puti) += hij * coefs end if end if -end subroutine +end @@ -233,7 +233,7 @@ subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) call apply_particle(mask, sp, p1, det, ok, N_int) call i_h_j(gen, det, N_int, hij) vect(:, p1) += hij * coefs -end subroutine +end subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) @@ -259,7 +259,7 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) vect(:, i) += hij * coefs end do -end subroutine +end subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset) use bitmasks @@ -471,7 +471,7 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p enddo enddo enddo -end subroutine +end @@ -537,7 +537,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d end if end do end do -end subroutine +end subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) @@ -612,7 +612,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere end if end if end do -end subroutine +end subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) @@ -730,7 +730,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end if end if end if -end subroutine +end subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) @@ -897,7 +897,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) mat(:, p1, p2) += coefs * hij end do end do -end subroutine +end @@ -959,7 +959,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end do end do end if -end subroutine +end subroutine past_d1(bannedOrb, p) @@ -975,7 +975,7 @@ subroutine past_d1(bannedOrb, p) bannedOrb(p(i, s), s) = .true. end do end do -end subroutine +end subroutine past_d2(banned, p, sp) @@ -1000,7 +1000,7 @@ subroutine past_d2(banned, p, sp) end do end do end if -end subroutine +end @@ -1045,5 +1045,5 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) call bitstring_to_list(myMask(1,2), list(na+1), nb, N_int) banned(list(1), list(2)) = .true. end do genl -end subroutine +end diff --git a/src/Utils/sort.irp.f b/src/Utils/sort.irp.f index b0decc33..dd7fbc33 100644 --- a/src/Utils/sort.irp.f +++ b/src/Utils/sort.irp.f @@ -156,7 +156,7 @@ BEGIN_TEMPLATE iorder(i) = i0 enddo - end subroutine heap_$Xsort$big + end subroutine heap_$Xsort_big subroutine $Xsort(x,iorder,isize) implicit none @@ -248,7 +248,7 @@ BEGIN_TEMPLATE iorder(j+1_8) = i0 enddo - end subroutine insertion_$Xsort + end subroutine insertion_$Xsort_big subroutine $Xset_order_big(x,iorder,isize) implicit none From edc3cde21115de494188877751d2498bc537468b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 31 Jan 2017 21:52:31 +0100 Subject: [PATCH 039/106] Corrected bug in PT2 --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 4 +- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 94 +++++++++++++------- src/Utils/map_functions.irp.f | 15 +++- src/ZMQ/utils.irp.f | 47 +++++++++- 4 files changed, 117 insertions(+), 43 deletions(-) diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 31d117a6..a0d1a5ea 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -102,10 +102,10 @@ program fci_zmq threshold_selectors = 1.d0 threshold_generators = 1d0 E_CI_before(1:N_states) = CI_energy(1:N_states) - !call ZMQ_selection(0, pt2)! pour non-stochastic double precision :: relative_error relative_error=1.d-3 - call ZMQ_pt2(pt2,relative_error) + !call ZMQ_pt2(pt2,relative_error) + call ZMQ_selection(0, pt2)! pour non-stochastic print *, 'Final step' print *, 'N_det = ', N_det print *, 'N_states = ', N_states diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 48c6c155..8c9db16d 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -27,16 +27,17 @@ subroutine ZMQ_pt2(pt2,relative_error) double precision, external :: omp_get_wtime double precision :: time0, time - allocate(pt2_detail(N_states, N_det_generators), comb(100000), computed(N_det_generators), tbc(0:N_det_generators)) + allocate(pt2_detail(N_states, N_det_generators), comb(10**5), computed(N_det_generators), tbc(0:size_tbc)) sumabove = 0d0 sum2above = 0d0 Nabove = 0d0 provide nproc - call random_seed() + !call random_seed() computed = .false. + tbc(0) = first_det_of_comb - 1 do i=1, tbc(0) tbc(i) = i @@ -44,19 +45,21 @@ subroutine ZMQ_pt2(pt2,relative_error) end do pt2_detail = 0d0 - time0 = omp_get_wtime() print *, "grep - time - avg - err - n_combs" do while(.true.) + call write_time(6) call new_parallel_job(zmq_to_qp_run_socket,"pt2") call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator)) call zmq_set_running(zmq_to_qp_run_socket) call create_selection_buffer(1, 1*2, b) ! TODO PARAMETER : 1.d-2 - call get_carlo_workbatch(1d-2, computed, comb, Ncomb, tbc) + Ncomb=size(comb) + call get_carlo_workbatch(1d0, computed, comb, Ncomb, tbc) generator_per_task = 1 + print *, 'Adding tasks...' do i=1,tbc(0) i_generator_end = min(i+generator_per_task-1, tbc(0)) if(tbc(i) > fragment_first) then @@ -71,6 +74,7 @@ subroutine ZMQ_pt2(pt2,relative_error) end do end if end do + call write_time(6) !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2, relative_error) PRIVATE(i) NUM_THREADS(nproc+1) i = omp_get_thread_num() @@ -91,7 +95,7 @@ end subroutine subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, Nabove) - integer, intent(in) :: tbc(0:N_det_generators), Ncomb + integer, intent(in) :: tbc(0:size_tbc), Ncomb logical, intent(in) :: computed(N_det_generators) double precision, intent(in) :: comb(Ncomb), pt2_detail(N_states, N_det_generators) double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) @@ -101,7 +105,7 @@ subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, mainLoop : do i=1,Ncomb call get_comb(comb(i), dets) do j=1,comb_teeth - if(not(computed(dets(j)))) then + if(.not.(computed(dets(j)))) then exit mainLoop end if end do @@ -109,7 +113,7 @@ subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, myVal = 0d0 myVal2 = 0d0 do j=comb_teeth,1,-1 - myVal += pt2_detail(1, dets(j)) / pt2_weight(dets(j)) * comb_step + myVal += pt2_detail(1, dets(j)) * pt2_weight_inv(dets(j)) * comb_step sumabove(j) += myVal sum2above(j) += myVal**2 Nabove(j) += 1 @@ -136,7 +140,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su double precision, intent(inout) :: pt2_detail(N_states, N_det_generators) double precision, intent(in) :: comb(Ncomb) logical, intent(inout) :: computed(N_det_generators) - integer, intent(in) :: tbc(0:N_det_generators) + integer, intent(in) :: tbc(0:size_tbc) double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth), relative_error double precision, intent(out) :: pt2(N_states) @@ -164,7 +168,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su logical, allocatable :: actually_computed(:) allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators)) - actually_computed = computed + actually_computed(:) = computed(:) parts_to_get(:) = 1 if(fragment_first > 0) parts_to_get(1:fragment_first) = fragment_count @@ -192,7 +196,9 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su pt2_detail(:, index(i)) += pt2_mwen(:,i) parts_to_get(index(i)) -= 1 if(parts_to_get(index(i)) < 0) then + print *, i, index(i), parts_to_get(index(i)), Nindex print *, "PARTS ??" + print *, parts_to_get stop "PARTS ??" end if if(parts_to_get(index(i)) == 0) actually_computed(index(i)) = .true. @@ -207,10 +213,10 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su time = omp_get_wtime() - if(time - timeLast > 10.0 .or. more /= 1) then + if(time - timeLast > 1d1 .or. more /= 1) then timeLast = time do i=1, first_det_of_teeth(1)-1 - if(not(actually_computed(i))) then + if(.not.(actually_computed(i))) then print *, "PT2 : deterministic part not finished" cycle pullLoop end if @@ -219,7 +225,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su double precision :: E0, avg, eqt, prop call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove) firstTBDcomb = Nabove(1) - orgTBDcomb + 1 - if(Nabove(1) < 2.0) cycle + if(Nabove(1) < 2d0) cycle call get_first_tooth(actually_computed, tooth) done = 0 @@ -229,7 +235,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1)) prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) - prop = prop / pt2_weight(first_det_of_teeth(tooth)) + prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) E0 += pt2_detail(1,first_det_of_teeth(tooth)) * prop avg = E0 + (sumabove(tooth) / Nabove(tooth)) eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) @@ -250,9 +256,10 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su end subroutine -integer function pt2_find(v, w) +integer function pt2_find(v, w, sze) implicit none - double precision :: v, w(N_det) + integer, intent(in) :: sze + double precision, intent(in) :: v, w(sze) integer :: i,l,h l = 0 @@ -286,7 +293,7 @@ subroutine get_first_tooth(computed, first_teeth) first_det = 1 first_teeth = 1 do i=first_det_of_comb, N_det_generators - if(not(computed(i))) then + if(.not.(computed(i))) then first_det = i exit end if @@ -309,10 +316,10 @@ subroutine get_last_full_tooth(computed, last_tooth) integer :: i, j, missing last_tooth = 0 - combLoop : do i=comb_teeth-1, 1, -1 - missing = 1+ (first_det_of_teeth(i+1)-first_det_of_teeth(i))/100 + combLoop : do i=comb_teeth, 1, -1 + missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-7) ! /128 do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 - if(not(computed(j))) then + if(.not.computed(j)) then missing -= 1 if(missing < 0) cycle combLoop end if @@ -323,29 +330,34 @@ subroutine get_last_full_tooth(computed, last_tooth) end subroutine +BEGIN_PROVIDER [ integer, size_tbc ] + size_tbc = N_det_generators + fragment_count*fragment_first +END_PROVIDER + subroutine get_carlo_workbatch(maxWorkload, computed, comb, Ncomb, tbc) implicit none double precision, intent(in) :: maxWorkload - double precision, intent(out) :: comb(N_det_generators) - integer, intent(inout) :: tbc(0:N_det_generators) - integer, intent(out) :: Ncomb + double precision, intent(out) :: comb(Ncomb) + integer, intent(inout) :: tbc(0:size_tbc) + integer, intent(inout) :: Ncomb logical, intent(inout) :: computed(N_det_generators) integer :: i, j, last_full, dets(comb_teeth) double precision :: myWorkload myWorkload = 0d0 + call RANDOM_NUMBER(comb) do i=1,size(comb) - call RANDOM_NUMBER(comb(i)) comb(i) = comb(i) * comb_step + !DIR$ FORCEINLINE call add_comb(comb(i), computed, tbc, myWorkload) Ncomb = i call get_last_full_tooth(computed, last_full) if(Ncomb >= 30 .and. last_full /= 0) then do j=1,first_det_of_teeth(last_full+1)-1 - if(not(computed(j))) then + if(.not.(computed(j))) then tbc(0) += 1 tbc(tbc(0)) = j computed(j) = .true. @@ -355,25 +367,25 @@ subroutine get_carlo_workbatch(maxWorkload, computed, comb, Ncomb, tbc) end do end if - if(myWorkload > maxWorkload .and. i >= 100) exit + if(myWorkload > maxWorkload) exit end do end subroutine subroutine reorder_tbc(tbc) implicit none - integer, intent(inout) :: tbc(0:N_det_generators) + integer, intent(inout) :: tbc(0:size_tbc) logical, allocatable :: ltbc(:) integer :: i, ci - allocate(ltbc(N_det_generators)) - ltbc = .false. + allocate(ltbc(size_tbc)) + ltbc(:) = .false. do i=1,tbc(0) ltbc(tbc(i)) = .true. end do ci = 0 - do i=1,N_det_generators + do i=1,size_tbc if(ltbc(i)) then ci = ci+1 tbc(ci) = i @@ -392,7 +404,8 @@ subroutine get_comb(stato, dets) curs = 1d0 - stato do j = comb_teeth, 1, -1 - dets(j) = pt2_find(curs, pt2_cweight) + !DIR$ FORCEINLINE + dets(j) = pt2_find(curs, pt2_cweight,N_det_generators) curs -= comb_step end do end subroutine @@ -403,13 +416,14 @@ subroutine add_comb(comb, computed, tbc, workload) double precision, intent(in) :: comb logical, intent(inout) :: computed(N_det_generators) double precision, intent(inout) :: workload - integer, intent(inout) :: tbc(0:N_det_generators) + integer, intent(inout) :: tbc(0:size_tbc) integer :: i, dets(comb_teeth) + !DIR$ FORCEINLINE call get_comb(comb, dets) do i = 1, comb_teeth - if(not(computed(dets(i)))) then + if(.not.(computed(dets(i)))) then tbc(0) += 1 tbc(tbc(0)) = dets(i) workload += comb_workload(dets(i)) @@ -454,11 +468,11 @@ end subroutine norm_left -= pt2_weight(i) end do - comb_step = 1d0 / dfloat(comb_teeth) * (1d0 - pt2_cweight(first_det_of_comb-1)) + comb_step = (1d0 - pt2_cweight(first_det_of_comb-1)) * comb_step stato = 1d0 - comb_step! + 1d-5 do i=comb_teeth, 1, -1 - first_det_of_teeth(i) = pt2_find(stato, pt2_cweight) + first_det_of_teeth(i) = pt2_find(stato, pt2_cweight, N_det_generators) stato -= comb_step end do first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 @@ -470,6 +484,18 @@ end subroutine END_PROVIDER +BEGIN_PROVIDER [ double precision, pt2_weight_inv, (N_det_generators) ] + implicit none + BEGIN_DOC +! Inverse of pt2_weight array + END_DOC + integer :: i + do i=1,N_det_generators + pt2_weight_inv(i) = 1.d0/pt2_weight(i) + enddo + +END_PROVIDER + diff --git a/src/Utils/map_functions.irp.f b/src/Utils/map_functions.irp.f index 68ba342c..e3745d05 100644 --- a/src/Utils/map_functions.irp.f +++ b/src/Utils/map_functions.irp.f @@ -73,10 +73,11 @@ subroutine map_load_from_disk(filename,map) implicit none character*(*), intent(in) :: filename type(map_type), intent(inout) :: map + double precision :: x type(c_ptr) :: c_pointer(3) integer :: fd(3) - integer*8 :: i,k - integer :: n_elements + integer*8 :: i,k, l + integer :: n_elements, j @@ -95,7 +96,9 @@ subroutine map_load_from_disk(filename,map) call mmap(trim(filename)//'_consolidated_value', (/ map % n_elements /), integral_kind, fd(3), .True., c_pointer(3)) call c_f_pointer(c_pointer(3),map % consolidated_value, (/ map % n_elements /)) + l = 0_8 k = 1_8 + x = 0.d0 do i=0_8, map % map_size deallocate(map % map(i) % value) deallocate(map % map(i) % key) @@ -106,9 +109,15 @@ subroutine map_load_from_disk(filename,map) k = map % consolidated_idx (i+2) map % map(i) % map_size = n_elements map % map(i) % n_elements = n_elements + ! Load memory from disk + do j=1,n_elements + x = x + map % map(i) % value(j) + l = iand(l,map % map(i) % key(j)) + enddo enddo + map % sorted = x>0 .or. l == 0_8 map % n_elements = k-1 - map % sorted = .True. + map % sorted = map % sorted .or. .True. map % consolidated = .True. end diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index 3177d3e3..5ffe9ee2 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -696,8 +696,7 @@ subroutine add_task_to_taskserver(zmq_to_qp_run_socket,task) endif rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0) - message = trim(message(1:rc)) - if (trim(message) /= 'ok') then + if (message(1:rc) /= 'ok') then print *, trim(task) print *, 'Unable to add the next task' stop -1 @@ -705,6 +704,47 @@ subroutine add_task_to_taskserver(zmq_to_qp_run_socket,task) end +subroutine add_task_to_taskserver_send(zmq_to_qp_run_socket,task) + use f77_zmq + implicit none + BEGIN_DOC + ! Get a task from the task server + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + character*(*), intent(in) :: task + + integer :: rc, sze + character*(512) :: message + write(message,*) 'add_task '//trim(zmq_state)//' '//trim(task) + + sze = len(trim(message)) + rc = f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0) + if (rc /= sze) then + print *, rc, sze + print *, irp_here,': f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)' + stop 'error' + endif + +end + +subroutine add_task_to_taskserver_recv(zmq_to_qp_run_socket) + use f77_zmq + implicit none + BEGIN_DOC + ! Get a task from the task server + END_DOC + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + + integer :: rc, sze + character*(512) :: message + rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0) + if (message(1:rc) /= 'ok') then + print *, 'Unable to add the next task' + stop -1 + endif + +end + subroutine task_done_to_taskserver(zmq_to_qp_run_socket, worker_id, task_id) use f77_zmq implicit none @@ -726,8 +766,7 @@ subroutine task_done_to_taskserver(zmq_to_qp_run_socket, worker_id, task_id) endif rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0) - message = trim(message(1:rc)) - if (trim(message) /= 'ok') then + if (trim(message(1:rc)) /= 'ok') then print *, 'Unable to send task_done message' stop -1 endif From aac30f9b66be285605b58980b29a809db0261718 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 11:29:17 +0100 Subject: [PATCH 040/106] Removed PUSH/PULL --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 170 +++++++++++------- plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 4 +- plugins/Full_CI_ZMQ/run_selection_slave.irp.f | 4 +- .../ao_bielec_integrals_in_map_slave.irp.f | 22 +-- src/ZMQ/utils.irp.f | 8 +- 5 files changed, 123 insertions(+), 85 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 8c9db16d..b96cf883 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -10,7 +10,7 @@ subroutine ZMQ_pt2(pt2,relative_error) implicit none character*(512) :: task - integer(ZMQ_PTR) :: zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_to_qp_run_socket2 type(selection_buffer) :: b integer, external :: omp_get_thread_num double precision, intent(in) :: relative_error @@ -27,12 +27,12 @@ subroutine ZMQ_pt2(pt2,relative_error) double precision, external :: omp_get_wtime double precision :: time0, time - allocate(pt2_detail(N_states, N_det_generators), comb(10**5), computed(N_det_generators), tbc(0:size_tbc)) + allocate(pt2_detail(N_states, N_det_generators), comb(N_det_generators), computed(N_det_generators), tbc(0:size_tbc)) sumabove = 0d0 sum2above = 0d0 Nabove = 0d0 - provide nproc + provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral !call random_seed() @@ -47,42 +47,63 @@ subroutine ZMQ_pt2(pt2,relative_error) pt2_detail = 0d0 time0 = omp_get_wtime() print *, "grep - time - avg - err - n_combs" + generator_per_task = 1 do while(.true.) call write_time(6) call new_parallel_job(zmq_to_qp_run_socket,"pt2") call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator)) - call zmq_set_running(zmq_to_qp_run_socket) call create_selection_buffer(1, 1*2, b) - ! TODO PARAMETER : 1.d-2 Ncomb=size(comb) - call get_carlo_workbatch(1d0, computed, comb, Ncomb, tbc) - generator_per_task = 1 - print *, 'Adding tasks...' - do i=1,tbc(0) - i_generator_end = min(i+generator_per_task-1, tbc(0)) - if(tbc(i) > fragment_first) then - integer :: zero - zero = 0 - write(task,*) (i_generator_end-i+1), zero, tbc(i:i_generator_end) - call add_task_to_taskserver(zmq_to_qp_run_socket,task) - else - do j=1,fragment_count - write(task,*) (i_generator_end-i+1), j, tbc(i:i_generator_end) - call add_task_to_taskserver(zmq_to_qp_run_socket,task) - end do - end if - end do + call get_carlo_workbatch(computed, comb, Ncomb, tbc) + call write_time(6) - !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2, relative_error) PRIVATE(i) NUM_THREADS(nproc+1) + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + + !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2, relative_error) NUM_THREADS(nproc+1) & + !$OMP PRIVATE(i,zmq_to_qp_run_socket2,i_generator_end,task,j) + zmq_to_qp_run_socket2 = new_zmq_to_qp_run_socket() + + !$OMP DO SCHEDULE(static,1) + do i=1,min(2000,tbc(0)) + i_generator_end = min(i+generator_per_task-1, tbc(0)) + if(tbc(i) > fragment_first) then + write(task,*) (i_generator_end-i+1), 0, tbc(i:i_generator_end) + call add_task_to_taskserver(zmq_to_qp_run_socket2,task) + else + do j=1,fragment_count + write(task,*) (i_generator_end-i+1), j, tbc(i:i_generator_end) + call add_task_to_taskserver(zmq_to_qp_run_socket2,task) + end do + end if + end do + !$OMP END DO NOWAIT + i = omp_get_thread_num() if (i==0) then + call zmq_set_running(zmq_to_qp_run_socket) call pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, pt2) + else if (i==1) then + do i=2001,tbc(0) + i_generator_end = min(i+generator_per_task-1, tbc(0)) + if(tbc(i) > fragment_first) then + write(task,*) (i_generator_end-i+1), 0, tbc(i:i_generator_end) + call add_task_to_taskserver(zmq_to_qp_run_socket2,task) + else + do j=1,fragment_count + write(task,*) (i_generator_end-i+1), j, tbc(i:i_generator_end) + call add_task_to_taskserver(zmq_to_qp_run_socket2,task) + end do + end if + end do + call pt2_slave_inproc(1) else call pt2_slave_inproc(i) endif + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket2) !$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, 'pt2') tbc(0) = 0 @@ -317,7 +338,7 @@ subroutine get_last_full_tooth(computed, last_tooth) last_tooth = 0 combLoop : do i=comb_teeth, 1, -1 - missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-7) ! /128 + missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-6) ! /64 do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 if(.not.computed(j)) then missing -= 1 @@ -334,41 +355,57 @@ BEGIN_PROVIDER [ integer, size_tbc ] size_tbc = N_det_generators + fragment_count*fragment_first END_PROVIDER - -subroutine get_carlo_workbatch(maxWorkload, computed, comb, Ncomb, tbc) +subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) implicit none - double precision, intent(in) :: maxWorkload double precision, intent(out) :: comb(Ncomb) integer, intent(inout) :: tbc(0:size_tbc) integer, intent(inout) :: Ncomb logical, intent(inout) :: computed(N_det_generators) - integer :: i, j, last_full, dets(comb_teeth) - double precision :: myWorkload + integer :: i, j, last_full, dets(comb_teeth), tbc_save - myWorkload = 0d0 - call RANDOM_NUMBER(comb) - do i=1,size(comb) - comb(i) = comb(i) * comb_step - !DIR$ FORCEINLINE - call add_comb(comb(i), computed, tbc, myWorkload) - Ncomb = i - - call get_last_full_tooth(computed, last_full) - if(Ncomb >= 30 .and. last_full /= 0) then - do j=1,first_det_of_teeth(last_full+1)-1 - if(.not.(computed(j))) then - tbc(0) += 1 - tbc(tbc(0)) = j - computed(j) = .true. - myWorkload += comb_workload(j) - print *, "filled ", j, "to reach tooth", last_full, "ending at", first_det_of_teeth(last_full+1) - end if + do j=1,size(comb),100 + do i=j,min(size(comb),j+99) + comb(i) = comb(i) * comb_step + tbc_save = tbc(0) + !DIR$ FORCEINLINE + call add_comb(comb(i), computed, tbc, size_tbc, comb_teeth) + if (tbc(0) < size(tbc)) then + Ncomb = i + else + tbc(0) = tbc_save + return + endif end do - end if + call get_filling_teeth(computed, tbc) + enddo + +end subroutine + + +subroutine get_filling_teeth(computed, tbc) + implicit none + integer, intent(inout) :: tbc(0:size_tbc) + logical, intent(inout) :: computed(N_det_generators) + integer :: i, j, k, last_full, dets(comb_teeth) + + call get_last_full_tooth(computed, last_full) + if(last_full /= 0) then + if (tbc(0) > size(tbc) - first_det_of_teeth(last_full+1) -2) then + return + endif + k = tbc(0)+1 + do j=1,first_det_of_teeth(last_full+1)-1 + if(.not.(computed(j))) then + tbc(k) = j + k=k+1 + computed(j) = .true. +! print *, "filled ", j, "to reach tooth", last_full, "ending at", first_det_of_teeth(last_full+1) + end if + end do + tbc(0) = k-1 + end if - if(myWorkload > maxWorkload) exit - end do end subroutine @@ -394,10 +431,11 @@ subroutine reorder_tbc(tbc) end subroutine -subroutine get_comb(stato, dets) +subroutine get_comb(stato, dets, ct) implicit none + integer, intent(in) :: ct double precision, intent(in) :: stato - integer, intent(out) :: dets(comb_teeth) + integer, intent(out) :: dets(ct) double precision :: curs integer :: j integer, external :: pt2_find @@ -405,38 +443,39 @@ subroutine get_comb(stato, dets) curs = 1d0 - stato do j = comb_teeth, 1, -1 !DIR$ FORCEINLINE - dets(j) = pt2_find(curs, pt2_cweight,N_det_generators) + dets(j) = pt2_find(curs, pt2_cweight,size(pt2_cweight)) curs -= comb_step end do end subroutine -subroutine add_comb(comb, computed, tbc, workload) +subroutine add_comb(comb, computed, tbc, stbc, ct) implicit none + integer, intent(in) :: stbc, ct double precision, intent(in) :: comb logical, intent(inout) :: computed(N_det_generators) - double precision, intent(inout) :: workload - integer, intent(inout) :: tbc(0:size_tbc) - integer :: i, dets(comb_teeth) + integer, intent(inout) :: tbc(0:stbc) + integer :: i, k, l, dets(ct) !DIR$ FORCEINLINE - call get_comb(comb, dets) + call get_comb(comb, dets, ct) - do i = 1, comb_teeth - if(.not.(computed(dets(i)))) then - tbc(0) += 1 - tbc(tbc(0)) = dets(i) - workload += comb_workload(dets(i)) - computed(dets(i)) = .true. + k=tbc(0)+1 + do i = 1, ct + l = dets(i) + if(.not.(computed(l))) then + tbc(k) = l + k = k+1 + computed(l) = .true. end if end do + tbc(0) = k-1 end subroutine BEGIN_PROVIDER [ double precision, pt2_weight, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, pt2_cweight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, comb_workload, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, comb_step ] &BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] &BEGIN_PROVIDER [ integer, first_det_of_comb ] @@ -455,7 +494,6 @@ end subroutine pt2_weight = pt2_weight / pt2_cweight(N_det_generators) pt2_cweight = pt2_cweight / pt2_cweight(N_det_generators) - comb_workload = 1d0 / dfloat(N_det_generators) norm_left = 1d0 diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 949a6d28..070d3f97 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -124,7 +124,7 @@ subroutine push_pt2_results(zmq_socket_push, N, index, pt2_detail, task_id, ntas if(rc /= 4*ntask) stop "push" ! Activate is zmq_socket_push is a REQ -! rc = f77_zmq_recv( zmq_socket_push, task_id(1), ntask*4, 0) + rc = f77_zmq_recv( zmq_socket_push, task_id(1), ntask*4, 0) end subroutine @@ -154,7 +154,7 @@ subroutine pull_pt2_results(zmq_socket_pull, N, index, pt2_detail, task_id, ntas if(rc /= 4*ntask) stop "pull" ! Activate is zmq_socket_pull is a REP -! rc = f77_zmq_send( zmq_socket_pull, task_id(1), ntask*4, 0) + rc = f77_zmq_send( zmq_socket_pull, task_id(1), ntask*4, 0) end subroutine diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index 7d48e5c0..5bf00a1d 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -115,7 +115,7 @@ subroutine push_selection_results(zmq_socket_push, pt2, b, task_id, ntask) if(rc /= 4*ntask) stop "push" ! Activate is zmq_socket_push is a REQ -! rc = f77_zmq_recv( zmq_socket_push, task_id(1), ntask*4, 0) + rc = f77_zmq_recv( zmq_socket_push, task_id(1), ntask*4, 0) end subroutine @@ -149,7 +149,7 @@ subroutine pull_selection_results(zmq_socket_pull, pt2, val, det, N, task_id, nt if(rc /= 4*ntask) stop "pull" ! Activate is zmq_socket_pull is a REP -! rc = f77_zmq_send( zmq_socket_pull, task_id(1), ntask*4, 0) + rc = f77_zmq_send( zmq_socket_pull, task_id(1), ntask*4, 0) end subroutine diff --git a/src/Integrals_Bielec/ao_bielec_integrals_in_map_slave.irp.f b/src/Integrals_Bielec/ao_bielec_integrals_in_map_slave.irp.f index ce4518cf..38c78388 100644 --- a/src/Integrals_Bielec/ao_bielec_integrals_in_map_slave.irp.f +++ b/src/Integrals_Bielec/ao_bielec_integrals_in_map_slave.irp.f @@ -57,12 +57,12 @@ subroutine push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, endif ! Activate is zmq_socket_push is a REQ -! integer :: idummy -! rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) -! if (rc /= 4) then -! print *, irp_here, ': f77_zmq_send( zmq_socket_push, idummy, 4, 0)' -! stop 'error' -! endif + integer :: idummy + rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) + if (rc /= 4) then + print *, irp_here, ': f77_zmq_send( zmq_socket_push, idummy, 4, 0)' + stop 'error' + endif end @@ -187,11 +187,11 @@ subroutine ao_bielec_integrals_in_map_collector rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) ! Activate if zmq_socket_pull is a REP -! rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0) -! if (rc /= 4) then -! print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...' -! stop 'error' -! endif + rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0) + if (rc /= 4) then + print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...' + stop 'error' + endif call insert_into_ao_integrals_map(n_integrals,buffer_i,buffer_value) diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index 5ffe9ee2..9e28aff5 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -235,8 +235,8 @@ function new_zmq_pull_socket() if (zmq_context == 0_ZMQ_PTR) then stop 'zmq_context is uninitialized' endif - new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_PULL) -! new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_REP) +! new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_PULL) + new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_REP) call omp_unset_lock(zmq_lock) if (new_zmq_pull_socket == 0_ZMQ_PTR) then stop 'Unable to create zmq pull socket' @@ -312,8 +312,8 @@ function new_zmq_push_socket(thread) if (zmq_context == 0_ZMQ_PTR) then stop 'zmq_context is uninitialized' endif - new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_PUSH) -! new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_REQ) +! new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_PUSH) + new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_REQ) call omp_unset_lock(zmq_lock) if (new_zmq_push_socket == 0_ZMQ_PTR) then stop 'Unable to create zmq push socket' From 3bbc3980c59fb5f5cfb5c844e7d77d1dbdb571c7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 11:39:59 +0100 Subject: [PATCH 041/106] Corrected bug in pt2stoch --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index b96cf883..1176a144 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -124,7 +124,7 @@ subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, double precision :: myVal, myVal2 mainLoop : do i=1,Ncomb - call get_comb(comb(i), dets) + call get_comb(comb(i), dets, comb_teeth) do j=1,comb_teeth if(.not.(computed(dets(j)))) then exit mainLoop From 17386004bdf77ffbb6b9b73090e0fb4ebc9c1a32 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 14:06:57 +0100 Subject: [PATCH 042/106] Acceleration of pt2_find --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 22 ++++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 1176a144..dbbf11f5 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -167,7 +167,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su type(selection_buffer), intent(inout) :: b - double precision :: pt2_mwen(N_states, N_det) + double precision :: pt2_mwen(N_states, N_det_generators) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -203,7 +203,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_pull = new_zmq_pull_socket() - allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det), index(N_det)) + allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det_generators), index(N_det_generators)) more = 1 if (time0 < 0.d0) then time0 = omp_get_wtime() @@ -277,14 +277,14 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su end subroutine -integer function pt2_find(v, w, sze) +integer function pt2_find(v, w, sze, imin, imax) implicit none - integer, intent(in) :: sze + integer, intent(in) :: sze, imin, imax double precision, intent(in) :: v, w(sze) integer :: i,l,h - l = 0 - h = N_det-1 + l = imin + h = imax-1 do while(h >= l) i = ishft(h+l,-1) @@ -443,7 +443,7 @@ subroutine get_comb(stato, dets, ct) curs = 1d0 - stato do j = comb_teeth, 1, -1 !DIR$ FORCEINLINE - dets(j) = pt2_find(curs, pt2_cweight,size(pt2_cweight)) + dets(j) = pt2_find(curs, pt2_cweight,size(pt2_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) curs -= comb_step end do end subroutine @@ -476,6 +476,7 @@ end subroutine BEGIN_PROVIDER [ double precision, pt2_weight, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, pt2_cweight, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_cweight_cache, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, comb_step ] &BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] &BEGIN_PROVIDER [ integer, first_det_of_comb ] @@ -508,9 +509,12 @@ end subroutine comb_step = (1d0 - pt2_cweight(first_det_of_comb-1)) * comb_step - stato = 1d0 - comb_step! + 1d-5 + stato = 1d0 - comb_step + iloc = N_det_generators do i=comb_teeth, 1, -1 - first_det_of_teeth(i) = pt2_find(stato, pt2_cweight, N_det_generators) + integer :: iloc + iloc = pt2_find(stato, pt2_cweight, N_det_generators, 0, iloc) + first_det_of_teeth(i) = iloc stato -= comb_step end do first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 From 848fda18364a1e58bdff5d49a359b4d68f1bb15c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 14:15:18 +0100 Subject: [PATCH 043/106] Fixed n_core --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 2 +- src/Bitmask/bitmasks.irp.f | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index dbbf11f5..00e1893f 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -136,7 +136,7 @@ subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, do j=comb_teeth,1,-1 myVal += pt2_detail(1, dets(j)) * pt2_weight_inv(dets(j)) * comb_step sumabove(j) += myVal - sum2above(j) += myVal**2 + sum2above(j) += myVal*myVal Nabove(j) += 1 end do end do mainLoop diff --git a/src/Bitmask/bitmasks.irp.f b/src/Bitmask/bitmasks.irp.f index 964c4ed8..6fe201ff 100644 --- a/src/Bitmask/bitmasks.irp.f +++ b/src/Bitmask/bitmasks.irp.f @@ -554,16 +554,15 @@ END_PROVIDER &BEGIN_PROVIDER [ integer, n_core_orb] implicit none BEGIN_DOC - ! Core orbitals bitmask + ! Core + deleted orbitals bitmask END_DOC integer :: i,j n_core_orb = 0 do i = 1, N_int - core_bitmask(i,1) = xor(full_ijkl_bitmask(i),ior(reunion_of_cas_inact_bitmask(i,1),virt_bitmask(i,1))) - core_bitmask(i,2) = xor(full_ijkl_bitmask(i),ior(reunion_of_cas_inact_bitmask(i,2),virt_bitmask(i,1))) + core_bitmask(i,1) = xor(HF_bitmask(i,1),ior(reunion_of_cas_inact_bitmask(i,1),virt_bitmask(i,1))) + core_bitmask(i,2) = xor(HF_bitmask(i,2),ior(reunion_of_cas_inact_bitmask(i,2),virt_bitmask(i,1))) n_core_orb += popcnt(core_bitmask(i,1)) enddo - print*,'n_core_orb = ',n_core_orb END_PROVIDER From 832585a6ca2a006dd66fe1c13563dffffdb4a0c2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 15:17:50 +0100 Subject: [PATCH 044/106] Acceleration of pt2_find --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 00e1893f..0cf719e5 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -284,9 +284,9 @@ integer function pt2_find(v, w, sze, imin, imax) integer :: i,l,h l = imin - h = imax-1 + h = imax - do while(h >= l) + do while(h-l > 4) i = ishft(h+l,-1) if(w(i+1) > v) then h = i-1 @@ -294,7 +294,14 @@ integer function pt2_find(v, w, sze, imin, imax) l = i+1 end if end do - pt2_find = l+1 + do i=l,h + if ( w(i) <= v) then + cycle + else + pt2_find = i-1 + return + endif + enddo end function From ca0f0732c2305fb33a069a280aa78c28518ed52d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 16:35:47 +0100 Subject: [PATCH 045/106] Corrected bug in pt2_find --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 0cf719e5..0a5d287e 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -276,17 +276,17 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su call sort_selection_buffer(b) end subroutine - integer function pt2_find(v, w, sze, imin, imax) implicit none integer, intent(in) :: sze, imin, imax double precision, intent(in) :: v, w(sze) integer :: i,l,h + integer, parameter :: block=64 l = imin - h = imax + h = imax-1 - do while(h-l > 4) + do while(h-l >= block) i = ishft(h+l,-1) if(w(i+1) > v) then h = i-1 @@ -294,14 +294,12 @@ integer function pt2_find(v, w, sze, imin, imax) l = i+1 end if end do - do i=l,h - if ( w(i) <= v) then - cycle - else - pt2_find = i-1 - return - endif - enddo + !DIR$ LOOP COUNT (64) + do pt2_find=l,min(l+block,h) + if(w(pt2_find+1) > v) then + exit + end if + end do end function From fe5f640346d59321aba4beb85191f61982a51d53 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 17:00:39 +0100 Subject: [PATCH 046/106] Corrected bug in pt2stoch --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 0a5d287e..96d770eb 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -295,8 +295,8 @@ integer function pt2_find(v, w, sze, imin, imax) end if end do !DIR$ LOOP COUNT (64) - do pt2_find=l,min(l+block,h) - if(w(pt2_find+1) > v) then + do pt2_find=l,h + if(w(pt2_find) >= v) then exit end if end do From 1dfc8979beb92a46371885efa6fbf30aa3cdf836 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 17:53:53 +0100 Subject: [PATCH 047/106] Reduced nb of tooth fillings --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 96d770eb..5a7c92a1 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -367,10 +367,12 @@ subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) integer, intent(inout) :: Ncomb logical, intent(inout) :: computed(N_det_generators) integer :: i, j, last_full, dets(comb_teeth), tbc_save + integer :: n + n = int(sqrt(dble(size(comb)))) call RANDOM_NUMBER(comb) - do j=1,size(comb),100 - do i=j,min(size(comb),j+99) + do j=1,size(comb),n + do i=j,min(size(comb),j+n-1) comb(i) = comb(i) * comb_step tbc_save = tbc(0) !DIR$ FORCEINLINE From b6b8f7bcba6706e7c1e457afc9bd7c876a981c48 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 18:57:20 +0100 Subject: [PATCH 048/106] Accelerated selection --- plugins/Full_CI_ZMQ/selection.irp.f | 43 +++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 033404b9..afcb51db 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -348,7 +348,10 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p if(i <= N_det_selectors) then preinteresting(0) += 1 preinteresting(preinteresting(0)) = i - preinteresting_det(:,:,preinteresting(0)) = psi_det_sorted(:,:,i) + do j=1,N_int + preinteresting_det(j,1,preinteresting(0)) = psi_det_sorted(j,1,i) + preinteresting_det(j,2,preinteresting(0)) = psi_det_sorted(j,2,i) + enddo else if(nt <= 2) then prefullinteresting(0) += 1 prefullinteresting(prefullinteresting(0)) = i @@ -384,11 +387,21 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p if(nt <= 4) then interesting(0) += 1 interesting(interesting(0)) = i - minilist(:,:,interesting(0)) = psi_det_sorted(:,:,i) + minilist(1,1,interesting(0)) = preinteresting_det(1,1,ii) + minilist(1,2,interesting(0)) = preinteresting_det(1,2,ii) + do j=2,N_int + minilist(j,1,interesting(0)) = preinteresting_det(j,1,ii) + minilist(j,2,interesting(0)) = preinteresting_det(j,2,ii) + enddo if(nt <= 2) then fullinteresting(0) += 1 fullinteresting(fullinteresting(0)) = i - fullminilist(:,:,fullinteresting(0)) = psi_det_sorted(:,:,i) + fullminilist(1,1,fullinteresting(0)) = preinteresting_det(1,1,ii) + fullminilist(1,2,fullinteresting(0)) = preinteresting_det(1,2,ii) + do j=2,N_int + fullminilist(j,1,fullinteresting(0)) = preinteresting_det(j,1,ii) + fullminilist(j,2,fullinteresting(0)) = preinteresting_det(j,2,ii) + enddo end if end if end do @@ -408,7 +421,12 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p if(nt <= 2) then fullinteresting(0) += 1 fullinteresting(fullinteresting(0)) = i - fullminilist(:,:,fullinteresting(0)) = psi_det_sorted(:,:,i) + fullminilist(1,1,fullinteresting(0)) = psi_det_sorted(1,1,i) + fullminilist(1,2,fullinteresting(0)) = psi_det_sorted(1,2,i) + do j=2,N_int + fullminilist(j,1,fullinteresting(0)) = psi_det_sorted(j,1,i) + fullminilist(j,2,fullinteresting(0)) = psi_det_sorted(j,2,i) + enddo end if end do @@ -431,7 +449,10 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p h2 = hole_list(i2,s2) call apply_hole(pmask, s2,h2, mask, ok, N_int) banned = .false. - bannedOrb(1:mo_tot_num, 1:2) = .true. + do j=1,mo_tot_num + bannedOrb(j, 1) = .true. + bannedOrb(j, 2) = .true. + enddo do s3=1,2 do i=1,N_particles(s3) bannedOrb(particle_list(i,s3), s3) = .false. @@ -582,10 +603,16 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere if(nt == 4) call past_d2(banned, p, sp) if(nt == 3) call past_d1(bannedOrb, p) else - if(interesting(i) == i_gen) then -! bandon = .true. + if(interesting(i) /= i_gen) then + continue + else +! bandon = .true. if(sp == 3) then - banned(:,:,2) = transpose(banned(:,:,1)) + do j=1,mo_tot_num + do k=1,mo_tot_num + banned(j,k,2) = banned(k,j,1) + enddo + enddo else do k=1,mo_tot_num do l=k+1,mo_tot_num From d1507c937a510fc6b85a562fbe12c0c00db31513 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 19:43:38 +0100 Subject: [PATCH 049/106] Optimized selection --- plugins/Full_CI_ZMQ/selection.irp.f | 163 +++++++++++++++++----------- 1 file changed, 102 insertions(+), 61 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index afcb51db..587618c8 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -55,7 +55,7 @@ subroutine get_mask_phase(det, phasemask) implicit none integer(bit_kind), intent(in) :: det(N_int, 2) - integer(1), intent(out) :: phasemask(N_int*bit_kind_size, 2) + integer(1), intent(out) :: phasemask(2,N_int*bit_kind_size) integer :: s, ni, i logical :: change @@ -65,7 +65,7 @@ subroutine get_mask_phase(det, phasemask) do ni=1,N_int do i=0,bit_kind_size-1 if(BTEST(det(ni, s), i)) change = .not. change - if(change) phasemask((ni-1)*bit_kind_size + i + 1, s) = 1_1 + if(change) phasemask(s, (ni-1)*bit_kind_size + i + 1) = 1_1 end do end do end do @@ -104,18 +104,20 @@ double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2) use bitmasks implicit none - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) + integer(1), intent(in) :: phasemask(2,*) integer, intent(in) :: s1, s2, h1, h2, p1, p2 logical :: change - integer(1) :: np - double precision, parameter :: res(0:1) = (/1d0, -1d0/) + integer(1) :: np1 + integer :: np + double precision, save :: res(0:1) = (/1d0, -1d0/) - np = phasemask(h1,s1) + phasemask(p1,s1) + phasemask(h2,s2) + phasemask(p2,s2) - if(p1 < h1) np = np + 1_1 - if(p2 < h2) np = np + 1_1 + np1 = phasemask(s1,h1) + phasemask(s1,p1) + phasemask(s2,h2) + phasemask(s2,p2) + np = np1 + if(p1 < h1) np = np + 1 + if(p2 < h2) np = np + 1 - if(s1 == s2 .and. max(h1, p1) > min(h2, p2)) np = np + 1_1 - get_phase_bi = res(iand(np,1_1)) + if(s1 == s2 .and. max(h1, p1) > min(h2, p2)) np = np + 1 + get_phase_bi = res(iand(np,1)) end @@ -125,7 +127,7 @@ subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) implicit none integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) + integer(1), intent(in) :: phasemask(2,N_int*bit_kind_size) logical, intent(in) :: bannedOrb(mo_tot_num) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: vect(N_states, mo_tot_num) @@ -184,7 +186,7 @@ subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) implicit none integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) + integer(1), intent(in) :: phasemask(2,N_int*bit_kind_size) logical, intent(in) :: bannedOrb(mo_tot_num) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: vect(N_states, mo_tot_num) @@ -246,7 +248,7 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) implicit none integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) + integer(1), intent(in) :: phasemask(2,N_int*bit_kind_size) logical, intent(in) :: bannedOrb(mo_tot_num) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: vect(N_states, mo_tot_num) @@ -337,8 +339,10 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p end do do i=1,N_det - nt = 0 - do j=1,N_int + mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + do j=2,N_int mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) @@ -578,9 +582,18 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere do i=1, N_sel ! interesting(0) !i = interesting(ii) + if (interesting(i) < 0) then + stop 'prefetch interesting(i)' + endif + - nt = 0 - do j=1,N_int + mobMask(1,1) = iand(negMask(1,1), det(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), det(1,2,i)) + nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + + if(nt > 4) cycle + + do j=2,N_int mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) @@ -588,25 +601,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere if(nt > 4) cycle - do j=1,N_int - perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) - perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) - end do - - call bitstring_to_list(perMask(1,1), h(1,1), h(0,1), N_int) - call bitstring_to_list(perMask(1,2), h(1,2), h(0,2), N_int) - - call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) - call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) - - if(interesting(i) < i_gen) then - if(nt == 4) call past_d2(banned, p, sp) - if(nt == 3) call past_d1(bannedOrb, p) - else - if(interesting(i) /= i_gen) then - continue - else -! bandon = .true. + if (interesting(i) == i_gen) then if(sp == 3) then do j=1,mo_tot_num do k=1,mo_tot_num @@ -620,14 +615,32 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere end do end do end if - end if - if(nt == 4) then - call get_d2(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) - else if(nt == 3) then - call get_d1(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) - else - call get_d0(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) - end if + end if + + call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) + + perMask(1,1) = iand(mask(1,1), not(det(1,1,i))) + perMask(1,2) = iand(mask(1,2), not(det(1,2,i))) + do j=2,N_int + perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) + perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) + end do + + call bitstring_to_list_in_selection(perMask(1,1), h(1,1), h(0,1), N_int) + call bitstring_to_list_in_selection(perMask(1,2), h(1,2), h(0,2), N_int) + + if (interesting(i) >= i_gen) then + if(nt == 4) then + call get_d2(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + else if(nt == 3) then + call get_d1(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + else + call get_d0(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + end if + else + if(nt == 4) call past_d2(banned, p, sp) + if(nt == 3) call past_d1(bannedOrb, p) end if end do end @@ -638,7 +651,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) implicit none integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) + integer(1), intent(in) :: phasemask(2,N_int*bit_kind_size) logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) @@ -687,20 +700,20 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end if end do else - do i = 1,2 + h1 = h(1,1) + h2 = h(1,2) do j = 1,2 - puti = p(i, 1) putj = p(j, 2) - - if(banned(puti,putj,bant)) cycle - p1 = p(turn2(i), 1) p2 = p(turn2(j), 2) - h1 = h(1,1) - h2 = h(1,2) - - hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) - mat(:, puti, putj) += coefs * hij - end do + do i = 1,2 + puti = p(i, 1) + + if(banned(puti,putj,bant)) cycle + p1 = p(turn2(i), 1) + + hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) + mat(:, puti, putj) += coefs * hij + end do end do end if @@ -756,7 +769,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) implicit none integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer(1),intent(in) :: phasemask(N_int*bit_kind_size, 2) + integer(1),intent(in) :: phasemask(2,N_int*bit_kind_size) logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) integer(bit_kind) :: det(N_int, 2) double precision, intent(in) :: coefs(N_states) @@ -925,7 +938,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) implicit none integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(1), intent(in) :: phasemask(N_int*bit_kind_size, 2) + integer(1), intent(in) :: phasemask(2,N_int*bit_kind_size) logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) integer(bit_kind) :: det(N_int, 2) double precision, intent(in) :: coefs(N_states) @@ -953,8 +966,8 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) call i_h_j(gen, det, N_int, hij) else - hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) + hij = integral8(p1, p2, h1, h2) * phase end if mat(:, p1, p2) += coefs(:) * hij end do @@ -1059,9 +1072,37 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) end do - call bitstring_to_list(myMask(1,1), list(1), na, N_int) - call bitstring_to_list(myMask(1,2), list(na+1), nb, N_int) + call bitstring_to_list_in_selection(myMask(1,1), list(1), na, N_int) + call bitstring_to_list_in_selection(myMask(1,2), list(na+1), nb, N_int) banned(list(1), list(2)) = .true. end do genl end + +subroutine bitstring_to_list_in_selection( string, list, n_elements, Nint) + use bitmasks + implicit none + BEGIN_DOC + ! Gives the inidices(+1) of the bits set to 1 in the bit string + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: string(Nint) + integer, intent(out) :: list(Nint*bit_kind_size) + integer, intent(out) :: n_elements + + integer :: i, ishift + integer(bit_kind) :: l + + n_elements = 0 + ishift = 2 + do i=1,Nint + l = string(i) + do while (l /= 0_bit_kind) + n_elements = n_elements+1 + list(n_elements) = ishift+popcnt(l-1_bit_kind) - popcnt(l) + l = iand(l,l-1_bit_kind) + enddo + ishift = ishift + bit_kind_size + enddo + +end From 6ea3216b98e3c9da2f0b72702672ea6b48e25828 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 21:07:26 +0100 Subject: [PATCH 050/106] Fixed MRCEPA0 --- plugins/mrcepa0/dressing_slave.irp.f | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/plugins/mrcepa0/dressing_slave.irp.f b/plugins/mrcepa0/dressing_slave.irp.f index 9e9fa65a..487e6ed3 100644 --- a/plugins/mrcepa0/dressing_slave.irp.f +++ b/plugins/mrcepa0/dressing_slave.irp.f @@ -315,13 +315,13 @@ subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) stop 'error' endif -! ! Activate is zmq_socket_push is a REQ -! integer :: idummy -! rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) -! if (rc /= 4) then -! print *, irp_here, 'f77_zmq_send( zmq_socket_push, idummy, 4, 0)' -! stop 'error' -! endif + ! Activate is zmq_socket_push is a REQ + integer :: idummy + rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_send( zmq_socket_push, idummy, 4, 0)' + stop 'error' + endif end @@ -389,13 +389,13 @@ subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, endif -! ! Activate is zmq_socket_pull is a REP -! integer :: idummy -! rc = f77_zmq_send( zmq_socket_pull, idummy, 4, 0) -! if (rc /= 4) then -! print *, irp_here, 'f77_zmq_send( zmq_socket_pull, idummy, 4, 0)' -! stop 'error' -! endif + ! Activate is zmq_socket_pull is a REP + integer :: idummy + rc = f77_zmq_send( zmq_socket_pull, idummy, 4, 0) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_send( zmq_socket_pull, idummy, 4, 0)' + stop 'error' + endif end From 5f2407ce226ae4da0dd3685b83e78bc2078a84fb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 21:39:34 +0100 Subject: [PATCH 051/106] Fixed Travis warning --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 5a7c92a1..f3241f90 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -159,10 +159,10 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su integer, intent(in) :: Ncomb double precision, intent(inout) :: pt2_detail(N_states, N_det_generators) - double precision, intent(in) :: comb(Ncomb) + double precision, intent(in) :: comb(Ncomb), relative_error logical, intent(inout) :: computed(N_det_generators) integer, intent(in) :: tbc(0:size_tbc) - double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth), relative_error + double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) double precision, intent(out) :: pt2(N_states) From ca6672c25040efb08b785481288540088a0ab9ae Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 21:46:20 +0100 Subject: [PATCH 052/106] Removed PUSH/PULL --- plugins/CAS_SD_ZMQ/run_selection_slave.irp.f | 4 ++-- plugins/mrcc_selected/dressing_slave.irp.f | 24 ++++++++++---------- src/Determinants/H_apply.irp.f | 22 +++++++++--------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f b/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f index dfaee629..35b482f0 100644 --- a/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f +++ b/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f @@ -115,7 +115,7 @@ subroutine push_selection_results(zmq_socket_push, pt2, b, task_id, ntask) if(rc /= 4*ntask) stop "push" ! Activate is zmq_socket_push is a REQ -! rc = f77_zmq_recv( zmq_socket_push, task_id(1), ntask*4, 0) + rc = f77_zmq_recv( zmq_socket_push, task_id(1), ntask*4, 0) end subroutine @@ -149,7 +149,7 @@ subroutine pull_selection_results(zmq_socket_pull, pt2, val, det, N, task_id, nt if(rc /= 4*ntask) stop "pull" ! Activate is zmq_socket_pull is a REP -! rc = f77_zmq_send( zmq_socket_pull, task_id(1), ntask*4, 0) + rc = f77_zmq_send( zmq_socket_pull, task_id(1), ntask*4, 0) end subroutine diff --git a/plugins/mrcc_selected/dressing_slave.irp.f b/plugins/mrcc_selected/dressing_slave.irp.f index c2e5dd55..8d488f36 100644 --- a/plugins/mrcc_selected/dressing_slave.irp.f +++ b/plugins/mrcc_selected/dressing_slave.irp.f @@ -294,12 +294,12 @@ subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) endif ! ! Activate is zmq_socket_push is a REQ -! integer :: idummy -! rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) -! if (rc /= 4) then -! print *, irp_here, 'f77_zmq_send( zmq_socket_push, idummy, 4, 0)' -! stop 'error' -! endif + integer :: idummy + rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_send( zmq_socket_push, idummy, 4, 0)' + stop 'error' + endif end @@ -368,12 +368,12 @@ subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, ! ! Activate is zmq_socket_pull is a REP -! integer :: idummy -! rc = f77_zmq_send( zmq_socket_pull, idummy, 4, 0) -! if (rc /= 4) then -! print *, irp_here, 'f77_zmq_send( zmq_socket_pull, idummy, 4, 0)' -! stop 'error' -! endif + integer :: idummy + rc = f77_zmq_send( zmq_socket_pull, idummy, 4, 0) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_send( zmq_socket_pull, idummy, 4, 0)' + stop 'error' + endif end diff --git a/src/Determinants/H_apply.irp.f b/src/Determinants/H_apply.irp.f index 411fe703..a6a7310f 100644 --- a/src/Determinants/H_apply.irp.f +++ b/src/Determinants/H_apply.irp.f @@ -362,12 +362,12 @@ subroutine push_pt2(zmq_socket_push,pt2,norm_pert,H_pert_diag,i_generator,N_st,t endif ! Activate if zmq_socket_push is a REQ -! integer :: idummy -! rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) -! if (rc /= 4) then -! print *, irp_here, 'f77_zmq_send( zmq_socket_push, idummy, 4, 0)' -! stop 'error' -! endif + integer :: idummy + rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_send( zmq_socket_push, idummy, 4, 0)' + stop 'error' + endif end subroutine pull_pt2(zmq_socket_pull,pt2,norm_pert,H_pert_diag,i_generator,N_st,n,task_id) @@ -433,11 +433,11 @@ subroutine pull_pt2(zmq_socket_pull,pt2,norm_pert,H_pert_diag,i_generator,N_st,n endif ! Activate if zmq_socket_pull is a REP -! rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0) -! if (rc /= 4) then -! print *, irp_here, 'f77_zmq_send( zmq_socket_pull, 0, 4, 0)' -! stop 'error' -! endif + rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0) + if (rc /= 4) then + print *, irp_here, 'f77_zmq_send( zmq_socket_pull, 0, 4, 0)' + stop 'error' + endif end From 245a2a90eda60b024fc6ed223fb586417fc6297b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 21:52:37 +0100 Subject: [PATCH 053/106] Fixed Travis --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index f3241f90..9afab796 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -263,9 +263,8 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su time = omp_get_wtime() print "(A, 4(E15.7), 4(I9))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) if (dabs(eqt/avg) < relative_error) then - relative_error = 0.d0 pt2(1) = avg - exit + exit pullLoop endif end if end do pullLoop @@ -398,16 +397,13 @@ subroutine get_filling_teeth(computed, tbc) call get_last_full_tooth(computed, last_full) if(last_full /= 0) then - if (tbc(0) > size(tbc) - first_det_of_teeth(last_full+1) -2) then - return - endif k = tbc(0)+1 do j=1,first_det_of_teeth(last_full+1)-1 if(.not.(computed(j))) then tbc(k) = j k=k+1 computed(j) = .true. -! print *, "filled ", j, "to reach tooth", last_full, "ending at", first_det_of_teeth(last_full+1) + if (k>size_tbc) exit end if end do tbc(0) = k-1 From 20edbbd777348dfc3211d0c43aaceae78c46375a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Feb 2017 22:25:27 +0100 Subject: [PATCH 054/106] More frequent teeth updates --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 42 +++++++++++--------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 9afab796..753afcc9 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -366,24 +366,27 @@ subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) integer, intent(inout) :: Ncomb logical, intent(inout) :: computed(N_det_generators) integer :: i, j, last_full, dets(comb_teeth), tbc_save - integer :: n - n = int(sqrt(dble(size(comb)))) - + integer :: icount, n + n = tbc(0) + icount = 0 call RANDOM_NUMBER(comb) - do j=1,size(comb),n - do i=j,min(size(comb),j+n-1) - comb(i) = comb(i) * comb_step - tbc_save = tbc(0) - !DIR$ FORCEINLINE - call add_comb(comb(i), computed, tbc, size_tbc, comb_teeth) - if (tbc(0) < size(tbc)) then - Ncomb = i - else - tbc(0) = tbc_save - return - endif - end do - call get_filling_teeth(computed, tbc) + do i=1,size(comb) + comb(i) = comb(i) * comb_step + tbc_save = tbc(0) + !DIR$ FORCEINLINE + call add_comb(comb(i), computed, tbc, size_tbc, comb_teeth) + if (tbc(0) < size(tbc)) then + Ncomb = i + else + tbc(0) = tbc_save + return + endif + icount = icount + tbc(0) - tbc_save + if (icount > n) then + call get_filling_teeth(computed, tbc) + icount = 0 + n = ishft(tbc_save,-1) + endif enddo end subroutine @@ -397,13 +400,16 @@ subroutine get_filling_teeth(computed, tbc) call get_last_full_tooth(computed, last_full) if(last_full /= 0) then + if (tbc(0) > size(tbc) - first_det_of_teeth(last_full+1) -2) then + return + endif k = tbc(0)+1 do j=1,first_det_of_teeth(last_full+1)-1 if(.not.(computed(j))) then tbc(k) = j k=k+1 computed(j) = .true. - if (k>size_tbc) exit +! print *, "filled ", j, "to reach tooth", last_full, "ending at", first_det_of_teeth(last_full+1) end if end do tbc(0) = k-1 From 9a7db910d9a166aadfa8040996e90c505fcbe7b1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 8 Feb 2017 20:54:15 +0100 Subject: [PATCH 055/106] Fixed distributed Davidson --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 4 +- plugins/Full_CI_ZMQ/selection.irp.f | 1 + src/Davidson/davidson_parallel.irp.f | 68 ++++++++++------- src/Davidson/parameters.irp.f | 5 ++ src/Davidson/u0Hu0.irp.f | 78 ++++++++++++-------- 5 files changed, 96 insertions(+), 60 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 753afcc9..d7c98933 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -342,7 +342,7 @@ subroutine get_last_full_tooth(computed, last_tooth) last_tooth = 0 combLoop : do i=comb_teeth, 1, -1 - missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-6) ! /64 + missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-5) ! /32 do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 if(.not.computed(j)) then missing -= 1 @@ -385,7 +385,7 @@ subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) if (icount > n) then call get_filling_teeth(computed, tbc) icount = 0 - n = ishft(tbc_save,-1) + n = ishft(tbc_save,-4) endif enddo diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 587618c8..de7c93f8 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -546,6 +546,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d e_pert = 0.5d0 * ( tmp - delta_E) pt2(istate) = pt2(istate) + e_pert max_e_pert = min(e_pert,max_e_pert) +! ci(istate) = e_pert / mat(istate, p1, p2) end do if(dabs(max_e_pert) > buf%mini) then diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index cede52c9..724aac08 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -20,9 +20,10 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) double precision :: s2, hij logical, allocatable :: wrotten(:) + PROVIDE dav_det ref_bitmask_energy + allocate(wrotten(bs)) wrotten = .false. - PROVIDE dav_det ii=0 sh = blockb @@ -43,14 +44,15 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) do j=shortcut_(sh2,1), shortcut_(sh2+1,1)-1 if(i == j) cycle - org_j = sort_idx_(j,1) ext = exa do ni=1,N_int ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1))) + if(ext > 4) exit end do if(ext <= 4) then - call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) + org_j = sort_idx_(j,1) call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) + call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) if(.not. wrotten(ii)) then wrotten(ii) = .true. idx(ii) = org_i @@ -58,8 +60,8 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) st (:,ii) = 0d0 end if do istate=1,N_states_diag - vt (istate,ii) += hij*dav_ut(istate,org_j) - st (istate,ii) += s2*dav_ut(istate,org_j) + vt (istate,ii) = vt (istate,ii) +hij*dav_ut(istate,org_j) + st (istate,ii) = st (istate,ii) +s2*dav_ut(istate,org_j) enddo endif enddo @@ -76,23 +78,25 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1 if(i == j) cycle org_j = sort_idx_(j,2) - ext = 0 - do ni=1,N_int + ext = popcnt(xor(sorted_(1,i,2), sorted_(1,j,2))) + if (ext > 4) cycle + do ni=2,N_int ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2))) + if (ext > 4) exit end do if(ext == 4) then - call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) - call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) - if(.not. wrotten(ii)) then - wrotten(ii) = .true. - idx(ii) = org_i - vt (:,ii) = 0d0 - st (:,ii) = 0d0 - end if - do istate=1,N_states_diag - vt (istate,ii) += hij*dav_ut(istate,org_j) - st (istate,ii) += s2*dav_ut(istate,org_j) - enddo + call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) + call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) + if(.not. wrotten(ii)) then + wrotten(ii) = .true. + idx(ii) = org_i + vt (:,ii) = 0d0 + st (:,ii) = 0d0 + end if + do istate=1,N_states_diag + vt (istate,ii) = vt (istate,ii) +hij*dav_ut(istate,org_j) + st (istate,ii) = st (istate,ii) +s2*dav_ut(istate,org_j) + enddo end if end do end do @@ -320,6 +324,15 @@ subroutine davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) if(rc /= 4) stop "davidson_push_results failed to push task_id" + +! Activate is zmq_socket_push is a REQ + integer :: idummy + rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) + if (rc /= 4) then + print *, irp_here, ': f77_zmq_send( zmq_socket_push, idummy, 4, 0)' + stop 'error' + endif + end subroutine @@ -358,6 +371,14 @@ subroutine davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) if(rc /= 4) stop "davidson_pull_results failed to pull task_id" + +! Activate if zmq_socket_pull is a REP + rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0) + if (rc /= 4) then + print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...' + stop 'error' + endif + end subroutine @@ -434,18 +455,14 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA) double precision , intent(inout) :: v0(LDA, N_states_diag) double precision , intent(inout) :: s0(LDA, N_states_diag) - call zmq_set_running(zmq_to_qp_run_socket) - - zmq_collector = new_zmq_to_qp_run_socket() - zmq_socket_pull = new_zmq_pull_socket() - i = omp_get_thread_num() - PROVIDE nproc !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(i) i = omp_get_thread_num() if (i == 0 ) then + zmq_collector = new_zmq_to_qp_run_socket() + zmq_socket_pull = new_zmq_pull_socket() call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA) call end_zmq_to_qp_run_socket(zmq_collector) call end_zmq_pull_socket(zmq_socket_pull) @@ -457,7 +474,6 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA) endif !$OMP END PARALLEL - call end_parallel_job(zmq_to_qp_run_socket, 'davidson') end subroutine diff --git a/src/Davidson/parameters.irp.f b/src/Davidson/parameters.irp.f index ae8babaa..7d383192 100644 --- a/src/Davidson/parameters.irp.f +++ b/src/Davidson/parameters.irp.f @@ -18,6 +18,11 @@ subroutine davidson_converged(energy,residual,wall,iterations,cpu,N_st,converged double precision :: E(N_st), time double precision, allocatable, save :: energy_old(:) + if (iterations < 2) then + converged = .False. + return + endif + if (.not.allocated(energy_old)) then allocate(energy_old(N_st)) energy_old = 0.d0 diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 9e76bc92..026921d0 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -267,6 +267,7 @@ END_PROVIDER subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) + use omp_lib use bitmasks use f77_zmq implicit none @@ -287,7 +288,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ double precision :: hij,s2 double precision, allocatable :: ut(:,:) integer :: i,j,k,l, jj,ii - integer :: i0, j0 + integer :: i0, j0, ithread integer, allocatable :: shortcut(:,:), sort_idx(:) integer(bit_kind), allocatable :: sorted(:,:), version(:,:) @@ -321,41 +322,55 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ ut(istate,i) = u_0(i,istate) enddo enddo - call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint) - call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint) + call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint) + call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint) blockb = shortcut(0,1) call davidson_init(handler,n,N_st_8,ut) - - ave_workload = 0.d0 - do sh=1,shortcut(0,1) - ave_workload += shortcut(0,1) - ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2 - do i=sh, shortcut(0,2), shortcut(0,1) - do j=i, min(i, shortcut(0,2)) - ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2 - end do - end do - enddo - ave_workload = ave_workload/dble(shortcut(0,1)) - target_workload_inv = 0.001d0/ave_workload - - - do sh=1,shortcut(0,1),1 - workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2 - do i=sh, shortcut(0,2), shortcut(0,1) - do j=i, min(i, shortcut(0,2)) - workload += (shortcut(j+1,2) - shortcut(j, 2))**2 - end do - end do - istep = 1+ int(workload*target_workload_inv) - do blockb2=0, istep-1 - call davidson_add_task(handler, sh, blockb2, istep) - enddo - enddo - call davidson_run(handler, v_0, s_0, size(v_0,1)) + PROVIDE nproc + + !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread) + ithread = omp_get_thread_num() + if (ithread == 0 ) then + + call zmq_set_running(handler) + ave_workload = 0.d0 + do sh=1,shortcut(0,1) + ave_workload += shortcut(0,1) + ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2 + do i=sh, shortcut(0,2), shortcut(0,1) + do j=i, min(i, shortcut(0,2)) + ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2 + end do + end do + enddo + ave_workload = ave_workload/dble(shortcut(0,1)) + target_workload_inv = 0.01d0/ave_workload + + + do sh=1,shortcut(0,1),1 + workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2 + do i=sh, shortcut(0,2), shortcut(0,1) + do j=i, min(i, shortcut(0,2)) + workload += (shortcut(j+1,2) - shortcut(j, 2))**2 + end do + end do + istep = 1+ int(workload*target_workload_inv) + do blockb2=0, istep-1 + call davidson_add_task(handler, sh, blockb2, istep) + enddo + enddo + call davidson_run(handler, v_0, s_0, size(v_0,1)) + else if (ithread == 1 ) then + call davidson_miniserver_run () + else + call davidson_slave_inproc(ithread) + endif + !$OMP END PARALLEL + + call end_parallel_job(handler, 'davidson') do istate=1,N_st do i=1,n @@ -551,7 +566,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo do j=i+1,shortcut(sh+1,1)-1 - if (i==j) cycle ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) if (ext > 4) cycle do ni=2,Nint From 3b935080b49be6d301e8a5d713096c90389ac46b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 8 Feb 2017 20:58:18 +0100 Subject: [PATCH 056/106] Dummy read in integrals when restart --- src/Utils/map_functions.irp.f | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Utils/map_functions.irp.f b/src/Utils/map_functions.irp.f index e3745d05..f5d6f4f8 100644 --- a/src/Utils/map_functions.irp.f +++ b/src/Utils/map_functions.irp.f @@ -113,6 +113,12 @@ subroutine map_load_from_disk(filename,map) do j=1,n_elements x = x + map % map(i) % value(j) l = iand(l,map % map(i) % key(j)) + if (map % map(i) % value(j) > 1.e30) then + stop 'Error in integrals file' + endif + if (map % map(i) % key(j) < 0) then + stop 'Error in integrals file' + endif enddo enddo map % sorted = x>0 .or. l == 0_8 From 67f8fc8e3e956300a7013ba7fd8bc2931afd7ff1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 9 Feb 2017 20:32:00 +0100 Subject: [PATCH 057/106] Parallelized make_s2_eigenfunction --- src/Determinants/occ_pattern.irp.f | 41 ++++++++++++------------------ 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/src/Determinants/occ_pattern.irp.f b/src/Determinants/occ_pattern.irp.f index 42bca8eb..707be0f2 100644 --- a/src/Determinants/occ_pattern.irp.f +++ b/src/Determinants/occ_pattern.irp.f @@ -36,7 +36,7 @@ subroutine occ_pattern_to_dets_size(o,sze,n_alpha,Nint) amax -= popcnt( o(k,2) ) enddo sze = int( min(binom_func(bmax, amax), 1.d8) ) - sze = sze*sze + sze = sze*sze + 10 end @@ -246,14 +246,21 @@ subroutine make_s2_eigenfunction integer :: i,j,k integer :: smax, s integer(bit_kind), allocatable :: d(:,:,:), det_buffer(:,:,:) - integer :: N_det_new + integer :: N_det_new, ithread, omp_get_thread_num integer, parameter :: bufsze = 1000 logical, external :: is_in_wavefunction - allocate (d(N_int,2,1), det_buffer(N_int,2,bufsze) ) - smax = 1 - N_det_new = 0 + print *, 'Finding determinants for S^2...' + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(N_occ_pattern, psi_occ_pattern, elec_alpha_num,N_int) & + !$OMP PRIVATE(s,ithread, d, det_buffer, smax, N_det_new,i,j,k) + N_det_new = 0 + call occ_pattern_to_dets_size(psi_occ_pattern(1,1,1),s,elec_alpha_num,N_int) + allocate (d(N_int,2,s), det_buffer(N_int,2,bufsze) ) + smax = s + ithread = omp_get_thread_num() + !$OMP DO do i=1,N_occ_pattern call occ_pattern_to_dets_size(psi_occ_pattern(1,1,i),s,elec_alpha_num,N_int) s += 1 @@ -270,40 +277,24 @@ subroutine make_s2_eigenfunction det_buffer(k,1,N_det_new) = d(k,1,j) det_buffer(k,2,N_det_new) = d(k,2,j) enddo -! integer :: ne(2) -! ne(:) = 0 -! do k=1,N_int -! ne(1) += popcnt(d(k,1,j)) -! ne(2) += popcnt(d(k,2,j)) -! enddo -! if (ne(1) /= elec_alpha_num) then -! call debug_det(d(1,1,j),N_int) -! stop "ALPHA" -! endif -! if (ne(2) /= elec_beta_num) then -! call debug_det(d(1,1,j),N_int) -! stop "BETA" -! endif if (N_det_new == bufsze) then - call fill_H_apply_buffer_no_selection(bufsze,det_buffer,N_int,0) + call fill_H_apply_buffer_no_selection(bufsze,det_buffer,N_int,ithread) N_det_new = 0 endif endif enddo enddo + !$OMP END DO NOWAIT if (N_det_new > 0) then - call fill_H_apply_buffer_no_selection(N_det_new,det_buffer,N_int,0) -! call fill_H_apply_buffer_no_selection_first_order_coef(N_det_new,det_buffer,N_int,0) + call fill_H_apply_buffer_no_selection(N_det_new,det_buffer,N_int,ithread) endif - deallocate(d,det_buffer) + !$OMP END PARALLEL call copy_H_apply_buffer_to_wf SOFT_TOUCH N_det psi_coef psi_det print *, 'Added determinants for S^2' -! logical :: found -! call remove_duplicates_in_psi_det(found) end From 92d7bbd57ebc1a9f631b471e1042a952a397b81e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 10 Feb 2017 00:50:37 +0100 Subject: [PATCH 058/106] Fixed PT2 stoch: fragments broken --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 10 +++- plugins/Full_CI_ZMQ/selection.irp.f | 55 +++++++++++--------- src/Bitmask/bitmasks.irp.f | 10 ++-- src/Davidson/u0Hu0.irp.f | 15 +++++- 4 files changed, 58 insertions(+), 32 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index d7c98933..ac98bd9c 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -192,7 +192,9 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su actually_computed(:) = computed(:) parts_to_get(:) = 1 - if(fragment_first > 0) parts_to_get(1:fragment_first) = fragment_count + if(fragment_first > 0) then + parts_to_get(1:fragment_first) = fragment_count + endif do i=1,tbc(0) actually_computed(tbc(i)) = .false. @@ -356,6 +358,10 @@ end subroutine BEGIN_PROVIDER [ integer, size_tbc ] + implicit none + BEGIN_DOC +! Size of the tbc array + END_DOC size_tbc = N_det_generators + fragment_count*fragment_first END_PROVIDER @@ -522,7 +528,7 @@ end subroutine iloc = N_det_generators do i=comb_teeth, 1, -1 integer :: iloc - iloc = pt2_find(stato, pt2_cweight, N_det_generators, 0, iloc) + iloc = pt2_find(stato, pt2_cweight, N_det_generators, 1, iloc) first_det_of_teeth(i) = iloc stato -= comb_step end do diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index de7c93f8..96b47e53 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -2,7 +2,8 @@ use bitmasks BEGIN_PROVIDER [ integer, fragment_count ] implicit none - fragment_count = (elec_alpha_num-n_core_orb)**2 +! fragment_count = (elec_alpha_num-n_core_orb)**2 + fragment_count = 1 END_PROVIDER @@ -44,7 +45,7 @@ subroutine assert(cond, msg) logical, intent(in) :: cond if(.not. cond) then - print *, "assert fail: "//msg + print *, "assert failed: "//msg stop end if end @@ -286,7 +287,7 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p logical :: fullMatch, ok integer(bit_kind) :: mobMask(N_int, 2), negMask(N_int, 2) - integer,allocatable :: preinteresting(:), prefullinteresting(:), interesting(:), fullinteresting(:) + integer,allocatable :: preinteresting(:), prefullinteresting(:), prefullinteresting_det(:,:,:), interesting(:), fullinteresting(:) integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) logical :: monoAdo, monoBdo; @@ -296,7 +297,7 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p monoBdo = .true. allocate(minilist(N_int, 2, N_det_selectors), fullminilist(N_int, 2, N_det)) - allocate(preinteresting(0:N_det_selectors), prefullinteresting(0:N_det), interesting(0:N_det_selectors), fullinteresting(0:N_det)) + allocate(preinteresting(0:N_det_selectors), prefullinteresting(0:N_det), interesting(0:N_det_selectors), fullinteresting(0:N_det), prefullinteresting_det(N_int,2,N_det)) do k=1,N_int hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) @@ -312,19 +313,19 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) - ! ====== - ! If the subset doesn't exist, return - logical :: will_compute - will_compute = subset == 0 - - if (.not.will_compute) then - maskInd = N_holes(1)*N_holes(2) + N_holes(2)*((N_holes(2)-1)/2) + N_holes(1)*((N_holes(1)-1)/2) - will_compute = (maskInd >= subset) - if (.not.will_compute) then - return - endif - endif - ! ====== +! ! ====== +! ! If the subset doesn't exist, return +! logical :: will_compute +! will_compute = subset == 0 +! +! if (.not.will_compute) then +! maskInd = N_holes(1)*N_holes(2) + N_holes(2)*((N_holes(2)-1)/2) + N_holes(1)*((N_holes(1)-1)/2) +! will_compute = (maskInd >= subset) +! if (.not.will_compute) then +! return +! endif +! endif +! ! ====== integer(bit_kind), allocatable:: preinteresting_det(:,:,:) @@ -359,6 +360,10 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p else if(nt <= 2) then prefullinteresting(0) += 1 prefullinteresting(prefullinteresting(0)) = i + do j=1,N_int + prefullinteresting_det(j,1,prefullinteresting(0)) = psi_det_sorted(j,1,i) + prefullinteresting_det(j,2,prefullinteresting(0)) = psi_det_sorted(j,2,i) + enddo end if end if end do @@ -413,23 +418,23 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p do ii=1,prefullinteresting(0) i = prefullinteresting(ii) nt = 0 - mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i)) - mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i)) + mobMask(1,1) = iand(negMask(1,1), prefullinteresting_det(1,1,ii)) + mobMask(1,2) = iand(negMask(1,2), prefullinteresting_det(1,1,ii)) nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) do j=2,N_int - mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + mobMask(j,1) = iand(negMask(j,1), prefullinteresting_det(j,1,ii)) + mobMask(j,2) = iand(negMask(j,2), prefullinteresting_det(j,2,ii)) nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) end do if(nt <= 2) then fullinteresting(0) += 1 fullinteresting(fullinteresting(0)) = i - fullminilist(1,1,fullinteresting(0)) = psi_det_sorted(1,1,i) - fullminilist(1,2,fullinteresting(0)) = psi_det_sorted(1,2,i) + fullminilist(1,1,fullinteresting(0)) = prefullinteresting_det(1,1,ii) + fullminilist(1,2,fullinteresting(0)) = prefullinteresting_det(1,2,ii) do j=2,N_int - fullminilist(j,1,fullinteresting(0)) = psi_det_sorted(j,1,i) - fullminilist(j,2,fullinteresting(0)) = psi_det_sorted(j,2,i) + fullminilist(j,1,fullinteresting(0)) = prefullinteresting_det(j,1,ii) + fullminilist(j,2,fullinteresting(0)) = prefullinteresting_det(j,2,ii) enddo end if end do diff --git a/src/Bitmask/bitmasks.irp.f b/src/Bitmask/bitmasks.irp.f index 6fe201ff..10ab6f67 100644 --- a/src/Bitmask/bitmasks.irp.f +++ b/src/Bitmask/bitmasks.irp.f @@ -6,6 +6,7 @@ BEGIN_PROVIDER [ integer, N_int ] ! Number of 64-bit integers needed to represent determinants as binary strings END_DOC N_int = (mo_tot_num-1)/bit_kind_size + 1 + call write_int(6,N_int, 'N_int') END_PROVIDER @@ -386,6 +387,8 @@ END_PROVIDER n_virt_orb += popcnt(virt_bitmask(i,1)) enddo endif + call write_int(6,n_inact_orb, 'Number of inactive MOs') + call write_int(6,n_virt_orb, 'Number of virtual MOs') END_PROVIDER @@ -559,10 +562,11 @@ END_PROVIDER integer :: i,j n_core_orb = 0 do i = 1, N_int - core_bitmask(i,1) = xor(HF_bitmask(i,1),ior(reunion_of_cas_inact_bitmask(i,1),virt_bitmask(i,1))) - core_bitmask(i,2) = xor(HF_bitmask(i,2),ior(reunion_of_cas_inact_bitmask(i,2),virt_bitmask(i,1))) + core_bitmask(i,1) = xor(full_ijkl_bitmask(i),ior(reunion_of_cas_inact_bitmask(i,1),virt_bitmask(i,1))) + core_bitmask(i,2) = xor(full_ijkl_bitmask(i),ior(reunion_of_cas_inact_bitmask(i,2),virt_bitmask(i,1))) n_core_orb += popcnt(core_bitmask(i,1)) enddo + call write_int(6,n_core_orb,'Number of core MOs') END_PROVIDER @@ -597,7 +601,7 @@ BEGIN_PROVIDER [ integer, n_act_orb] do i = 1, N_int n_act_orb += popcnt(cas_bitmask(i,1,1)) enddo - print*,'n_act_orb = ',n_act_orb + call write_int(6,n_act_orb, 'Number of active MOs') END_PROVIDER BEGIN_PROVIDER [integer, list_act, (n_act_orb)] diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 026921d0..d9481886 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -405,6 +405,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) double precision, allocatable :: vt(:,:), ut(:,:), st(:,:) integer :: i,j,k,l, jj,ii integer :: i0, j0 + logical, allocatable :: utloop(:) integer, allocatable :: shortcut(:,:), sort_idx(:,:) integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:) @@ -427,7 +428,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) PROVIDE ref_bitmask_energy allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2)) - allocate( ut(N_st_8,n)) + allocate( ut(N_st_8,n), utloop(n) ) v_0 = 0.d0 s_0 = 0.d0 @@ -437,16 +438,19 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& - !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) + !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8,utloop) allocate(vt(N_st_8,n),st(N_st_8,n)) Vt = 0.d0 St = 0.d0 !$OMP DO do i=1,n + utloop(i) = .False. do istate=1,N_st ut(istate,i) = u_0(sort_idx(i,2),istate) + utloop(i) = utloop(i) .or. (dabs(u_0(sort_idx(i,2),istate)) > 1.d-20) enddo + utloop(i) = .not.utloop(i) enddo !$OMP END DO @@ -455,6 +459,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) do i=shortcut(sh,2),shortcut(sh+1,2)-1 org_i = sort_idx(i,2) do j=shortcut(sh,2),shortcut(sh+1,2)-1 + if (utloop(j)) cycle org_j = sort_idx(j,2) ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2))) if (ext > 4) cycle @@ -477,9 +482,12 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) !$OMP DO do i=1,n + utloop(i) = .False. do istate=1,N_st ut(istate,i) = u_0(sort_idx(i,1),istate) + utloop(i) = utloop(i) .or. (dabs(u_0(sort_idx(i,2),istate)) > 1.d-20) enddo + utloop(i) = .not.utloop(i) enddo !$OMP END DO @@ -503,6 +511,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo do j=shortcut(sh2,1),shortcut(sh2+1,1)-1 + if (utloop(j)) cycle ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) if (ext > 4) cycle do ni=2,Nint @@ -540,6 +549,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo do j=shortcut(sh,1),i-1 + if (utloop(j)) cycle ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) if (ext > 4) cycle do ni=2,Nint @@ -566,6 +576,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo do j=i+1,shortcut(sh+1,1)-1 + if (utloop(j)) cycle ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) if (ext > 4) cycle do ni=2,Nint From 1bea2ef2d52c4df970904fb4dc863fc6652b004b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 10 Feb 2017 01:35:03 +0100 Subject: [PATCH 059/106] Fixed selection --- plugins/Full_CI_ZMQ/selection.irp.f | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 96b47e53..89667fec 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -370,7 +370,10 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p maskInd = -1 - integer :: nb_count + logical, allocatable :: banned(:,:,:) + logical, allocatable :: bannedOrb(:,:) + allocate(bannedOrb(mo_tot_num,2), banned(mo_tot_num,mo_tot_num,2)) + do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first @@ -419,7 +422,7 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p i = prefullinteresting(ii) nt = 0 mobMask(1,1) = iand(negMask(1,1), prefullinteresting_det(1,1,ii)) - mobMask(1,2) = iand(negMask(1,2), prefullinteresting_det(1,1,ii)) + mobMask(1,2) = iand(negMask(1,2), prefullinteresting_det(1,2,ii)) nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) do j=2,N_int mobMask(j,1) = iand(negMask(j,1), prefullinteresting_det(j,1,ii)) @@ -450,9 +453,6 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p if(s1 == s2) ib = i1+1 monoAdo = .true. do i2=N_holes(s2),ib,-1 ! Generate low excitations first - logical :: banned(mo_tot_num, mo_tot_num,2) - logical :: bannedOrb(mo_tot_num, 2) - maskInd += 1 if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then h2 = hole_list(i2,s2) @@ -492,6 +492,9 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p enddo enddo enddo + deallocate(bannedOrb, banned, prefullinteresting_det, preinteresting_det) + deallocate(minilist, fullminilist) + deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) end From 9354d7f5f1ac2f6e0ef33d207a9ba1636dd34728 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 10 Feb 2017 03:24:12 +0100 Subject: [PATCH 060/106] Fixed many bugs --- plugins/Full_CI_ZMQ/selection.irp.f | 35 ++++++++++++----------------- src/Davidson/u0Hu0.irp.f | 15 ++----------- src/Determinants/occ_pattern.irp.f | 3 ++- 3 files changed, 18 insertions(+), 35 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 89667fec..f27a42df 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -287,7 +287,7 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p logical :: fullMatch, ok integer(bit_kind) :: mobMask(N_int, 2), negMask(N_int, 2) - integer,allocatable :: preinteresting(:), prefullinteresting(:), prefullinteresting_det(:,:,:), interesting(:), fullinteresting(:) + integer,allocatable :: preinteresting(:), prefullinteresting(:), interesting(:), fullinteresting(:) integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) logical :: monoAdo, monoBdo; @@ -297,7 +297,7 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p monoBdo = .true. allocate(minilist(N_int, 2, N_det_selectors), fullminilist(N_int, 2, N_det)) - allocate(preinteresting(0:N_det_selectors), prefullinteresting(0:N_det), interesting(0:N_det_selectors), fullinteresting(0:N_det), prefullinteresting_det(N_int,2,N_det)) + allocate(preinteresting(0:N_det_selectors), prefullinteresting(0:N_det), interesting(0:N_det_selectors), fullinteresting(0:N_det)) do k=1,N_int hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) @@ -360,20 +360,13 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p else if(nt <= 2) then prefullinteresting(0) += 1 prefullinteresting(prefullinteresting(0)) = i - do j=1,N_int - prefullinteresting_det(j,1,prefullinteresting(0)) = psi_det_sorted(j,1,i) - prefullinteresting_det(j,2,prefullinteresting(0)) = psi_det_sorted(j,2,i) - enddo end if end if end do maskInd = -1 - logical, allocatable :: banned(:,:,:) - logical, allocatable :: bannedOrb(:,:) - allocate(bannedOrb(mo_tot_num,2), banned(mo_tot_num,mo_tot_num,2)) - + integer :: nb_count do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first @@ -421,23 +414,23 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p do ii=1,prefullinteresting(0) i = prefullinteresting(ii) nt = 0 - mobMask(1,1) = iand(negMask(1,1), prefullinteresting_det(1,1,ii)) - mobMask(1,2) = iand(negMask(1,2), prefullinteresting_det(1,2,ii)) + mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i)) nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) do j=2,N_int - mobMask(j,1) = iand(negMask(j,1), prefullinteresting_det(j,1,ii)) - mobMask(j,2) = iand(negMask(j,2), prefullinteresting_det(j,2,ii)) + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) end do if(nt <= 2) then fullinteresting(0) += 1 fullinteresting(fullinteresting(0)) = i - fullminilist(1,1,fullinteresting(0)) = prefullinteresting_det(1,1,ii) - fullminilist(1,2,fullinteresting(0)) = prefullinteresting_det(1,2,ii) + fullminilist(1,1,fullinteresting(0)) = psi_det_sorted(1,1,i) + fullminilist(1,2,fullinteresting(0)) = psi_det_sorted(1,2,i) do j=2,N_int - fullminilist(j,1,fullinteresting(0)) = prefullinteresting_det(j,1,ii) - fullminilist(j,2,fullinteresting(0)) = prefullinteresting_det(j,2,ii) + fullminilist(j,1,fullinteresting(0)) = psi_det_sorted(j,1,i) + fullminilist(j,2,fullinteresting(0)) = psi_det_sorted(j,2,i) enddo end if end do @@ -453,6 +446,9 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p if(s1 == s2) ib = i1+1 monoAdo = .true. do i2=N_holes(s2),ib,-1 ! Generate low excitations first + logical :: banned(mo_tot_num, mo_tot_num,2) + logical :: bannedOrb(mo_tot_num, 2) + maskInd += 1 if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then h2 = hole_list(i2,s2) @@ -492,9 +488,6 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p enddo enddo enddo - deallocate(bannedOrb, banned, prefullinteresting_det, preinteresting_det) - deallocate(minilist, fullminilist) - deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) end diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index d9481886..026921d0 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -405,7 +405,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) double precision, allocatable :: vt(:,:), ut(:,:), st(:,:) integer :: i,j,k,l, jj,ii integer :: i0, j0 - logical, allocatable :: utloop(:) integer, allocatable :: shortcut(:,:), sort_idx(:,:) integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:) @@ -428,7 +427,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) PROVIDE ref_bitmask_energy allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2)) - allocate( ut(N_st_8,n), utloop(n) ) + allocate( ut(N_st_8,n)) v_0 = 0.d0 s_0 = 0.d0 @@ -438,19 +437,16 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& - !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8,utloop) + !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) allocate(vt(N_st_8,n),st(N_st_8,n)) Vt = 0.d0 St = 0.d0 !$OMP DO do i=1,n - utloop(i) = .False. do istate=1,N_st ut(istate,i) = u_0(sort_idx(i,2),istate) - utloop(i) = utloop(i) .or. (dabs(u_0(sort_idx(i,2),istate)) > 1.d-20) enddo - utloop(i) = .not.utloop(i) enddo !$OMP END DO @@ -459,7 +455,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) do i=shortcut(sh,2),shortcut(sh+1,2)-1 org_i = sort_idx(i,2) do j=shortcut(sh,2),shortcut(sh+1,2)-1 - if (utloop(j)) cycle org_j = sort_idx(j,2) ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2))) if (ext > 4) cycle @@ -482,12 +477,9 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) !$OMP DO do i=1,n - utloop(i) = .False. do istate=1,N_st ut(istate,i) = u_0(sort_idx(i,1),istate) - utloop(i) = utloop(i) .or. (dabs(u_0(sort_idx(i,2),istate)) > 1.d-20) enddo - utloop(i) = .not.utloop(i) enddo !$OMP END DO @@ -511,7 +503,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo do j=shortcut(sh2,1),shortcut(sh2+1,1)-1 - if (utloop(j)) cycle ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) if (ext > 4) cycle do ni=2,Nint @@ -549,7 +540,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo do j=shortcut(sh,1),i-1 - if (utloop(j)) cycle ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) if (ext > 4) cycle do ni=2,Nint @@ -576,7 +566,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo do j=i+1,shortcut(sh+1,1)-1 - if (utloop(j)) cycle ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) if (ext > 4) cycle do ni=2,Nint diff --git a/src/Determinants/occ_pattern.irp.f b/src/Determinants/occ_pattern.irp.f index 707be0f2..1ff8cb73 100644 --- a/src/Determinants/occ_pattern.irp.f +++ b/src/Determinants/occ_pattern.irp.f @@ -259,7 +259,8 @@ subroutine make_s2_eigenfunction call occ_pattern_to_dets_size(psi_occ_pattern(1,1,1),s,elec_alpha_num,N_int) allocate (d(N_int,2,s), det_buffer(N_int,2,bufsze) ) smax = s - ithread = omp_get_thread_num() + ithread=0 + !$ ithread = omp_get_thread_num() !$OMP DO do i=1,N_occ_pattern call occ_pattern_to_dets_size(psi_occ_pattern(1,1,i),s,elec_alpha_num,N_int) From 3fd719ed7e2c1c42271a8119408d1390dbcdeb85 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 10 Feb 2017 19:52:44 +0100 Subject: [PATCH 061/106] fragmentation bug in selection.irp.f --- plugins/Full_CI_ZMQ/selection.irp.f | 47 ++++++++++++++--------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index de7c93f8..a1b4d007 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -448,32 +448,31 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p logical :: banned(mo_tot_num, mo_tot_num,2) logical :: bannedOrb(mo_tot_num, 2) - maskInd += 1 - if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then - h2 = hole_list(i2,s2) - call apply_hole(pmask, s2,h2, mask, ok, N_int) - banned = .false. - do j=1,mo_tot_num - bannedOrb(j, 1) = .true. - bannedOrb(j, 2) = .true. + h2 = hole_list(i2,s2) + call apply_hole(pmask, s2,h2, mask, ok, N_int) + banned = .false. + do j=1,mo_tot_num + bannedOrb(j, 1) = .true. + bannedOrb(j, 2) = .true. + enddo + do s3=1,2 + do i=1,N_particles(s3) + bannedOrb(particle_list(i,s3), s3) = .false. enddo - do s3=1,2 - do i=1,N_particles(s3) - bannedOrb(particle_list(i,s3), s3) = .false. - enddo - enddo - - if(s1 /= s2) then - if(monoBdo) then - bannedOrb(h1,s1) = .false. - end if - if(monoAdo) then - bannedOrb(h2,s2) = .false. - monoAdo = .false. - end if + enddo + if(s1 /= s2) then + if(monoBdo) then + bannedOrb(h1,s1) = .false. end if - - + if(monoAdo) then + bannedOrb(h2,s2) = .false. + monoAdo = .false. + end if + end if + + maskInd += 1 + if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then + call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle From 63130aa62780364c5f9a1406c813de55bdedd136 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 10 Feb 2017 23:46:14 +0100 Subject: [PATCH 062/106] Fixed travis --- plugins/Full_CI_ZMQ/selection.irp.f | 4 ++-- plugins/MRCC_Utils/H_apply.irp.f | 2 +- tests/bats/cassd.bats | 4 ++-- tests/bats/mrcepa0.bats | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 8f8761a3..85c1a8df 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -2,8 +2,8 @@ use bitmasks BEGIN_PROVIDER [ integer, fragment_count ] implicit none -! fragment_count = (elec_alpha_num-n_core_orb)**2 - fragment_count = 1 + fragment_count = (elec_alpha_num-n_core_orb)**2 +! fragment_count = 1 END_PROVIDER diff --git a/plugins/MRCC_Utils/H_apply.irp.f b/plugins/MRCC_Utils/H_apply.irp.f index 4d8964bf..d8dfb62d 100644 --- a/plugins/MRCC_Utils/H_apply.irp.f +++ b/plugins/MRCC_Utils/H_apply.irp.f @@ -31,7 +31,7 @@ s.set_perturbation("epstein_nesbet_2x2") s.unset_openmp() print s -s = H_apply_zmq("mrcepa_PT2") +s = H_apply("mrcepa_PT2") s.energy = "psi_energy" s.set_perturbation("epstein_nesbet_2x2") s.unset_openmp() diff --git a/tests/bats/cassd.bats b/tests/bats/cassd.bats index 2a8fabc2..1b845e91 100644 --- a/tests/bats/cassd.bats +++ b/tests/bats/cassd.bats @@ -15,12 +15,12 @@ source $QP_ROOT/tests/bats/common.bats.sh energy="$(ezfio get cas_sd_zmq energy_pt2)" eq $energy -76.231084536315 5.E-5 - ezfio set determinants n_det_max 2048 + ezfio set determinants n_det_max 1024 ezfio set determinants read_wf True ezfio set perturbation do_pt2_end True qp_run cassd_zmq $INPUT ezfio set determinants read_wf False energy="$(ezfio get cas_sd_zmq energy)" - eq $energy -76.2300887947446 2.E-5 + eq $energy -76.2225863580749 2.E-5 } diff --git a/tests/bats/mrcepa0.bats b/tests/bats/mrcepa0.bats index 6bca8b7e..2420955c 100644 --- a/tests/bats/mrcepa0.bats +++ b/tests/bats/mrcepa0.bats @@ -16,7 +16,7 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT energy="$(ezfio get mrcepa0 energy_pt2)" - eq $energy -76.2385617521816 1.e-4 + eq $energy -76.2382106224545 1.e-4 } @test "MRCC H2O cc-pVDZ" { @@ -32,7 +32,7 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT energy="$(ezfio get mrcepa0 energy_pt2)" - eq $energy -76.2385052514433 2.e-4 + eq $energy -76.2381673136696 2.e-4 } @test "MRSC2 H2O cc-pVDZ" { @@ -64,6 +64,6 @@ source $QP_ROOT/tests/bats/common.bats.sh ezfio set mrcepa0 n_it_max_dressed_ci 3 qp_run $EXE $INPUT energy="$(ezfio get mrcepa0 energy_pt2)" - eq $energy -76.2417725924747 2.e-4 + eq $energy -76.2411829210128 2.e-4 } From 233b82c58da7084751c86f3dd339408d3b219c52 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 10 Feb 2017 23:49:33 +0100 Subject: [PATCH 063/106] Readme --- README.md | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index c9e1b12d..b15654aa 100644 --- a/README.md +++ b/README.md @@ -82,11 +82,11 @@ If you have set the `--developement` flag you can go in any module directory and ### 4) Compiling the OCaml - make -C ocaml + make -C $QP_ROOT/ocaml ### 5) Testing if all is ok - cd tests ; bats bats/qp.bats + cd tests ; ./run_tests.sh @@ -137,10 +137,6 @@ interface: ezfio #FAQ -### Opam error: cryptokit - -You need to install `gmp-dev`. - ### Error: ezfio_* is already defined. #### Why ? @@ -166,5 +162,5 @@ It's caused when we call the DGEMM routine of LAPACK. ##### Fix -Set `ulimit -s unlimited`, before runing `qp_run`. It seem to fix the problem. +Set `ulimit -s unlimited`, before runing `qp_run`. It seems to fix the problem. From da50bc6f728075c687b1bf77d4597d5116e0aba0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 12 Feb 2017 09:37:00 +0100 Subject: [PATCH 064/106] Provide fragment count --- plugins/Full_CI_ZMQ/selection.irp.f | 2 ++ plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f | 4 ++-- plugins/Full_CI_ZMQ/selection_slave.irp.f | 2 +- plugins/Full_CI_ZMQ/zmq_selection.irp.f | 2 ++ 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 85c1a8df..1d819dfc 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -293,6 +293,8 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p logical :: monoAdo, monoBdo; integer :: maskInd + PROVIDE fragment_count + monoAdo = .true. monoBdo = .true. diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index 695866ed..d56df13e 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -12,8 +12,8 @@ program selection_slave end subroutine provide_everything - PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context mo_mono_elec_integral -! PROVIDE pt2_e0_denominator mo_tot_num N_int + PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context + PROVIDE pt2_e0_denominator mo_tot_num N_int fragment_count end subroutine run_wf diff --git a/plugins/Full_CI_ZMQ/selection_slave.irp.f b/plugins/Full_CI_ZMQ/selection_slave.irp.f index 12a05601..92c6b775 100644 --- a/plugins/Full_CI_ZMQ/selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_slave.irp.f @@ -13,7 +13,7 @@ end subroutine provide_everything PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context - PROVIDE pt2_e0_denominator mo_tot_num N_int + PROVIDE pt2_e0_denominator mo_tot_num N_int fragment_count end subroutine run_wf diff --git a/plugins/Full_CI_ZMQ/zmq_selection.irp.f b/plugins/Full_CI_ZMQ/zmq_selection.irp.f index 75992273..b4be5638 100644 --- a/plugins/Full_CI_ZMQ/zmq_selection.irp.f +++ b/plugins/Full_CI_ZMQ/zmq_selection.irp.f @@ -13,6 +13,8 @@ subroutine ZMQ_selection(N_in, pt2) double precision, intent(out) :: pt2(N_states) + PROVIDE fragment_count + if (.True.) then PROVIDE pt2_e0_denominator N = max(N_in,1) From c390ee32665644b02c797bbcc2484e83aa8bb341 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 13 Feb 2017 00:04:03 +0100 Subject: [PATCH 065/106] Format in print --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 2 +- plugins/Full_CI_ZMQ/selection.irp.f | 4 +++- src/Determinants/occ_pattern.irp.f | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index ac98bd9c..745dffac 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -263,7 +263,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su avg = E0 + (sumabove(tooth) / Nabove(tooth)) eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) time = omp_get_wtime() - print "(A, 4(E15.7), 4(I9))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) + print "(A, 4(E20.13), 4(I9))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) if (dabs(eqt/avg) < relative_error) then pt2(1) = avg exit pullLoop diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 1d819dfc..95cc30cd 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -2,8 +2,10 @@ use bitmasks BEGIN_PROVIDER [ integer, fragment_count ] implicit none + BEGIN_DOC + ! Number of fragments for the deterministic part + END_DOC fragment_count = (elec_alpha_num-n_core_orb)**2 -! fragment_count = 1 END_PROVIDER diff --git a/src/Determinants/occ_pattern.irp.f b/src/Determinants/occ_pattern.irp.f index 1ff8cb73..e8bb7466 100644 --- a/src/Determinants/occ_pattern.irp.f +++ b/src/Determinants/occ_pattern.irp.f @@ -36,7 +36,7 @@ subroutine occ_pattern_to_dets_size(o,sze,n_alpha,Nint) amax -= popcnt( o(k,2) ) enddo sze = int( min(binom_func(bmax, amax), 1.d8) ) - sze = sze*sze + 10 + sze = 2*sze*sze + 16 end @@ -290,6 +290,7 @@ subroutine make_s2_eigenfunction if (N_det_new > 0) then call fill_H_apply_buffer_no_selection(N_det_new,det_buffer,N_int,ithread) endif + !$OMP BARRIER deallocate(d,det_buffer) !$OMP END PARALLEL From 9d2c209c057ecddd1fd05ade1f84877f729ac7d8 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 14 Feb 2017 15:44:39 +0100 Subject: [PATCH 066/106] target_pt2_ratio_cassd.irp.f --- plugins/CAS_SD_ZMQ/cassd_zmq.irp.f | 121 ------------------ plugins/CAS_SD_ZMQ/selection.irp.f | 118 +++++++++++++++++ .../CAS_SD_ZMQ/target_pt2_ratio_cassd.irp.f | 109 ++++++++++++++++ 3 files changed, 227 insertions(+), 121 deletions(-) create mode 100644 plugins/CAS_SD_ZMQ/target_pt2_ratio_cassd.irp.f diff --git a/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f b/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f index 881f74c3..5b364400 100644 --- a/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f +++ b/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f @@ -132,124 +132,3 @@ program fci_zmq call ezfio_set_cas_sd_zmq_energy_pt2(E_CI_before(1)+pt2(1)) end - - - - -subroutine ZMQ_selection(N_in, pt2) - use f77_zmq - use selection_types - - implicit none - - character*(512) :: task - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - integer, intent(in) :: N_in - type(selection_buffer) :: b - integer :: i, N - integer, external :: omp_get_thread_num - double precision, intent(out) :: pt2(N_states) - - - if (.True.) then - PROVIDE pt2_e0_denominator - N = max(N_in,1) - provide nproc - call new_parallel_job(zmq_to_qp_run_socket,"selection") - call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator)) - call zmq_set_running(zmq_to_qp_run_socket) - call create_selection_buffer(N, N*2, b) - endif - - integer :: i_generator, i_generator_start, i_generator_max, step -! step = int(max(1.,10*elec_num/mo_tot_num) - - step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) - step = max(1,step) - do i= 1, N_det_generators,step - i_generator_start = i - i_generator_max = min(i+step-1,N_det_generators) - write(task,*) i_generator_start, i_generator_max, 1, N - call add_task_to_taskserver(zmq_to_qp_run_socket,task) - end do - - !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) - i = omp_get_thread_num() - if (i==0) then - call selection_collector(b, pt2) - else - call selection_slave_inproc(i) - endif - !$OMP END PARALLEL - call end_parallel_job(zmq_to_qp_run_socket, 'selection') - if (N_in > 0) then - call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN - call copy_H_apply_buffer_to_wf() - if (s2_eig) then - call make_s2_eigenfunction - endif - endif -end subroutine - - -subroutine selection_slave_inproc(i) - implicit none - integer, intent(in) :: i - - call run_selection_slave(1,i,pt2_e0_denominator) -end - -subroutine selection_collector(b, pt2) - use f77_zmq - use selection_types - use bitmasks - implicit none - - - type(selection_buffer), intent(inout) :: b - double precision, intent(out) :: pt2(N_states) - double precision :: pt2_mwen(N_states) - integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - - integer(ZMQ_PTR), external :: new_zmq_pull_socket - integer(ZMQ_PTR) :: zmq_socket_pull - - integer :: msg_size, rc, more - integer :: acc, i, j, robin, N, ntask - double precision, allocatable :: val(:) - integer(bit_kind), allocatable :: det(:,:,:) - integer, allocatable :: task_id(:) - integer :: done - real :: time, time0 - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - zmq_socket_pull = new_zmq_pull_socket() - allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det)) - done = 0 - more = 1 - pt2(:) = 0d0 - call CPU_TIME(time0) - do while (more == 1) - call pull_selection_results(zmq_socket_pull, pt2_mwen, val(1), det(1,1,1), N, task_id, ntask) - pt2 += pt2_mwen - do i=1, N - call add_to_selection_buffer(b, det(1,1,i), val(i)) - end do - - do i=1, ntask - if(task_id(i) == 0) then - print *, "Error in collector" - endif - call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) - end do - done += ntask - call CPU_TIME(time) -! print *, "DONE" , done, time - time0 - end do - - - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - call end_zmq_pull_socket(zmq_socket_pull) - call sort_selection_buffer(b) -end subroutine - diff --git a/plugins/CAS_SD_ZMQ/selection.irp.f b/plugins/CAS_SD_ZMQ/selection.irp.f index db8ebbf0..70230e9e 100644 --- a/plugins/CAS_SD_ZMQ/selection.irp.f +++ b/plugins/CAS_SD_ZMQ/selection.irp.f @@ -1205,3 +1205,121 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) end do genl end subroutine + +subroutine ZMQ_selection(N_in, pt2) + use f77_zmq + use selection_types + + implicit none + + character*(512) :: task + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + integer, intent(in) :: N_in + type(selection_buffer) :: b + integer :: i, N + integer, external :: omp_get_thread_num + double precision, intent(out) :: pt2(N_states) + + + if (.True.) then + PROVIDE pt2_e0_denominator + N = max(N_in,1) + provide nproc + call new_parallel_job(zmq_to_qp_run_socket,"selection") + call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator)) + call zmq_set_running(zmq_to_qp_run_socket) + call create_selection_buffer(N, N*2, b) + endif + + integer :: i_generator, i_generator_start, i_generator_max, step +! step = int(max(1.,10*elec_num/mo_tot_num) + + step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) + step = max(1,step) + do i= 1, N_det_generators,step + i_generator_start = i + i_generator_max = min(i+step-1,N_det_generators) + write(task,*) i_generator_start, i_generator_max, 1, N + call add_task_to_taskserver(zmq_to_qp_run_socket,task) + end do + + !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) + i = omp_get_thread_num() + if (i==0) then + call selection_collector(b, pt2) + else + call selection_slave_inproc(i) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, 'selection') + if (N_in > 0) then + call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN + call copy_H_apply_buffer_to_wf() + if (s2_eig) then + call make_s2_eigenfunction + endif + endif +end subroutine + + +subroutine selection_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call run_selection_slave(1,i,pt2_e0_denominator) +end + +subroutine selection_collector(b, pt2) + use f77_zmq + use selection_types + use bitmasks + implicit none + + + type(selection_buffer), intent(inout) :: b + double precision, intent(out) :: pt2(N_states) + double precision :: pt2_mwen(N_states) + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_pull_socket + integer(ZMQ_PTR) :: zmq_socket_pull + + integer :: msg_size, rc, more + integer :: acc, i, j, robin, N, ntask + double precision, allocatable :: val(:) + integer(bit_kind), allocatable :: det(:,:,:) + integer, allocatable :: task_id(:) + integer :: done + real :: time, time0 + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_pull = new_zmq_pull_socket() + allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det)) + done = 0 + more = 1 + pt2(:) = 0d0 + call CPU_TIME(time0) + do while (more == 1) + call pull_selection_results(zmq_socket_pull, pt2_mwen, val(1), det(1,1,1), N, task_id, ntask) + pt2 += pt2_mwen + do i=1, N + call add_to_selection_buffer(b, det(1,1,i), val(i)) + end do + + do i=1, ntask + if(task_id(i) == 0) then + print *, "Error in collector" + endif + call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) + end do + done += ntask + call CPU_TIME(time) +! print *, "DONE" , done, time - time0 + end do + + + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_pull_socket(zmq_socket_pull) + call sort_selection_buffer(b) +end subroutine + diff --git a/plugins/CAS_SD_ZMQ/target_pt2_ratio_cassd.irp.f b/plugins/CAS_SD_ZMQ/target_pt2_ratio_cassd.irp.f new file mode 100644 index 00000000..cf934a46 --- /dev/null +++ b/plugins/CAS_SD_ZMQ/target_pt2_ratio_cassd.irp.f @@ -0,0 +1,109 @@ +program fci_zmq + implicit none + integer :: i,j,k + logical, external :: detEq + + double precision, allocatable :: pt2(:) + integer :: Nmin, Nmax + integer :: n_det_before, to_select + double precision :: threshold_davidson_in, ratio, E_ref + + double precision, allocatable :: psi_coef_ref(:,:) + integer(bit_kind), allocatable :: psi_det_ref(:,:,:) + + + allocate (pt2(N_states)) + + pt2 = 1.d0 + threshold_davidson_in = threshold_davidson + threshold_davidson = threshold_davidson_in * 100.d0 + SOFT_TOUCH threshold_davidson + + ! Stopping criterion is the PT2max + + double precision :: E_CI_before(N_states) + do while (dabs(pt2(1)) > pt2_max) + print *, 'N_det = ', N_det + print *, 'N_states = ', N_states + do k=1, N_states + print*,'State ',k + print *, 'PT2 = ', pt2(k) + print *, 'E = ', CI_energy(k) + print *, 'E(before)+PT2 = ', E_CI_before(k)+pt2(k) + enddo + print *, '-----' + E_CI_before(1:N_states) = CI_energy(1:N_states) + call ezfio_set_cas_sd_zmq_energy(CI_energy(1)) + + n_det_before = N_det + to_select = N_det + to_select = max(64-to_select, to_select) + call ZMQ_selection(to_select, pt2) + + PROVIDE psi_coef + PROVIDE psi_det + PROVIDE psi_det_sorted + + call diagonalize_CI + call save_wavefunction + call ezfio_set_cas_sd_zmq_energy(CI_energy(1)) + enddo + + threshold_selectors = max(threshold_selectors,threshold_selectors_pt2) + threshold_generators = max(threshold_generators,threshold_generators_pt2) + threshold_davidson = threshold_davidson_in + TOUCH threshold_selectors threshold_generators threshold_davidson + call diagonalize_CI + call ZMQ_selection(0, pt2) + + E_ref = CI_energy(1) + pt2(1) + print *, 'Est FCI = ', E_ref + + Nmax = N_det + Nmin = 2 + allocate (psi_coef_ref(size(psi_coef_sorted,1),size(psi_coef_sorted,2))) + allocate (psi_det_ref(N_int,2,size(psi_det_sorted,3))) + psi_coef_ref = psi_coef_sorted + psi_det_ref = psi_det_sorted + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + TOUCH psi_coef psi_det + do while (Nmax-Nmin > 1) + psi_coef = psi_coef_ref + psi_det = psi_det_ref + TOUCH psi_det psi_coef + call diagonalize_CI + ratio = (CI_energy(1) - HF_energy) / (E_ref - HF_energy) + if (ratio < var_pt2_ratio) then + Nmin = N_det + else + Nmax = N_det + psi_coef_ref = psi_coef + psi_det_ref = psi_det + TOUCH psi_det psi_coef + endif + N_det = Nmin + (Nmax-Nmin)/2 + print *, '-----' + print *, 'Det min, Det max: ', Nmin, Nmax + print *, 'Ratio : ', ratio, ' ~ ', var_pt2_ratio + print *, 'N_det = ', N_det + print *, 'E = ', CI_energy(1) + call save_wavefunction + enddo + call ZMQ_selection(0, pt2) + print *, '------' + print *, 'HF_energy = ', HF_energy + print *, 'Est FCI = ', E_ref + print *, 'E = ', CI_energy(1) + print *, 'PT2 = ', pt2(1) + print *, 'E+PT2 = ', CI_energy(1)+pt2(1) + + E_CI_before(1:N_states) = CI_energy(1:N_states) + call save_wavefunction + call ezfio_set_cas_sd_zmq_energy(CI_energy(1)) + call ezfio_set_cas_sd_zmq_energy_pt2(E_CI_before(1)+pt2(1)) +end + + + + From 8b1d083de9e97b20266ba0a511a16b94ba8723de Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 Feb 2017 10:43:57 +0100 Subject: [PATCH 067/106] Memory in Davidson --- plugins/MRCC_Utils/mrcc_utils.irp.f | 100 ++++++++++++++++++----- plugins/Psiref_Utils/psi_ref_utils.irp.f | 3 +- src/Davidson/davidson_parallel.irp.f | 38 ++++----- src/Davidson/diagonalization_hs2.irp.f | 15 +++- src/Davidson/u0Hu0.irp.f | 76 +++++++---------- 5 files changed, 134 insertions(+), 98 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index d6b9cc79..374b003d 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -8,6 +8,7 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, lambda_mrcc, (N_states, N_det_non_ref) ] &BEGIN_PROVIDER [ integer, lambda_mrcc_pt2, (0:psi_det_size) ] &BEGIN_PROVIDER [ integer, lambda_mrcc_kept, (0:psi_det_size) ] +&BEGIN_PROVIDER [ double precision, lambda_pert, (N_states, N_det_non_ref) ] implicit none BEGIN_DOC ! cm/ or perturbative 1/Delta_E(m) @@ -15,7 +16,7 @@ END_PROVIDER integer :: i,k double precision :: ihpsi_current(N_states) integer :: i_pert_count - double precision :: hii, lambda_pert + double precision :: hii, E2(N_states), E2var(N_states) integer :: N_lambda_mrcc_pt2, N_lambda_mrcc_pt3 i_pert_count = 0 @@ -25,6 +26,8 @@ END_PROVIDER lambda_mrcc_pt2(0) = 0 lambda_mrcc_kept(0) = 0 + E2 = 0.d0 + E2var = 0.d0 do i=1,N_det_non_ref call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,& size(psi_ref_coef,1), N_states,ihpsi_current) @@ -33,24 +36,51 @@ END_PROVIDER if (ihpsi_current(k) == 0.d0) then ihpsi_current(k) = 1.d-32 endif -! lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k) - lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) ) - lambda_pert = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) - if (lambda_pert / lambda_mrcc(k,i) < 0.5d0) then - ! Ignore lamdba - i_pert_count += 1 - lambda_mrcc(k,i) = 0.d0 - if (lambda_mrcc_pt2(N_lambda_mrcc_pt2) /= i) then - N_lambda_mrcc_pt2 += 1 - lambda_mrcc_pt2(N_lambda_mrcc_pt2) = i - endif - else - ! Keep lamdba - if (lambda_mrcc_kept(N_lambda_mrcc_pt3) /= i) then - N_lambda_mrcc_pt3 += 1 - lambda_mrcc_kept(N_lambda_mrcc_pt3) = i - endif + lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k) + lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) + E2(k) += ihpsi_current(k)*ihpsi_current(k) / (psi_ref_energy_diagonalized(k)-hii) + E2var(k) += ihpsi_current(k) * psi_non_ref_coef(i,k) + enddo + enddo + + do i=1,N_det_non_ref + call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,& + size(psi_ref_coef,1), N_states,ihpsi_current) + call i_H_j(psi_non_ref(1,1,i),psi_non_ref(1,1,i),N_int,hii) + do k=1,N_states + if (ihpsi_current(k) == 0.d0) then + ihpsi_current(k) = 1.d-32 endif + lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k) + lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) * E2var(k)/E2(k) +! lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) ) +! if ( lambda_pert / lambda_mrcc(k,i) < 0.5d0) then +! print *, ' xxx ', dabs(lambda_pert - lambda_mrcc(k,i)) * ihpsi_current(k) +! if ( dabs( (lambda_pert - lambda_mrcc(k,i)) * ihpsi_current(k) ) > 1.d-3) then +! print *, 'lambda mrcc', lambda_mrcc(k,i) +! print *, 'lambda pert', lambda_pert +! print *, 'coef: ', psi_non_ref_coef(i,k) +! call debug_det(psi_non_ref(1,1,i), N_int) +! call i_H_j(psi_ref(1,1,1),psi_non_ref(1,1,i),N_int,hii) +! print *, hii +! call i_H_j(psi_ref(1,1,2),psi_non_ref(1,1,i),N_int,hii) +! print *, hii +! print *, '---' + ! Ignore lamdba +! i_pert_count += 1 +! lambda_mrcc(k,i) = 0.d0 +! lambda_mrcc(k,i) = lambda_pert * E2var(k)/E2(k) +! if (lambda_mrcc_pt2(N_lambda_mrcc_pt2) /= i) then +! N_lambda_mrcc_pt2 += 1 +! lambda_mrcc_pt2(N_lambda_mrcc_pt2) = i +! endif +! else +! ! Keep lamdba +! if (lambda_mrcc_kept(N_lambda_mrcc_pt3) /= i) then +! N_lambda_mrcc_pt3 += 1 +! lambda_mrcc_kept(N_lambda_mrcc_pt3) = i +! endif +! endif enddo enddo lambda_mrcc_pt2(0) = N_lambda_mrcc_pt2 @@ -784,8 +814,8 @@ END_PROVIDER f = psi_non_ref_coef(i,s) / rho_mrcc(i,s) ! Avoid numerical instabilities - f = min(f,2.d0) - f = max(f,-2.d0) +! f = min(f,2.d0) +! f = max(f,-2.d0) endif norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s) @@ -845,11 +875,39 @@ END_PROVIDER +double precision function f_fit(x) + implicit none + double precision :: x + f_fit = 0.d0 + return + if (x < 0.d0) then + f_fit = 0.d0 + else if (x < 1.d0) then + f_fit = 1.d0/0.367879441171442 * ( x**2 * exp(-x**2)) + else + f_fit = 1.d0 + endif +end double precision function get_dij_index(II, i, s, Nint) integer, intent(in) :: II, i, s, Nint double precision, external :: get_dij - double precision :: HIi, phase + double precision :: HIi, phase, c, a, b, d + + call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,i), Nint, HIi) + call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int) + + a = lambda_pert(s,i) +! b = lambda_mrcc(s,i) +! c = f_fit(a/b) + + d = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) * phase* rho_mrcc(i,s) + + c = f_fit(a*HIi/d) + +! get_dij_index = HIi * a * c + (1.d0 - c) * d + get_dij_index = d + return if(lambda_type == 0) then call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int) diff --git a/plugins/Psiref_Utils/psi_ref_utils.irp.f b/plugins/Psiref_Utils/psi_ref_utils.irp.f index c4147ebc..95c993f0 100644 --- a/plugins/Psiref_Utils/psi_ref_utils.irp.f +++ b/plugins/Psiref_Utils/psi_ref_utils.irp.f @@ -98,8 +98,7 @@ END_PROVIDER enddo N_det_non_ref = i_non_ref if (N_det_non_ref < 1) then - print *, 'Error : All determinants are in the reference' - stop -1 + print *, 'Warning : All determinants are in the reference' endif END_PROVIDER diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 724aac08..04b0cc52 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -144,13 +144,13 @@ subroutine davidson_collect(N, idx, vt, st , v0t, s0t) end subroutine -subroutine davidson_init(zmq_to_qp_run_socket,n,n_st_8,ut) +subroutine davidson_init(zmq_to_qp_run_socket,u,n0,n,n_st) use f77_zmq implicit none integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket - integer, intent(in) :: n, n_st_8 - double precision, intent(in) :: ut(n_st_8,n) + integer, intent(in) :: n0,n, n_st + double precision, intent(in) :: u(n0,n_st) integer :: i,k @@ -164,8 +164,8 @@ subroutine davidson_init(zmq_to_qp_run_socket,n,n_st_8,ut) enddo enddo do i=1,n - do k=1,N_states_diag - dav_ut(k,i) = ut(k,i) + do k=1,n_st + dav_ut(k,i) = u(i,k) enddo enddo @@ -285,6 +285,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call davidson_push_results(zmq_socket_push, blockb, blockb2, N, idx, vt, st, task_id) end do + deallocate(idx, vt, st) end subroutine @@ -411,8 +412,8 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LD allocate(v0t(N_states_diag, dav_size)) allocate(s0t(N_states_diag, dav_size)) - v0t = 00.d0 - s0t = 00.d0 + v0t = 0.d0 + s0t = 0.d0 more = 1 @@ -456,23 +457,12 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA) double precision , intent(inout) :: s0(LDA, N_states_diag) - PROVIDE nproc - - !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(i) - i = omp_get_thread_num() - if (i == 0 ) then - zmq_collector = new_zmq_to_qp_run_socket() - zmq_socket_pull = new_zmq_pull_socket() - call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA) - call end_zmq_to_qp_run_socket(zmq_collector) - call end_zmq_pull_socket(zmq_socket_pull) - call davidson_miniserver_end() - else if (i == 1 ) then - call davidson_miniserver_run () - else - call davidson_slave_inproc(i) - endif - !$OMP END PARALLEL + zmq_collector = new_zmq_to_qp_run_socket() + zmq_socket_pull = new_zmq_pull_socket() + call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA) + call end_zmq_to_qp_run_socket(zmq_collector) + call end_zmq_pull_socket(zmq_socket_pull) + call davidson_miniserver_end() end subroutine diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index 1901525b..4b36e030 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -122,6 +122,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s stop -1 endif + integer, external :: align_double + sze_8 = align_double(sze) + itermax = max(3,min(davidson_sze_max, sze/N_st_diag)) + PROVIDE nuclear_repulsion expected_s2 call write_time(iunit) @@ -134,6 +138,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s call write_int(iunit,N_st,'Number of states') call write_int(iunit,N_st_diag,'Number of states in diagonalization') call write_int(iunit,sze,'Number of determinants') + r1 = 8.d0*(3.d0*dble(sze_8*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 & + + 4.d0*(N_st_diag*itermax))/(1024.d0**3)) + call write_double(iunit, r1, 'Memory(Gb)') write(iunit,'(A)') '' write_buffer = '===== ' do i=1,N_st @@ -151,14 +158,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s enddo write(iunit,'(A)') trim(write_buffer) - integer, external :: align_double - sze_8 = align_double(sze) - - itermax = max(3,min(davidson_sze_max, sze/N_st_diag)) + allocate( & + ! Large W(sze_8,N_st_diag*itermax), & U(sze_8,N_st_diag*itermax), & S(sze_8,N_st_diag*itermax), & + + ! Small h(N_st_diag*itermax,N_st_diag*itermax), & y(N_st_diag*itermax,N_st_diag*itermax), & s_(N_st_diag*itermax,N_st_diag*itermax), & diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 026921d0..42e61b3a 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -57,8 +57,6 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) integer :: N_st_8 integer, external :: align_double - integer :: blockb, blockb2, istep - double precision :: ave_workload, workload, target_workload_inv !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st @@ -286,19 +284,16 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ double precision, intent(in) :: H_jj(n), S2_jj(n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) double precision :: hij,s2 - double precision, allocatable :: ut(:,:) integer :: i,j,k,l, jj,ii integer :: i0, j0, ithread - integer, allocatable :: shortcut(:,:), sort_idx(:) - integer(bit_kind), allocatable :: sorted(:,:), version(:,:) integer(bit_kind) :: sorted_i(Nint) integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate integer :: N_st_8 integer, external :: align_double - integer :: blockb, blockb2, istep + integer :: blockb2, istep double precision :: ave_workload, workload, target_workload_inv integer(ZMQ_PTR) :: handler @@ -311,61 +306,52 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ ASSERT (n>0) PROVIDE ref_bitmask_energy - allocate (shortcut(0:n+1,2), sort_idx(n), sorted(Nint,n), version(Nint,n)) - allocate(ut(N_st_8,n)) - v_0 = 0.d0 s_0 = 0.d0 - do i=1,n - do istate=1,N_st - ut(istate,i) = u_0(i,istate) - enddo - enddo - call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint) - call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint) - - blockb = shortcut(0,1) - call davidson_init(handler,n,N_st_8,ut) + call davidson_init(handler,u_0,size(u_0,1),n,N_st) + + ave_workload = 0.d0 + do sh=1,shortcut_(0,1) + ave_workload += shortcut_(0,1) + ave_workload += (shortcut_(sh+1,1) - shortcut_(sh,1))**2 + do i=sh, shortcut_(0,2), shortcut_(0,1) + do j=i, min(i, shortcut_(0,2)) + ave_workload += (shortcut_(j+1,2) - shortcut_(j, 2))**2 + end do + end do + enddo + ave_workload = ave_workload/dble(shortcut_(0,1)) + target_workload_inv = 0.01d0/ave_workload - PROVIDE nproc - - !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread) + + !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread,sh,i,j, & + !$OMP workload,istep,blockb2) ithread = omp_get_thread_num() if (ithread == 0 ) then - - call zmq_set_running(handler) - ave_workload = 0.d0 - do sh=1,shortcut(0,1) - ave_workload += shortcut(0,1) - ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2 - do i=sh, shortcut(0,2), shortcut(0,1) - do j=i, min(i, shortcut(0,2)) - ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2 - end do - end do - enddo - ave_workload = ave_workload/dble(shortcut(0,1)) - target_workload_inv = 0.01d0/ave_workload - - - do sh=1,shortcut(0,1),1 - workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2 - do i=sh, shortcut(0,2), shortcut(0,1) - do j=i, min(i, shortcut(0,2)) - workload += (shortcut(j+1,2) - shortcut(j, 2))**2 + do sh=1,shortcut_(0,1),1 + workload = shortcut_(0,1)+dble(shortcut_(sh+1,1) - shortcut_(sh,1))**2 + do i=sh, shortcut_(0,2), shortcut_(0,1) + do j=i, min(i, shortcut_(0,2)) + workload += (shortcut_(j+1,2) - shortcut_(j, 2))**2 end do end do istep = 1+ int(workload*target_workload_inv) do blockb2=0, istep-1 call davidson_add_task(handler, sh, blockb2, istep) enddo + if (sh == shortcut_(0,1)/10 + 1) then + !$OMP BARRIER + endif enddo + call zmq_set_running(handler) call davidson_run(handler, v_0, s_0, size(v_0,1)) else if (ithread == 1 ) then + !$OMP BARRIER call davidson_miniserver_run () else + !$OMP BARRIER call davidson_slave_inproc(ithread) endif !$OMP END PARALLEL @@ -378,8 +364,6 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate) enddo enddo - deallocate(shortcut, sort_idx, sorted, version) - deallocate(ut) end @@ -414,8 +398,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) integer :: N_st_8 integer, external :: align_double - integer :: blockb, blockb2, istep - double precision :: ave_workload, workload, target_workload_inv !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st From bfdda0b08a74c1842694d8179f13095dd6787705 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 Feb 2017 19:24:19 +0100 Subject: [PATCH 068/106] Added OcamlLex parser for messages --- ocaml/Makefile | 1 + ocaml/Message.ml | 135 +++++++++++----------- ocaml/Message_lexer.mll | 245 ++++++++++++++++++++++++++++++++++++++++ ocaml/TaskServer.ml | 4 +- 4 files changed, 314 insertions(+), 71 deletions(-) create mode 100644 ocaml/Message_lexer.mll diff --git a/ocaml/Makefile b/ocaml/Makefile index 7d51986f..8519c973 100644 --- a/ocaml/Makefile +++ b/ocaml/Makefile @@ -13,6 +13,7 @@ LIBS= PKGS= OCAMLCFLAGS="-g -warn-error A" OCAMLBUILD=ocamlbuild -j 0 -syntax camlp4o -cflags $(OCAMLCFLAGS) -lflags $(OCAMLCFLAGS) +MLLFILES=$(wildcard *.mll) MLFILES=$(wildcard *.ml) ezfio.ml Qptypes.ml Input_auto_generated.ml qp_edit.ml MLIFILES=$(wildcard *.mli) git ALL_TESTS=$(patsubst %.ml,%.byte,$(wildcard test_*.ml)) diff --git a/ocaml/Message.ml b/ocaml/Message.ml index 68b866d5..3a1f5c57 100644 --- a/ocaml/Message.ml +++ b/ocaml/Message.ml @@ -110,7 +110,7 @@ module Disconnect_msg : sig { client_id: Id.Client.t ; state: State.t ; } - val create : state:string -> client_id:string -> t + val create : state:string -> client_id:int -> t val to_string : t -> string end = struct type t = @@ -118,7 +118,7 @@ end = struct state: State.t ; } let create ~state ~client_id = - { client_id = Id.Client.of_string client_id ; state = State.of_string state } + { client_id = Id.Client.of_int client_id ; state = State.of_string state } let to_string x = Printf.sprintf "disconnect %s %d" (State.to_string x.state) @@ -184,7 +184,7 @@ module DelTask_msg : sig { state: State.t; task_id: Id.Task.t } - val create : state:string -> task_id:string -> t + val create : state:string -> task_id:int -> t val to_string : t -> string end = struct type t = @@ -193,7 +193,7 @@ end = struct } let create ~state ~task_id = { state = State.of_string state ; - task_id = Id.Task.of_string task_id + task_id = Id.Task.of_int task_id } let to_string x = Printf.sprintf "del_task %s %d" @@ -230,7 +230,7 @@ module GetTask_msg : sig { client_id: Id.Client.t ; state: State.t ; } - val create : state:string -> client_id:string -> t + val create : state:string -> client_id:int -> t val to_string : t -> string end = struct type t = @@ -238,7 +238,7 @@ end = struct state: State.t ; } let create ~state ~client_id = - { client_id = Id.Client.of_string client_id ; state = State.of_string state } + { client_id = Id.Client.of_int client_id ; state = State.of_string state } let to_string x = Printf.sprintf "get_task %s %d" (State.to_string x.state) @@ -269,14 +269,14 @@ module GetPsi_msg : sig type t = { client_id: Id.Client.t ; } - val create : client_id:string -> t + val create : client_id:int -> t val to_string : t -> string end = struct type t = { client_id: Id.Client.t ; } let create ~client_id = - { client_id = Id.Client.of_string client_id } + { client_id = Id.Client.of_int client_id } let to_string x = Printf.sprintf "get_psi %d" (Id.Client.to_int x.client_id) @@ -365,14 +365,14 @@ module PutPsi_msg : sig n_det_selectors : Strictly_positive_int.t option; psi : Psi.t option } val create : - client_id:string -> - n_state:string -> - n_det:string -> - psi_det_size:string -> + client_id:int -> + n_state:int -> + n_det:int -> + psi_det_size:int -> psi_det:string option -> psi_coef:string option -> - n_det_generators: string option -> - n_det_selectors:string option -> + n_det_generators: int option -> + n_det_selectors:int option -> energy:string option -> t val to_string_list : t -> string list val to_string : t -> string @@ -388,20 +388,17 @@ end = struct let create ~client_id ~n_state ~n_det ~psi_det_size ~psi_det ~psi_coef ~n_det_generators ~n_det_selectors ~energy = let n_state, n_det, psi_det_size = - Int.of_string n_state - |> Strictly_positive_int.of_int , - Int.of_string n_det - |> Strictly_positive_int.of_int , - Int.of_string psi_det_size - |> Strictly_positive_int.of_int + Strictly_positive_int.of_int n_state, + Strictly_positive_int.of_int n_det, + Strictly_positive_int.of_int psi_det_size in assert (Strictly_positive_int.to_int psi_det_size >= Strictly_positive_int.to_int n_det); let n_det_generators, n_det_selectors = match n_det_generators, n_det_selectors with | Some x, Some y -> - Some (Strictly_positive_int.of_int @@ Int.of_string x), - Some (Strictly_positive_int.of_int @@ Int.of_string y) + Some (Strictly_positive_int.of_int x), + Some (Strictly_positive_int.of_int y) | _ -> None, None in let psi = @@ -411,7 +408,7 @@ end = struct ~psi_coef ~n_det_generators ~n_det_selectors ~energy) | _ -> None in - { client_id = Id.Client.of_string client_id ; + { client_id = Id.Client.of_int client_id ; n_state ; n_det ; psi_det_size ; n_det_generators ; n_det_selectors ; psi } @@ -465,7 +462,7 @@ module TaskDone_msg : sig state: State.t ; task_id: Id.Task.t ; } - val create : state:string -> client_id:string -> task_id:string -> t + val create : state:string -> client_id:int -> task_id:int -> t val to_string : t -> string end = struct type t = @@ -474,9 +471,9 @@ end = struct task_id: Id.Task.t; } let create ~state ~client_id ~task_id = - { client_id = Id.Client.of_string client_id ; + { client_id = Id.Client.of_int client_id ; state = State.of_string state ; - task_id = Id.Task.of_string task_id; + task_id = Id.Task.of_int task_id; } let to_string x = @@ -489,22 +486,22 @@ end (** Terminate *) module Terminate_msg : sig type t - val create : unit -> t + val create : t val to_string : t -> string end = struct type t = Terminate - let create () = Terminate + let create = Terminate let to_string x = "terminate" end (** OK *) module Ok_msg : sig type t - val create : unit -> t + val create : t val to_string : t -> string end = struct type t = Ok - let create () = Ok + let create = Ok let to_string x = "ok" end @@ -551,45 +548,45 @@ type t = let of_string s = - let l = - String.split ~on:' ' s - |> List.filter ~f:(fun x -> (String.strip x) <> "") - |> List.map ~f:String.lowercase - in - match l with - | "add_task" :: state :: task -> - AddTask (AddTask_msg.create ~state ~task:(String.concat ~sep:" " task) ) - | "del_task" :: state :: task_id :: [] -> - DelTask (DelTask_msg.create ~state ~task_id) - | "get_task" :: state :: client_id :: [] -> - GetTask (GetTask_msg.create ~state ~client_id) - | "task_done" :: state :: client_id :: task_id :: [] -> - TaskDone (TaskDone_msg.create ~state ~client_id ~task_id) - | "disconnect" :: state :: client_id :: [] -> - Disconnect (Disconnect_msg.create ~state ~client_id) - | "connect" :: t :: [] -> - Connect (Connect_msg.create t) - | "new_job" :: state :: push_address_tcp :: push_address_inproc :: [] -> - Newjob (Newjob_msg.create push_address_tcp push_address_inproc state) - | "end_job" :: state :: [] -> - Endjob (Endjob_msg.create state) - | "terminate" :: [] -> - Terminate (Terminate_msg.create () ) - | "get_psi" :: client_id :: [] -> - GetPsi (GetPsi_msg.create ~client_id) - | "put_psi" :: client_id :: n_state :: n_det :: psi_det_size :: n_det_generators :: n_det_selectors :: [] -> - PutPsi (PutPsi_msg.create ~client_id ~n_state ~n_det ~psi_det_size - ~n_det_generators:(Some n_det_generators) ~n_det_selectors:(Some n_det_selectors) - ~psi_det:None ~psi_coef:None ~energy:None ) - | "put_psi" :: client_id :: n_state :: n_det :: psi_det_size :: [] -> - PutPsi (PutPsi_msg.create ~client_id ~n_state ~n_det ~psi_det_size ~n_det_generators:None - ~n_det_selectors:None ~psi_det:None ~psi_coef:None ~energy:None) - | "ok" :: [] -> Ok (Ok_msg.create ()) - | "error" :: rest -> Error (Error_msg.create (String.concat ~sep:" " rest)) - | "set_stopped" :: [] -> SetStopped - | "set_running" :: [] -> SetRunning - | "set_waiting" :: [] -> SetWaiting - | _ -> failwith "Message not understood" + let open Message_lexer in + match parse s with + | AddTask_ { state ; task } -> + AddTask (AddTask_msg.create ~state ~task) + | DelTask_ { state ; task_id } -> + DelTask (DelTask_msg.create ~state ~task_id) + | GetTask_ { state ; client_id } -> + GetTask (GetTask_msg.create ~state ~client_id) + | TaskDone_ { state ; task_id ; client_id } -> + TaskDone (TaskDone_msg.create ~state ~client_id ~task_id) + | Disconnect_ { state ; client_id } -> + Disconnect (Disconnect_msg.create ~state ~client_id) + | Connect_ socket -> + Connect (Connect_msg.create socket) + | NewJob_ { state ; push_address_tcp ; push_address_inproc } -> + Newjob (Newjob_msg.create push_address_tcp push_address_inproc state) + | EndJob_ state -> + Endjob (Endjob_msg.create state) + | GetPsi_ client_id -> + GetPsi (GetPsi_msg.create ~client_id) + | PutPsi_ { client_id ; n_state ; n_det ; psi_det_size ; n_det_generators ; n_det_selectors } -> + begin + match n_det_selectors, n_det_generators with + | Some s, Some g -> + PutPsi (PutPsi_msg.create ~client_id ~n_state ~n_det ~psi_det_size + ~n_det_generators:(Some g) ~n_det_selectors:(Some s) + ~psi_det:None ~psi_coef:None ~energy:None ) + | _ -> + PutPsi (PutPsi_msg.create ~client_id ~n_state ~n_det ~psi_det_size + ~n_det_generators:None ~n_det_selectors:None + ~psi_det:None ~psi_coef:None ~energy:None ) + end + | Terminate_ -> Terminate (Terminate_msg.create ) + | SetWaiting_ -> SetWaiting + | SetStopped_ -> SetStopped + | SetRunning_ -> SetRunning + | Ok_ -> Ok (Ok_msg.create) + | Error_ m -> Error (Error_msg.create m) + let to_string = function diff --git a/ocaml/Message_lexer.mll b/ocaml/Message_lexer.mll new file mode 100644 index 00000000..45ffc4d4 --- /dev/null +++ b/ocaml/Message_lexer.mll @@ -0,0 +1,245 @@ +{ + +type kw_type = + | TEXT of string + | WORD of string + | INTEGER of int + | FLOAT of float + | NONE + | END_OF_FILE + | ADD_TASK + | DEL_TASK + | GET_TASK + | TASK_DONE + | DISCONNECT + | CONNECT + | NEW_JOB + | END_JOB + | TERMINATE + | GET_PSI + | PUT_PSI + | OK + | ERROR + | SET_STOPPED + | SET_RUNNING + | SET_WAITING + +type state_task = { state : string ; task : string ; } +type state_taskid = { state : string ; task_id : int ; } +type state_clientid = { state : string ; client_id : int ; } +type state_taskid_clientid = { state : string ; task_id : int ; client_id : int ; } +type state_tcp_inproc = { state : string ; push_address_tcp : string ; push_address_inproc : string ; } +type psi = { client_id: int ; n_state: int ; n_det: int ; psi_det_size: int ; + n_det_generators: int option ; n_det_selectors: int option } + +type msg = + | AddTask_ of state_task + | DelTask_ of state_taskid + | GetTask_ of state_clientid + | TaskDone_ of state_taskid_clientid + | Disconnect_ of state_clientid + | Connect_ of string + | NewJob_ of state_tcp_inproc + | EndJob_ of string + | Terminate_ + | GetPsi_ of int + | PutPsi_ of psi + | Ok_ + | Error_ of string + | SetStopped_ + | SetRunning_ + | SetWaiting_ +} + +let word = [^' ' '\t' '\n']+ +let text = [^' ']+[^'\n']+ +let integer = ['0'-'9']+ +let real = '-'? integer '.' integer (['e' 'E'] '-'? integer)? + +let white = [' ' '\t']+ + + +rule get_text = parse + | text as t { TEXT t } + | _ { NONE } + +and kw = parse + | integer as i { INTEGER (int_of_string i) } + | real as r { FLOAT (float_of_string r)} + | "add_task" { ADD_TASK } + | "del_task" { DEL_TASK } + | "get_task" { GET_TASK } + | "task_done" { TASK_DONE } + | "disconnect" { DISCONNECT } + | "connect" { CONNECT } + | "new_job" { NEW_JOB } + | "end_job" { END_JOB } + | "terminate" { TERMINATE } + | "get_psi" { GET_PSI } + | "put_psi" { PUT_PSI } + | "ok" { OK } + | "error" { ERROR } + | "set_stopped" { SET_STOPPED } + | "set_running" { SET_RUNNING } + | "set_waiting" { SET_WAITING } + | word as w { WORD w } + | eof { END_OF_FILE } + | _ { NONE } + + +{ + let rec read_text lexbuf = + let token = + get_text lexbuf + in + match token with + | TEXT t -> t + | NONE -> read_text lexbuf + | _ -> failwith "Error in MessageLexer (2)" + + and read_word lexbuf = + let token = + kw lexbuf + in + match token with + | WORD w -> w + | NONE -> read_word lexbuf + | _ -> failwith "Error in MessageLexer (3)" + + and read_int lexbuf = + let token = + kw lexbuf + in + match token with + | INTEGER i -> i + | NONE -> read_int lexbuf + | _ -> failwith "Error in MessageLexer (4)" + + and parse_rec lexbuf = + let token = + kw lexbuf + in + match token with + | ADD_TASK -> + let state = read_word lexbuf in + let task = read_text lexbuf in + AddTask_ { state ; task } + + | DEL_TASK -> + let state = read_word lexbuf in + let task_id = read_int lexbuf in + DelTask_ { state ; task_id } + + | GET_TASK -> + let state = read_word lexbuf in + let client_id = read_int lexbuf in + GetTask_ { state ; client_id } + + | TASK_DONE -> + let state = read_word lexbuf in + let client_id = read_int lexbuf in + let task_id = read_int lexbuf in + TaskDone_ { state ; task_id ; client_id } + + | DISCONNECT -> + let state = read_word lexbuf in + let client_id = read_int lexbuf in + Disconnect_ { state ; client_id } + + | GET_PSI -> + let client_id = read_int lexbuf in + GetPsi_ client_id + + | PUT_PSI -> + let client_id = read_int lexbuf in + let n_state = read_int lexbuf in + let n_det = read_int lexbuf in + let psi_det_size = read_int lexbuf in + let n_det_generators, n_det_selectors = + try + (Some (read_int lexbuf), Some (read_int lexbuf)) + with (Failure _) -> (None, None) + in + PutPsi_ { client_id ; n_state ; n_det ; psi_det_size ; n_det_generators ; n_det_selectors } + + | CONNECT -> + let socket = read_word lexbuf in + Connect_ socket + + | NEW_JOB -> + let state = read_word lexbuf in + let push_address_tcp = read_word lexbuf in + let push_address_inproc = read_word lexbuf in + NewJob_ { state ; push_address_tcp ; push_address_inproc } + + | END_JOB -> + let state = read_word lexbuf in + EndJob_ state + + | ERROR -> + let message = read_text lexbuf in + Error_ message + + | OK -> Ok_ + | SET_WAITING -> SetWaiting_ + | SET_RUNNING -> SetRunning_ + | SET_STOPPED -> SetStopped_ + | TERMINATE -> Terminate_ + | NONE -> parse_rec lexbuf + | _ -> failwith "Error in MessageLexer" + + let parse message = + let lexbuf = + Lexing.from_string message + in + parse_rec lexbuf + + + let debug () = + let l = [ + "add_task state_pouet Task pouet zob" ; + "del_task state_pouet 12345" ; + "get_task state_pouet 12" ; + "task_done state_pouet 12 12345"; + "connect tcp"; + "disconnect state_pouet 12"; + "new_job state_pouet tcp://test.com:12345 ipc:///dev/shm/x.socket"; + "end_job state_pouet"; + "terminate" ; + "set_running" ; + "set_stopped" ; + "set_waiting" ; + "ok" ; + "error my_error" ; + "get_psi 12" ; + "put_psi 12 2 1000 10000 800 900" ; + "put_psi 12 2 1000 10000" + ] + |> List.map parse + in + List.map (function + | AddTask_ { state ; task } -> Printf.sprintf "ADD_TASK state:\"%s\" task:\"%s\"" state task + | DelTask_ { state ; task_id } -> Printf.sprintf "DEL_TASK state:\"%s\" task_id:%d" state task_id + | GetTask_ { state ; client_id } -> Printf.sprintf "GET_TASK state:\"%s\" task_id:%d" state client_id + | TaskDone_ { state ; task_id ; client_id } -> Printf.sprintf "TASK_DONE state:\"%s\" task_id:%d client_id:%d" state task_id client_id + | Disconnect_ { state ; client_id } -> Printf.sprintf "DISCONNECT state:\"%s\" client_id:%d" state client_id + | Connect_ socket -> Printf.sprintf "CONNECT socket:\"%s\"" socket + | NewJob_ { state ; push_address_tcp ; push_address_inproc } -> Printf.sprintf "NEW_JOB state:\"%s\" tcp:\"%s\" inproc:\"%s\"" state push_address_tcp push_address_inproc + | EndJob_ state -> Printf.sprintf "END_JOB state:\"%s\"" state + | GetPsi_ client_id -> Printf.sprintf "GET_PSI client_id:%d" client_id + | PutPsi_ { client_id ; n_state ; n_det ; psi_det_size ; n_det_generators ; n_det_selectors } -> + begin + match n_det_selectors, n_det_generators with + | Some s, Some g -> Printf.sprintf "PUT_PSI client_id:%d n_state:%d n_det:%d psi_det_size:%d n_det_generators:%d n_det_selectors:%d" client_id n_state n_det psi_det_size g s + | _ -> Printf.sprintf "PUT_PSI client_id:%d n_state:%d n_det:%d psi_det_size:%d" client_id n_state n_det psi_det_size + end + | Terminate_ -> "TERMINATE" + | SetWaiting_ -> "SET_WAITING" + | SetStopped_ -> "SET_STOPPED" + | SetRunning_ -> "SET_RUNNING" + | Ok_ -> "OK" + | Error_ s -> Printf.sprintf "ERROR: \"%s\"" s + ) l + |> List.iter print_endline + +} diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 6edc8122..9d830437 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -99,7 +99,7 @@ let ip_address = lazy ( let reply_ok rep_socket = - Message.Ok_msg.create () + Message.Ok_msg.create |> Message.Ok_msg.to_string |> ZMQ.Socket.send rep_socket @@ -121,7 +121,7 @@ let stop ~port = ZMQ.Socket.set_linger_period req_socket 1_000_000; ZMQ.Socket.connect req_socket address; - Message.Terminate (Message.Terminate_msg.create ()) + Message.Terminate (Message.Terminate_msg.create) |> Message.to_string |> ZMQ.Socket.send req_socket ; From 1977e5b3c8c7e7b45f4fb723b9d147b5003da60d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 Feb 2017 20:27:58 +0100 Subject: [PATCH 069/106] Fixed MRCC --- plugins/MRCC_Utils/mrcc_utils.irp.f | 213 +++++++++++++++++----------- 1 file changed, 131 insertions(+), 82 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 374b003d..043bc5ca 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -5,10 +5,10 @@ use bitmasks END_PROVIDER + BEGIN_PROVIDER [ double precision, lambda_mrcc, (N_states, N_det_non_ref) ] &BEGIN_PROVIDER [ integer, lambda_mrcc_pt2, (0:psi_det_size) ] &BEGIN_PROVIDER [ integer, lambda_mrcc_kept, (0:psi_det_size) ] -&BEGIN_PROVIDER [ double precision, lambda_pert, (N_states, N_det_non_ref) ] implicit none BEGIN_DOC ! cm/ or perturbative 1/Delta_E(m) @@ -16,7 +16,7 @@ END_PROVIDER integer :: i,k double precision :: ihpsi_current(N_states) integer :: i_pert_count - double precision :: hii, E2(N_states), E2var(N_states) + double precision :: hii, lambda_pert integer :: N_lambda_mrcc_pt2, N_lambda_mrcc_pt3 i_pert_count = 0 @@ -26,8 +26,6 @@ END_PROVIDER lambda_mrcc_pt2(0) = 0 lambda_mrcc_kept(0) = 0 - E2 = 0.d0 - E2var = 0.d0 do i=1,N_det_non_ref call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,& size(psi_ref_coef,1), N_states,ihpsi_current) @@ -36,51 +34,24 @@ END_PROVIDER if (ihpsi_current(k) == 0.d0) then ihpsi_current(k) = 1.d-32 endif - lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k) - lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) - E2(k) += ihpsi_current(k)*ihpsi_current(k) / (psi_ref_energy_diagonalized(k)-hii) - E2var(k) += ihpsi_current(k) * psi_non_ref_coef(i,k) - enddo - enddo - - do i=1,N_det_non_ref - call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,& - size(psi_ref_coef,1), N_states,ihpsi_current) - call i_H_j(psi_non_ref(1,1,i),psi_non_ref(1,1,i),N_int,hii) - do k=1,N_states - if (ihpsi_current(k) == 0.d0) then - ihpsi_current(k) = 1.d-32 - endif - lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k) - lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) * E2var(k)/E2(k) -! lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) ) -! if ( lambda_pert / lambda_mrcc(k,i) < 0.5d0) then -! print *, ' xxx ', dabs(lambda_pert - lambda_mrcc(k,i)) * ihpsi_current(k) -! if ( dabs( (lambda_pert - lambda_mrcc(k,i)) * ihpsi_current(k) ) > 1.d-3) then -! print *, 'lambda mrcc', lambda_mrcc(k,i) -! print *, 'lambda pert', lambda_pert -! print *, 'coef: ', psi_non_ref_coef(i,k) -! call debug_det(psi_non_ref(1,1,i), N_int) -! call i_H_j(psi_ref(1,1,1),psi_non_ref(1,1,i),N_int,hii) -! print *, hii -! call i_H_j(psi_ref(1,1,2),psi_non_ref(1,1,i),N_int,hii) -! print *, hii -! print *, '---' +! lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k) + lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) ) + lambda_pert = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) + if (lambda_pert / lambda_mrcc(k,i) < 0.5d0) then ! Ignore lamdba -! i_pert_count += 1 -! lambda_mrcc(k,i) = 0.d0 -! lambda_mrcc(k,i) = lambda_pert * E2var(k)/E2(k) -! if (lambda_mrcc_pt2(N_lambda_mrcc_pt2) /= i) then -! N_lambda_mrcc_pt2 += 1 -! lambda_mrcc_pt2(N_lambda_mrcc_pt2) = i -! endif -! else -! ! Keep lamdba -! if (lambda_mrcc_kept(N_lambda_mrcc_pt3) /= i) then -! N_lambda_mrcc_pt3 += 1 -! lambda_mrcc_kept(N_lambda_mrcc_pt3) = i -! endif -! endif + i_pert_count += 1 + lambda_mrcc(k,i) = 0.d0 + if (lambda_mrcc_pt2(N_lambda_mrcc_pt2) /= i) then + N_lambda_mrcc_pt2 += 1 + lambda_mrcc_pt2(N_lambda_mrcc_pt2) = i + endif + else + ! Keep lamdba + if (lambda_mrcc_kept(N_lambda_mrcc_pt3) /= i) then + N_lambda_mrcc_pt3 += 1 + lambda_mrcc_kept(N_lambda_mrcc_pt3) = i + endif + endif enddo enddo lambda_mrcc_pt2(0) = N_lambda_mrcc_pt2 @@ -92,6 +63,65 @@ END_PROVIDER END_PROVIDER +! BEGIN_PROVIDER [ double precision, lambda_mrcc, (N_states, N_det_non_ref) ] +!&BEGIN_PROVIDER [ integer, lambda_mrcc_pt2, (0:psi_det_size) ] +!&BEGIN_PROVIDER [ integer, lambda_mrcc_kept, (0:psi_det_size) ] +!&BEGIN_PROVIDER [ double precision, lambda_pert, (N_states, N_det_non_ref) ] +! implicit none +! BEGIN_DOC +! ! cm/ or perturbative 1/Delta_E(m) +! END_DOC +! integer :: i,k +! double precision :: ihpsi_current(N_states) +! integer :: i_pert_count +! double precision :: hii, E2(N_states), E2var(N_states) +! integer :: N_lambda_mrcc_pt2, N_lambda_mrcc_pt3 +! +! i_pert_count = 0 +! lambda_mrcc = 0.d0 +! N_lambda_mrcc_pt2 = 0 +! N_lambda_mrcc_pt3 = 0 +! lambda_mrcc_pt2(0) = 0 +! lambda_mrcc_kept(0) = 0 +! +! E2 = 0.d0 +! E2var = 0.d0 +! do i=1,N_det_non_ref +! call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,& +! size(psi_ref_coef,1), N_states,ihpsi_current) +! call i_H_j(psi_non_ref(1,1,i),psi_non_ref(1,1,i),N_int,hii) +! do k=1,N_states +! if (ihpsi_current(k) == 0.d0) then +! ihpsi_current(k) = 1.d-32 +! endif +! lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k) +! lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) +! E2(k) += ihpsi_current(k)*ihpsi_current(k) / (psi_ref_energy_diagonalized(k)-hii) +! E2var(k) += ihpsi_current(k) * psi_non_ref_coef(i,k) +! enddo +! enddo +! +! do i=1,N_det_non_ref +! call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,& +! size(psi_ref_coef,1), N_states,ihpsi_current) +! call i_H_j(psi_non_ref(1,1,i),psi_non_ref(1,1,i),N_int,hii) +! do k=1,N_states +! if (ihpsi_current(k) == 0.d0) then +! ihpsi_current(k) = 1.d-32 +! endif +! lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k) +! lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) * E2var(k)/E2(k) +! enddo +! enddo +! lambda_mrcc_pt2(0) = N_lambda_mrcc_pt2 +! lambda_mrcc_kept(0) = N_lambda_mrcc_pt3 +! print*,'N_det_non_ref = ',N_det_non_ref +! print*,'psi_coef_ref_ratio = ',psi_ref_coef(2,1)/psi_ref_coef(1,1) +! print*,'lambda max = ',maxval(dabs(lambda_mrcc)) + print*,'Number of ignored determinants = ',i_pert_count + +END_PROVIDER + BEGIN_PROVIDER [ double precision, hij_mrcc, (N_det_non_ref,N_det_ref) ] @@ -814,8 +844,8 @@ END_PROVIDER f = psi_non_ref_coef(i,s) / rho_mrcc(i,s) ! Avoid numerical instabilities -! f = min(f,2.d0) -! f = max(f,-2.d0) + f = min(f,2.d0) + f = max(f,-2.d0) endif norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s) @@ -869,45 +899,64 @@ BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ] dij(j, i, s) = get_dij_index(j, i, s, N_int) end do end do - end do - print *, "done computing amplitudes" -END_PROVIDER +! end do +! print *, "done computing amplitudes" +!END_PROVIDER -double precision function f_fit(x) - implicit none - double precision :: x - f_fit = 0.d0 - return - if (x < 0.d0) then - f_fit = 0.d0 - else if (x < 1.d0) then - f_fit = 1.d0/0.367879441171442 * ( x**2 * exp(-x**2)) - else - f_fit = 1.d0 - endif -end +!double precision function f_fit(x) +! implicit none +! double precision :: x +! f_fit = 0.d0 +! return +! if (x < 0.d0) then +! f_fit = 0.d0 +! else if (x < 1.d0) then +! f_fit = 1.d0/0.367879441171442 * ( x**2 * exp(-x**2)) +! else +! f_fit = 1.d0 +! endif +!end +! +!double precision function get_dij_index(II, i, s, Nint) +! integer, intent(in) :: II, i, s, Nint +! double precision, external :: get_dij +! double precision :: HIi, phase, c, a, b, d +! +! call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,i), Nint, HIi) +! call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int) +! +! a = lambda_pert(s,i) +! b = lambda_mrcc(s,i) +! c = f_fit(a/b) +! +! d = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) * phase* rho_mrcc(i,s) +! +! c = f_fit(a*HIi/d) +! +! get_dij_index = HIi * a * c + (1.d0 - c) * d +! get_dij_index = d +! return +! +! if(lambda_type == 0) then +! call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int) +! get_dij_index = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) * phase +! get_dij_index = get_dij_index * rho_mrcc(i,s) +! else if(lambda_type == 1) then +! call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,i), Nint, HIi) +! get_dij_index = HIi * lambda_mrcc(s, i) +! else if(lambda_type == 2) then +! call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int) +! get_dij_index = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) * phase +! get_dij_index = get_dij_index * rho_mrcc(i,s) +! end if +!end function double precision function get_dij_index(II, i, s, Nint) integer, intent(in) :: II, i, s, Nint double precision, external :: get_dij - double precision :: HIi, phase, c, a, b, d - - call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,i), Nint, HIi) - call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int) - - a = lambda_pert(s,i) -! b = lambda_mrcc(s,i) -! c = f_fit(a/b) - - d = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) * phase* rho_mrcc(i,s) - - c = f_fit(a*HIi/d) - -! get_dij_index = HIi * a * c + (1.d0 - c) * d - get_dij_index = d - return + double precision :: HIi, phase if(lambda_type == 0) then call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int) From 97a7a48a83a1716235d23ae63d6a744b34113396 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 Feb 2017 20:29:15 +0100 Subject: [PATCH 070/106] Fixed MRCC --- plugins/MRCC_Utils/mrcc_utils.irp.f | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 043bc5ca..3ae28d45 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -118,9 +118,9 @@ END_PROVIDER ! print*,'N_det_non_ref = ',N_det_non_ref ! print*,'psi_coef_ref_ratio = ',psi_ref_coef(2,1)/psi_ref_coef(1,1) ! print*,'lambda max = ',maxval(dabs(lambda_mrcc)) - print*,'Number of ignored determinants = ',i_pert_count - -END_PROVIDER +! print*,'Number of ignored determinants = ',i_pert_count +! +!END_PROVIDER From 6437b08cd43b65377db1b16848a6415d5faa16f1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 Feb 2017 20:30:56 +0100 Subject: [PATCH 071/106] Fixed MRCC --- plugins/MRCC_Utils/mrcc_utils.irp.f | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 3ae28d45..e0da2f20 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -899,9 +899,9 @@ BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ] dij(j, i, s) = get_dij_index(j, i, s, N_int) end do end do -! end do -! print *, "done computing amplitudes" -!END_PROVIDER + end do + print *, "done computing amplitudes" +END_PROVIDER From af7032cf0a29feade48cae67b90685c17b9958aa Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 Feb 2017 21:33:29 +0100 Subject: [PATCH 072/106] Save wf after selection --- plugins/Full_CI_ZMQ/zmq_selection.irp.f | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/Full_CI_ZMQ/zmq_selection.irp.f b/plugins/Full_CI_ZMQ/zmq_selection.irp.f index b4be5638..838af9ef 100644 --- a/plugins/Full_CI_ZMQ/zmq_selection.irp.f +++ b/plugins/Full_CI_ZMQ/zmq_selection.irp.f @@ -52,6 +52,7 @@ subroutine ZMQ_selection(N_in, pt2) if (s2_eig) then call make_s2_eigenfunction endif + call save_wavefunction endif end subroutine From cc53cff93274eaca3d5c40bd03e07873f302e9e0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 Feb 2017 21:44:28 +0100 Subject: [PATCH 073/106] Print nb occ patterns --- src/Determinants/occ_pattern.irp.f | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Determinants/occ_pattern.irp.f b/src/Determinants/occ_pattern.irp.f index e8bb7466..38460f87 100644 --- a/src/Determinants/occ_pattern.irp.f +++ b/src/Determinants/occ_pattern.irp.f @@ -250,7 +250,7 @@ subroutine make_s2_eigenfunction integer, parameter :: bufsze = 1000 logical, external :: is_in_wavefunction - print *, 'Finding determinants for S^2...' + call write_int(6,N_occ_pattern,'Number of occupation patterns') !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(N_occ_pattern, psi_occ_pattern, elec_alpha_num,N_int) & @@ -297,6 +297,7 @@ subroutine make_s2_eigenfunction call copy_H_apply_buffer_to_wf SOFT_TOUCH N_det psi_coef psi_det print *, 'Added determinants for S^2' + call write_time(6) end From 0dea2e88c5e39a2752732b5f5343dd64ddf6fbd2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 28 Feb 2017 13:28:36 +0100 Subject: [PATCH 074/106] Removed triangle --- ocaml/Message.ml | 60 ++++++++--------- ocaml/Message_lexer.mll | 78 ++++++++++++++-------- ocaml/TaskServer.ml | 77 +++++---------------- src/Integrals_Bielec/ao_bi_integrals.irp.f | 13 ++-- 4 files changed, 103 insertions(+), 125 deletions(-) diff --git a/ocaml/Message.ml b/ocaml/Message.ml index 3a1f5c57..2ed38864 100644 --- a/ocaml/Message.ml +++ b/ocaml/Message.ml @@ -150,18 +150,18 @@ end module AddTask_msg : sig type t = { state: State.t; - task: string; + tasks: string list; } - val create : state:string -> task:string -> t + val create : state:string -> tasks:string list -> t val to_string : t -> string end = struct type t = { state: State.t; - task: string; + tasks: string list; } - let create ~state ~task = { state = State.of_string state ; task } + let create ~state ~tasks = { state = State.of_string state ; tasks } let to_string x = - Printf.sprintf "add_task %s %s" (State.to_string x.state) x.task + Printf.sprintf "add_task %s %s" (State.to_string x.state) (String.concat ~sep:"|" x.tasks) end @@ -182,44 +182,44 @@ end module DelTask_msg : sig type t = { state: State.t; - task_id: Id.Task.t + task_ids: Id.Task.t list } - val create : state:string -> task_id:int -> t + val create : state:string -> task_ids:int list -> t val to_string : t -> string end = struct type t = { state: State.t; - task_id: Id.Task.t + task_ids: Id.Task.t list } - let create ~state ~task_id = + let create ~state ~task_ids = { state = State.of_string state ; - task_id = Id.Task.of_int task_id + task_ids = List.map ~f:Id.Task.of_int task_ids } let to_string x = - Printf.sprintf "del_task %s %d" + Printf.sprintf "del_task %s %s" (State.to_string x.state) - (Id.Task.to_int x.task_id) + (String.concat ~sep:"|" @@ List.map ~f:Id.Task.to_string x.task_ids) end (** DelTaskReply : Reply to the DelTask message *) module DelTaskReply_msg : sig type t - val create : task_id:Id.Task.t -> more:bool -> t + val create : task_ids:Id.Task.t list -> more:bool -> t val to_string : t -> string end = struct type t = { - task_id : Id.Task.t ; + task_ids : Id.Task.t list; more : bool; } - let create ~task_id ~more = { task_id ; more } + let create ~task_ids ~more = { task_ids ; more } let to_string x = let more = if x.more then "more" else "done" in - Printf.sprintf "del_task_reply %s %d" - more (Id.Task.to_int x.task_id) + Printf.sprintf "del_task_reply %s %s" + more (String.concat ~sep:"|" @@ List.map ~f:Id.Task.to_string x.task_ids) end @@ -460,27 +460,27 @@ module TaskDone_msg : sig type t = { client_id: Id.Client.t ; state: State.t ; - task_id: Id.Task.t ; + task_ids: Id.Task.t list ; } - val create : state:string -> client_id:int -> task_id:int -> t + val create : state:string -> client_id:int -> task_ids:int list -> t val to_string : t -> string end = struct type t = { client_id: Id.Client.t ; state: State.t ; - task_id: Id.Task.t; + task_ids: Id.Task.t list; } - let create ~state ~client_id ~task_id = + let create ~state ~client_id ~task_ids = { client_id = Id.Client.of_int client_id ; state = State.of_string state ; - task_id = Id.Task.of_int task_id; + task_ids = List.map ~f:Id.Task.of_int task_ids; } let to_string x = - Printf.sprintf "task_done %s %d %d" + Printf.sprintf "task_done %s %d %s" (State.to_string x.state) (Id.Client.to_int x.client_id) - (Id.Task.to_int x.task_id) + (String.concat ~sep:"|" @@ List.map ~f:Id.Task.to_string x.task_ids) end (** Terminate *) @@ -550,14 +550,14 @@ type t = let of_string s = let open Message_lexer in match parse s with - | AddTask_ { state ; task } -> - AddTask (AddTask_msg.create ~state ~task) - | DelTask_ { state ; task_id } -> - DelTask (DelTask_msg.create ~state ~task_id) + | AddTask_ { state ; tasks } -> + AddTask (AddTask_msg.create ~state ~tasks) + | DelTask_ { state ; task_ids } -> + DelTask (DelTask_msg.create ~state ~task_ids) | GetTask_ { state ; client_id } -> GetTask (GetTask_msg.create ~state ~client_id) - | TaskDone_ { state ; task_id ; client_id } -> - TaskDone (TaskDone_msg.create ~state ~client_id ~task_id) + | TaskDone_ { state ; task_ids ; client_id } -> + TaskDone (TaskDone_msg.create ~state ~client_id ~task_ids) | Disconnect_ { state ; client_id } -> Disconnect (Disconnect_msg.create ~state ~client_id) | Connect_ socket -> diff --git a/ocaml/Message_lexer.mll b/ocaml/Message_lexer.mll index 45ffc4d4..c67f4528 100644 --- a/ocaml/Message_lexer.mll +++ b/ocaml/Message_lexer.mll @@ -6,7 +6,6 @@ type kw_type = | INTEGER of int | FLOAT of float | NONE - | END_OF_FILE | ADD_TASK | DEL_TASK | GET_TASK @@ -24,19 +23,19 @@ type kw_type = | SET_RUNNING | SET_WAITING -type state_task = { state : string ; task : string ; } -type state_taskid = { state : string ; task_id : int ; } +type state_tasks = { state : string ; tasks : string list ; } +type state_taskids = { state : string ; task_ids : int list ; } +type state_taskids_clientid = { state : string ; task_ids : int list ; client_id : int ; } type state_clientid = { state : string ; client_id : int ; } -type state_taskid_clientid = { state : string ; task_id : int ; client_id : int ; } type state_tcp_inproc = { state : string ; push_address_tcp : string ; push_address_inproc : string ; } type psi = { client_id: int ; n_state: int ; n_det: int ; psi_det_size: int ; n_det_generators: int option ; n_det_selectors: int option } type msg = - | AddTask_ of state_task - | DelTask_ of state_taskid + | AddTask_ of state_tasks + | DelTask_ of state_taskids | GetTask_ of state_clientid - | TaskDone_ of state_taskid_clientid + | TaskDone_ of state_taskids_clientid | Disconnect_ of state_clientid | Connect_ of string | NewJob_ of state_tcp_inproc @@ -52,7 +51,7 @@ type msg = } let word = [^' ' '\t' '\n']+ -let text = [^' ']+[^'\n']+ +let text = [^ ' ' '|']+[^ '|']+ let integer = ['0'-'9']+ let real = '-'? integer '.' integer (['e' 'E'] '-'? integer)? @@ -61,11 +60,20 @@ let white = [' ' '\t']+ rule get_text = parse | text as t { TEXT t } + | eof { TERMINATE } + | _ { NONE } + +and get_int = parse + | integer as i { INTEGER (int_of_string i) } + | eof { TERMINATE } + | _ { NONE } + +and get_word = parse + | word as w { WORD w } + | eof { TERMINATE } | _ { NONE } and kw = parse - | integer as i { INTEGER (int_of_string i) } - | real as r { FLOAT (float_of_string r)} | "add_task" { ADD_TASK } | "del_task" { DEL_TASK } | "get_task" { GET_TASK } @@ -82,24 +90,23 @@ and kw = parse | "set_stopped" { SET_STOPPED } | "set_running" { SET_RUNNING } | "set_waiting" { SET_WAITING } - | word as w { WORD w } - | eof { END_OF_FILE } | _ { NONE } { - let rec read_text lexbuf = + let rec read_text ?(accu=[]) lexbuf = let token = get_text lexbuf in match token with - | TEXT t -> t - | NONE -> read_text lexbuf + | TEXT t -> read_text ~accu:(t::accu) lexbuf + | TERMINATE -> List.rev accu + | NONE -> read_text ~accu lexbuf | _ -> failwith "Error in MessageLexer (2)" and read_word lexbuf = let token = - kw lexbuf + get_word lexbuf in match token with | WORD w -> w @@ -108,13 +115,23 @@ and kw = parse and read_int lexbuf = let token = - kw lexbuf + get_int lexbuf in match token with | INTEGER i -> i | NONE -> read_int lexbuf | _ -> failwith "Error in MessageLexer (4)" + and read_ints ?(accu=[]) lexbuf = + let token = + get_int lexbuf + in + match token with + | INTEGER i -> read_ints ~accu:(i::accu) lexbuf + | TERMINATE -> List.rev accu + | NONE -> read_ints ~accu lexbuf + | _ -> failwith "Error in MessageLexer (4)" + and parse_rec lexbuf = let token = kw lexbuf @@ -122,13 +139,13 @@ and kw = parse match token with | ADD_TASK -> let state = read_word lexbuf in - let task = read_text lexbuf in - AddTask_ { state ; task } + let tasks = read_text lexbuf in + AddTask_ { state ; tasks } | DEL_TASK -> - let state = read_word lexbuf in - let task_id = read_int lexbuf in - DelTask_ { state ; task_id } + let state = read_word lexbuf in + let task_ids = read_ints lexbuf in + DelTask_ { state ; task_ids } | GET_TASK -> let state = read_word lexbuf in @@ -137,9 +154,9 @@ and kw = parse | TASK_DONE -> let state = read_word lexbuf in - let client_id = read_int lexbuf in - let task_id = read_int lexbuf in - TaskDone_ { state ; task_id ; client_id } + let client_id = read_int lexbuf in + let task_ids = read_ints lexbuf in + TaskDone_ { state ; task_ids ; client_id } | DISCONNECT -> let state = read_word lexbuf in @@ -177,7 +194,7 @@ and kw = parse EndJob_ state | ERROR -> - let message = read_text lexbuf in + let message = List.hd (read_text lexbuf) in Error_ message | OK -> Ok_ @@ -198,9 +215,12 @@ and kw = parse let debug () = let l = [ "add_task state_pouet Task pouet zob" ; + "add_task state_pouet Task pouet zob |Task2 zob | Task3 prout" ; "del_task state_pouet 12345" ; + "del_task state_pouet 12345 | 6789 | 10 | 11" ; "get_task state_pouet 12" ; "task_done state_pouet 12 12345"; + "task_done state_pouet 12 12345 | 678 | 91011"; "connect tcp"; "disconnect state_pouet 12"; "new_job state_pouet tcp://test.com:12345 ipc:///dev/shm/x.socket"; @@ -218,10 +238,10 @@ and kw = parse |> List.map parse in List.map (function - | AddTask_ { state ; task } -> Printf.sprintf "ADD_TASK state:\"%s\" task:\"%s\"" state task - | DelTask_ { state ; task_id } -> Printf.sprintf "DEL_TASK state:\"%s\" task_id:%d" state task_id + | AddTask_ { state ; tasks } -> Printf.sprintf "ADD_TASK state:\"%s\" tasks:{\"%s\"}" state (String.concat "\"}|{\"" tasks) + | DelTask_ { state ; task_ids } -> Printf.sprintf "DEL_TASK state:\"%s\" task_ids:{%s}" state (String.concat "|" @@ List.map string_of_int task_ids) | GetTask_ { state ; client_id } -> Printf.sprintf "GET_TASK state:\"%s\" task_id:%d" state client_id - | TaskDone_ { state ; task_id ; client_id } -> Printf.sprintf "TASK_DONE state:\"%s\" task_id:%d client_id:%d" state task_id client_id + | TaskDone_ { state ; task_ids ; client_id } -> Printf.sprintf "TASK_DONE state:\"%s\" task_ids:{%s} client_id:%d" state (String.concat "|" @@ List.map string_of_int task_ids) client_id | Disconnect_ { state ; client_id } -> Printf.sprintf "DISCONNECT state:\"%s\" client_id:%d" state client_id | Connect_ socket -> Printf.sprintf "CONNECT socket:\"%s\"" socket | NewJob_ { state ; push_address_tcp ; push_address_inproc } -> Printf.sprintf "NEW_JOB state:\"%s\" tcp:\"%s\" inproc:\"%s\"" state push_address_tcp push_address_inproc diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 9d830437..7013b671 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -289,9 +289,9 @@ let disconnect msg program_state rep_socket = let del_task msg program_state rep_socket = - let state, task_id = + let state, task_ids = msg.Message.DelTask_msg.state, - msg.Message.DelTask_msg.task_id + msg.Message.DelTask_msg.task_ids in let failure () = @@ -302,13 +302,14 @@ let del_task msg program_state rep_socket = let new_program_state = { program_state with - queue = Queuing_system.del_task ~task_id program_state.queue + queue = List.fold ~f:(fun queue task_id -> Queuing_system.del_task ~task_id queue) + ~init:program_state.queue task_ids } in let more = (Queuing_system.number_of_tasks new_program_state.queue > 0) in - Message.DelTaskReply (Message.DelTaskReply_msg.create ~task_id ~more) + Message.DelTaskReply (Message.DelTaskReply_msg.create ~task_ids ~more) |> Message.to_string |> ZMQ.Socket.send ~block:true rep_socket ; (** /!\ Has to be blocking *) new_program_state @@ -329,9 +330,9 @@ let del_task msg program_state rep_socket = let add_task msg program_state rep_socket = - let state, task = + let state, tasks = msg.Message.AddTask_msg.state, - msg.Message.AddTask_msg.task + msg.Message.AddTask_msg.tasks in let increment_progress_bar = function @@ -339,59 +340,12 @@ let add_task msg program_state rep_socket = | None -> None in - let rec add_task_triangle program_state imax = function - | 0 -> program_state - | i -> - let task = - Printf.sprintf "%d %d" i imax - in - let new_program_state = - { program_state with - queue = Queuing_system.add_task ~task program_state.queue ; - progress_bar = increment_progress_bar program_state.progress_bar ; - } - in - add_task_triangle new_program_state imax (i-1) - in - - let rec add_task_range program_state i = function - | j when (j < i) -> program_state - | j -> - let task = - Printf.sprintf "%d" j - in - let new_program_state = - { program_state with - queue = Queuing_system.add_task ~task program_state.queue ; - progress_bar = increment_progress_bar program_state.progress_bar ; - } - in - add_task_range new_program_state i (j-1) - in - - let new_program_state = function - | "triangle" :: i_str :: [] -> - let imax = - Int.of_string i_str - in - add_task_triangle program_state imax imax - | "range" :: i_str :: j_str :: [] -> - let i, j = - Int.of_string i_str, - Int.of_string j_str - in - add_task_range program_state i j - | _ -> - { program_state with - queue = Queuing_system.add_task ~task program_state.queue ; - progress_bar = increment_progress_bar program_state.progress_bar ; - } - in - let result = - String.split ~on:' ' task - |> List.filter ~f:(fun x -> x <> "") - |> new_program_state + { program_state with + queue = List.fold ~f:(fun queue task -> Queuing_system.add_task ~task queue) + ~init:program_state.queue tasks ; + progress_bar = increment_progress_bar program_state.progress_bar ; + } in reply_ok rep_socket; result @@ -448,10 +402,10 @@ let get_task msg program_state rep_socket pair_socket = let task_done msg program_state rep_socket = - let state, client_id, task_id = + let state, client_id, task_ids = msg.Message.TaskDone_msg.state, msg.Message.TaskDone_msg.client_id, - msg.Message.TaskDone_msg.task_id + msg.Message.TaskDone_msg.task_ids in let increment_progress_bar = function @@ -466,7 +420,8 @@ let task_done msg program_state rep_socket = and success () = let result = { program_state with - queue = Queuing_system.end_task ~task_id ~client_id program_state.queue ; + queue = List.fold ~f:(fun queue task_id -> Queuing_system.end_task ~task_id + ~client_id queue) ~init:program_state.queue task_ids ; progress_bar = increment_progress_bar program_state.progress_bar ; } in diff --git a/src/Integrals_Bielec/ao_bi_integrals.irp.f b/src/Integrals_Bielec/ao_bi_integrals.irp.f index 68a7a050..196bfce4 100644 --- a/src/Integrals_Bielec/ao_bi_integrals.irp.f +++ b/src/Integrals_Bielec/ao_bi_integrals.irp.f @@ -346,6 +346,7 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ] integer :: n_integrals, rc integer :: kk, m, j1, i1, lmax + character*(64) :: fmt integral = ao_bielec_integral(1,1,1,1) @@ -365,14 +366,16 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ] call cpu_time(cpu_1) integer(ZMQ_PTR) :: zmq_to_qp_run_socket - character*(32) :: task - call new_parallel_job(zmq_to_qp_run_socket,'ao_integrals') - do l=ao_num,1,-1 - write(task,*) "triangle ", l - call add_task_to_taskserver(zmq_to_qp_run_socket,task) + character(len=:), allocatable :: task + allocate(character(len=ao_num*12) :: task) + write(fmt,*) '(', ao_num, '(I5,X,I5,''|''))' + do l=1,ao_num + write(task,fmt) (i,l, i=1,l) + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) enddo + deallocate(task) call zmq_set_running(zmq_to_qp_run_socket) From ff05b132599a17134b1a75df0bd77541d4f5ff43 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 28 Feb 2017 14:46:32 +0100 Subject: [PATCH 075/106] Update tasks --- ocaml/TaskServer.ml | 22 ++++++++++++++++------ plugins/Full_CI_ZMQ/zmq_selection.irp.f | 8 +++++--- src/Davidson/davidson_parallel.irp.f | 5 ++--- src/ZMQ/utils.irp.f | 14 ++++++++++---- 4 files changed, 33 insertions(+), 16 deletions(-) diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 7013b671..6bfdc50e 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -341,10 +341,15 @@ let add_task msg program_state rep_socket = in let result = + let new_queue, new_bar = + List.fold ~f:(fun (queue, bar) task -> + Queuing_system.add_task ~task queue, + increment_progress_bar bar) + ~init:(program_state.queue, program_state.progress_bar) tasks + in { program_state with - queue = List.fold ~f:(fun queue task -> Queuing_system.add_task ~task queue) - ~init:program_state.queue tasks ; - progress_bar = increment_progress_bar program_state.progress_bar ; + queue = new_queue; + progress_bar = new_bar } in reply_ok rep_socket; @@ -418,11 +423,16 @@ let task_done msg program_state rep_socket = program_state and success () = + let new_queue, new_bar = + List.fold ~f:(fun (queue, bar) task_id -> + Queuing_system.end_task ~task_id ~client_id queue, + increment_progress_bar bar) + ~init:(program_state.queue, program_state.progress_bar) task_ids + in let result = { program_state with - queue = List.fold ~f:(fun queue task_id -> Queuing_system.end_task ~task_id - ~client_id queue) ~init:program_state.queue task_ids ; - progress_bar = increment_progress_bar program_state.progress_bar ; + queue = new_queue; + progress_bar = new_bar } in reply_ok rep_socket; diff --git a/plugins/Full_CI_ZMQ/zmq_selection.irp.f b/plugins/Full_CI_ZMQ/zmq_selection.irp.f index 838af9ef..a5f38691 100644 --- a/plugins/Full_CI_ZMQ/zmq_selection.irp.f +++ b/plugins/Full_CI_ZMQ/zmq_selection.irp.f @@ -4,7 +4,6 @@ subroutine ZMQ_selection(N_in, pt2) implicit none - character*(512) :: task integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer, intent(in) :: N_in type(selection_buffer) :: b @@ -28,14 +27,17 @@ subroutine ZMQ_selection(N_in, pt2) integer :: i_generator, i_generator_start, i_generator_max, step ! step = int(max(1.,10*elec_num/mo_tot_num) + character(len=:), allocatable :: task + allocate(character(len=32*N_det_generators) :: task) step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) step = max(1,step) do i= 1, N_det_generators,step i_generator_start = i i_generator_max = min(i+step-1,N_det_generators) - write(task,*) i_generator_start, i_generator_max, 1, N - call add_task_to_taskserver(zmq_to_qp_run_socket,task) + write(task(32*(i-1)+1:32*i),'(I9,X,I9,X,''1'',X,I9,''|'')') i_generator_start, i_generator_max, N end do + call add_task_to_taskserver(zmq_to_qp_run_socket,task) + deallocate(task) !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) i = omp_get_thread_num() diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 04b0cc52..5387ff5b 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -184,8 +184,7 @@ subroutine davidson_add_task(zmq_to_qp_run_socket, blockb, blockb2, istep) integer ,intent(in) :: blockb, blockb2, istep character*(512) :: task - - write(task,*) blockb, blockb2, istep + write(task,'(3(I9,X))') blockb, blockb2, istep call add_task_to_taskserver(zmq_to_qp_run_socket, task) end subroutine @@ -267,7 +266,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) if(task_id == 0) exit - read (task,*) blockb, blockb2, istep + read (task,'(3(I9,X))') blockb, blockb2, istep bs = shortcut_(blockb+1,1) - shortcut_(blockb, 1) do i=blockb, shortcut_(0,2), shortcut_(0,1) do j=i, min(i, shortcut_(0,2)) diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index 9e28aff5..aff2707a 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -684,10 +684,12 @@ subroutine add_task_to_taskserver(zmq_to_qp_run_socket,task) character*(*), intent(in) :: task integer :: rc, sze - character*(512) :: message + character(len=:), allocatable :: message + + sze = len(trim(task))+12+len(trim(zmq_state)) + allocate(character(len=sze) :: message) write(message,*) 'add_task '//trim(zmq_state)//' '//trim(task) - sze = len(trim(message)) rc = f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0) if (rc /= sze) then print *, rc, sze @@ -701,6 +703,7 @@ subroutine add_task_to_taskserver(zmq_to_qp_run_socket,task) print *, 'Unable to add the next task' stop -1 endif + deallocate(message) end @@ -714,16 +717,19 @@ subroutine add_task_to_taskserver_send(zmq_to_qp_run_socket,task) character*(*), intent(in) :: task integer :: rc, sze - character*(512) :: message + character(len=:), allocatable :: message + + sze = len(trim(task))+12+len(trim(zmq_state)) + allocate(character(len=sze) :: message) write(message,*) 'add_task '//trim(zmq_state)//' '//trim(task) - sze = len(trim(message)) rc = f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0) if (rc /= sze) then print *, rc, sze print *, irp_here,': f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)' stop 'error' endif + deallocate(message) end From 9afc82c878b67a44f8875b51ba1e86aa2dd1fbf1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 28 Feb 2017 22:43:59 +0100 Subject: [PATCH 076/106] Less pressure on qp_run when ading tasks --- src/Davidson/davidson_parallel.irp.f | 23 +++++-------------- src/Davidson/u0Hu0.irp.f | 30 ++++++++++++++++++++----- src/Determinants/H_apply_zmq.template.f | 2 +- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 5387ff5b..4ff3af03 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -28,8 +28,8 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) ii=0 sh = blockb do sh2=1,shortcut_(0,1) - exa = 0 - do ni=1,N_int + exa = popcnt(xor(version_(1,sh,1), version_(1,sh2,1))) + do ni=2,N_int exa = exa + popcnt(xor(version_(ni,sh,1), version_(ni,sh2,1))) end do if(exa > 2) cycle @@ -44,8 +44,9 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) do j=shortcut_(sh2,1), shortcut_(sh2+1,1)-1 if(i == j) cycle - ext = exa - do ni=1,N_int + ext = exa + popcnt(xor(sorted_i(1), sorted_(1,j,1))) + if(ext > 4) cycle + do ni=2,N_int ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1))) if(ext > 4) exit end do @@ -176,20 +177,6 @@ end subroutine -subroutine davidson_add_task(zmq_to_qp_run_socket, blockb, blockb2, istep) - use f77_zmq - implicit none - - integer(ZMQ_PTR) ,intent(in) :: zmq_to_qp_run_socket - integer ,intent(in) :: blockb, blockb2, istep - character*(512) :: task - - write(task,'(3(I9,X))') blockb, blockb2, istep - call add_task_to_taskserver(zmq_to_qp_run_socket, task) -end subroutine - - - subroutine davidson_slave_inproc(i) implicit none integer, intent(in) :: i diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 42e61b3a..2d1095cd 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -326,10 +326,20 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ PROVIDE nproc + !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread,sh,i,j, & - !$OMP workload,istep,blockb2) + !$OMP workload,istep,blockb2,task,ipos,iposmax,send) ithread = omp_get_thread_num() if (ithread == 0 ) then + character(len=:), allocatable :: task + character(32) :: tmp_task + integer :: ipos, iposmax + logical :: send + iposmax = shortcut_(0,1)+32 + send = .False. + allocate(character(len=iposmax) :: task) + task = '' + ipos = 1 do sh=1,shortcut_(0,1),1 workload = shortcut_(0,1)+dble(shortcut_(sh+1,1) - shortcut_(sh,1))**2 do i=sh, shortcut_(0,2), shortcut_(0,1) @@ -339,13 +349,23 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ end do istep = 1+ int(workload*target_workload_inv) do blockb2=0, istep-1 - call davidson_add_task(handler, sh, blockb2, istep) + write(tmp_task,'(3(I9,X),''|'',X)') sh, blockb2, istep + task = task//tmp_task + ipos += 32 + if (ipos+32 < iposmax) then + send = .True. + else + call add_task_to_taskserver(handler, trim(task)) + ipos=1 + task = '' + send = .False. + endif enddo - if (sh == shortcut_(0,1)/10 + 1) then - !$OMP BARRIER - endif enddo + if (send) call add_task_to_taskserver(handler, trim(task)) + deallocate(task) call zmq_set_running(handler) + !$OMP BARRIER call davidson_run(handler, v_0, s_0, size(v_0,1)) else if (ithread == 1 ) then !$OMP BARRIER diff --git a/src/Determinants/H_apply_zmq.template.f b/src/Determinants/H_apply_zmq.template.f index 59544b79..ddedc5a2 100644 --- a/src/Determinants/H_apply_zmq.template.f +++ b/src/Determinants/H_apply_zmq.template.f @@ -38,7 +38,7 @@ subroutine $subroutine($params_main) do i_generator=1,N_det_generators $skip write(task,*) i_generator - call add_task_to_taskserver(zmq_to_qp_run_socket,task) + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) enddo allocate ( pt2_generators(N_states,N_det_generators), & From feb9752ecb3da8bd771563d8ccf544218a6fa9d4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Mar 2017 01:19:17 +0100 Subject: [PATCH 077/106] Accelerated distributed Davidson --- ocaml/qptypes_generator.ml | 8 +- .../selection_davidson_slave.irp.f | 5 +- src/Davidson/davidson_parallel.irp.f | 82 +++++++++++++------ src/Davidson/davidson_slave.irp.f | 6 +- src/Davidson/diagonalization_hs2.irp.f | 7 +- src/Davidson/u0Hu0.irp.f | 8 +- 6 files changed, 76 insertions(+), 40 deletions(-) diff --git a/ocaml/qptypes_generator.ml b/ocaml/qptypes_generator.ml index ee988ccb..160a07d0 100644 --- a/ocaml/qptypes_generator.ml +++ b/ocaml/qptypes_generator.ml @@ -42,8 +42,8 @@ let input_data = " * Det_number_max : int assert (x > 0) ; - if (x > 100000000) then - warning \"More than 100 million determinants\"; + if (x > 10000000000) then + warning \"More than 10 billion determinants\"; * States_number : int assert (x > 0) ; @@ -140,8 +140,8 @@ let input_ezfio = " * Det_number : int determinants_n_det - 1 : 100000000 - More than 100 million of determinants + 1 : 10000000000 + More than 10 billion of determinants " ;; diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index d56df13e..58f005bc 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -25,6 +25,7 @@ subroutine run_wf double precision :: energy(N_states) character*(64) :: states(2) integer :: rc, i + logical :: force_update call provide_everything @@ -33,6 +34,7 @@ subroutine run_wf states(2) = 'davidson' zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + force_update = .True. do @@ -62,7 +64,8 @@ subroutine run_wf ! -------- print *, 'Davidson' - call davidson_miniserver_get() + call davidson_miniserver_get(force_update) + force_update = .False. !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() call davidson_slave_tcp(i) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 4ff3af03..b4b79585 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -145,32 +145,35 @@ subroutine davidson_collect(N, idx, vt, st , v0t, s0t) end subroutine -subroutine davidson_init(zmq_to_qp_run_socket,u,n0,n,n_st) +subroutine davidson_init(zmq_to_qp_run_socket,u,n0,n,n_st,update_dets) use f77_zmq implicit none integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket - integer, intent(in) :: n0,n, n_st + integer, intent(in) :: n0,n, n_st, update_dets double precision, intent(in) :: u(n0,n_st) integer :: i,k - dav_size = n - touch dav_size - - do i=1,n - do k=1,N_int - dav_det(k,1,i) = psi_det(k,1,i) - dav_det(k,2,i) = psi_det(k,2,i) + if (update_dets == 1) then + dav_size = n + touch dav_size + do i=1,dav_size + do k=1,N_int + dav_det(k,1,i) = psi_det(k,1,i) + dav_det(k,2,i) = psi_det(k,2,i) + enddo enddo - enddo + touch dav_det + endif + do i=1,n do k=1,n_st dav_ut(k,i) = u(i,k) enddo enddo - touch dav_det dav_ut + soft_touch dav_ut call new_parallel_job(zmq_to_qp_run_socket,"davidson") end subroutine @@ -454,9 +457,10 @@ end subroutine -subroutine davidson_miniserver_run() +subroutine davidson_miniserver_run(update_dets) use f77_zmq implicit none + integer update_dets integer(ZMQ_PTR) responder character*(64) address character(len=:), allocatable :: buffer @@ -465,18 +469,23 @@ subroutine davidson_miniserver_run() allocate (character(len=20) :: buffer) address = 'tcp://*:11223' + PROVIDE dav_det dav_ut dav_size + responder = f77_zmq_socket(zmq_context, ZMQ_REP) rc = f77_zmq_bind(responder,address) do rc = f77_zmq_recv(responder, buffer, 5, 0) - if (buffer(1:rc) /= 'end') then - rc = f77_zmq_send (responder, dav_size, 4, ZMQ_SNDMORE) - rc = f77_zmq_send (responder, dav_det, 16*N_int*dav_size, ZMQ_SNDMORE) - rc = f77_zmq_send (responder, dav_ut, 8*dav_size*N_states_diag, 0) - else + if (buffer(1:rc) == 'end') then rc = f77_zmq_send (responder, "end", 3, 0) exit + else if (buffer(1:rc) == 'det') then + rc = f77_zmq_send (responder, dav_size, 4, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, dav_det, 16*N_int*dav_size, 0) + else if (buffer(1:rc) == 'ut') then + rc = f77_zmq_send (responder, update_dets, 4, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, dav_size, 4, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, dav_ut, 8*dav_size*N_states_diag, 0) endif enddo @@ -503,34 +512,41 @@ subroutine davidson_miniserver_end() end subroutine -subroutine davidson_miniserver_get() +subroutine davidson_miniserver_get(force_update) implicit none use f77_zmq - + logical, intent(in) :: force_update integer(ZMQ_PTR) requester character*(64) address character*(20) buffer - integer rc + integer rc, update_dets address = trim(qp_run_address)//':11223' requester = f77_zmq_socket(zmq_context, ZMQ_REQ) rc = f77_zmq_connect(requester,address) - rc = f77_zmq_send(requester, "Hello", 5, 0) + rc = f77_zmq_send(requester, 'ut', 2, 0) + rc = f77_zmq_recv(requester, update_dets, 4, 0) rc = f77_zmq_recv(requester, dav_size, 4, 0) - TOUCH dav_size - rc = f77_zmq_recv(requester, dav_det, 16*N_int*dav_size, 0) - rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states_diag, 0) - TOUCH dav_det dav_ut - + if (update_dets == 1 .or. force_update) then + TOUCH dav_size + endif + rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states_diag, 0) + SOFT_TOUCH dav_ut + if (update_dets == 1 .or. force_update) then + rc = f77_zmq_send(requester, 'det', 3, 0) + rc = f77_zmq_recv(requester, dav_size, 4, 0) + rc = f77_zmq_recv(requester, dav_det, 16*N_int*dav_size, 0) + SOFT_TOUCH dav_det + endif + end subroutine BEGIN_PROVIDER [ integer(bit_kind), dav_det, (N_int, 2, dav_size) ] -&BEGIN_PROVIDER [ double precision, dav_ut, (N_states_diag, dav_size) ] use bitmasks implicit none BEGIN_DOC @@ -538,7 +554,19 @@ end subroutine ! ! Touched in davidson_miniserver_get END_DOC + integer :: i,k + dav_det = 0_bit_kind +END_PROVIDER + +BEGIN_PROVIDER [ double precision, dav_ut, (N_states_diag, dav_size) ] + use bitmasks + implicit none + BEGIN_DOC +! Temporary arrays for parallel davidson +! +! Touched in davidson_miniserver_get + END_DOC dav_ut = -huge(1.d0) END_PROVIDER diff --git a/src/Davidson/davidson_slave.irp.f b/src/Davidson/davidson_slave.irp.f index e28712e2..4d0864e8 100644 --- a/src/Davidson/davidson_slave.irp.f +++ b/src/Davidson/davidson_slave.irp.f @@ -7,6 +7,7 @@ program davidson_slave integer(ZMQ_PTR) :: zmq_to_qp_run_socket double precision :: energy(N_states_diag) character*(64) :: state + logical :: force_update call provide_everything call switch_qp_run_to_master @@ -16,11 +17,12 @@ program davidson_slave state = 'Waiting' zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - + force_update = .True. do call wait_for_state(zmq_state,state) if(trim(state) /= "davidson") exit - call davidson_miniserver_get() + call davidson_miniserver_get(force_update) + force_update = .False. integer :: rc, i diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index 4b36e030..b50ede7c 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -110,7 +110,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s character*(16384) :: write_buffer double precision :: to_print(3,N_st) double precision :: cpu, wall - integer :: shift, shift2, itermax + integer :: shift, shift2, itermax, update_dets double precision :: r1, r2 logical :: state_ok(N_st_diag*davidson_sze_max) include 'constants.include.F' @@ -191,6 +191,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ASSERT (Nint > 0) ASSERT (Nint == N_int) + update_dets = 1 + ! Davidson iterations ! =================== @@ -231,10 +233,11 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s if (distributed_davidson) then - call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8) + call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8,update_dets) else call H_S2_u_0_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8) endif + update_dets = 0 ! Compute h_kl = = diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 2d1095cd..233919da 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -264,7 +264,7 @@ BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ] END_PROVIDER -subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) +subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8,update_dets) use omp_lib use bitmasks use f77_zmq @@ -278,7 +278,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ ! ! S2_jj : array of END_DOC - integer, intent(in) :: N_st,n,Nint, sze_8 + integer, intent(in) :: N_st,n,Nint, sze_8, update_dets double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st) double precision, intent(in) :: u_0(sze_8,N_st) double precision, intent(in) :: H_jj(n), S2_jj(n) @@ -309,7 +309,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ v_0 = 0.d0 s_0 = 0.d0 - call davidson_init(handler,u_0,size(u_0,1),n,N_st) + call davidson_init(handler,u_0,size(u_0,1),n,N_st,update_dets) ave_workload = 0.d0 do sh=1,shortcut_(0,1) @@ -369,7 +369,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ call davidson_run(handler, v_0, s_0, size(v_0,1)) else if (ithread == 1 ) then !$OMP BARRIER - call davidson_miniserver_run () + call davidson_miniserver_run (update_dets) else !$OMP BARRIER call davidson_slave_inproc(ithread) From 8a5a671e314199653433c9f01ea7b6074f6ff978 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Mar 2017 10:41:55 +0100 Subject: [PATCH 078/106] Reduced error in MRCC --- plugins/MRCC_Utils/davidson.irp.f | 67 +++++++++-------------------- plugins/MRCC_Utils/mrcc_utils.irp.f | 42 ++++++++++++------ plugins/mrcepa0/dressing.irp.f | 18 +++----- plugins/mrcepa0/mrcc.irp.f | 2 +- 4 files changed, 56 insertions(+), 73 deletions(-) diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index 6bdadb24..c884b3c2 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -35,21 +35,20 @@ subroutine davidson_diag_mrcc(dets_in,u_in,energies,dim_in,sze,N_st,N_st_diag,Ni PROVIDE mo_bielec_integrals_in_map allocate(H_jj(sze)) + H_jj(1) = diag_h_mat_elem(dets_in(1,1,1),Nint) !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(sze,H_jj,N_det_ref,dets_in,Nint,istate,delta_ii,idx_ref) & !$OMP PRIVATE(i) - !$OMP DO SCHEDULE(guided) - do i=1,sze + !$OMP DO + do i=2,sze H_jj(i) = diag_h_mat_elem(dets_in(1,1,i),Nint) enddo !$OMP END DO - !$OMP DO SCHEDULE(guided) - do i=1,N_det_ref - H_jj(idx_ref(i)) += delta_ii(istate,i) - enddo - !$OMP END DO !$OMP END PARALLEL + do i=1,N_det_ref + H_jj(idx_ref(i)) += delta_ii(istate,i) + enddo call davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit,istate) deallocate (H_jj) end @@ -224,17 +223,6 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s W(i,k,iter+1) = 0.d0 enddo enddo -! do k=1,N_st_diag -! do iter2=1,iter -! do l=1,N_st_diag -! do i=1,sze -! U(i,k,iter+1) = U(i,k,iter+1) + U(i,l,iter2)*y(l,iter2,k,1) -! W(i,k,iter+1) = W(i,k,iter+1) + W(i,l,iter2)*y(l,iter2,k,1) -! enddo -! enddo -! enddo -! enddo -! ! call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, & 1.d0, U, size(U,1), y, size(y,1)*size(y,2), 0.d0, U(1,1,iter+1), size(U,1)) @@ -276,27 +264,11 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s do k=1,N_st_diag -! do iter2=1,iter -! do l=1,N_st_diag -! c(1) = u_dot_v(U(1,k,iter+1),U(1,l,iter2),sze) -! do i=1,sze -! U(i,k,iter+1) = U(i,k,iter+1) - c(1) * U(i,l,iter2) -! enddo -! enddo -! enddo -! call dgemv('T',sze,N_st_diag*iter,1.d0,U,size(U,1), & U(1,k,iter+1),1,0.d0,c,1) call dgemv('N',sze,N_st_diag*iter,-1.d0,U,size(U,1), & c,1,1.d0,U(1,k,iter+1),1) -! -! do l=1,k-1 -! c(1) = u_dot_v(U(1,k,iter+1),U(1,l,iter+1),sze) -! do i=1,sze -! U(i,k,iter+1) = U(i,k,iter+1) - c(1) * U(i,l,iter+1) -! enddo -! enddo -! + call dgemv('T',sze,k-1,1.d0,U(1,1,iter+1),size(U,1), & U(1,k,iter+1),1,0.d0,c,1) call dgemv('N',sze,k-1,-1.d0,U(1,1,iter+1),size(U,1), & @@ -429,7 +401,7 @@ subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8) allocate(vt(sze_8,N_st)) Vt = 0.d0 - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(static,1) do sh=1,shortcut(0,1) do sh2=sh,shortcut(0,1) exa = 0 @@ -468,9 +440,9 @@ subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8) enddo enddo enddo - !$OMP END DO NOWAIT + !$OMP END DO - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(static,1) do sh=1,shortcut(0,2) do i=shortcut(sh,2),shortcut(sh+1,2)-1 org_i = sort_idx(i,2) @@ -490,7 +462,7 @@ subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8) end do end do enddo - !$OMP END DO NOWAIT + !$OMP END DO !$OMP DO do ii=1,n_det_ref @@ -559,25 +531,26 @@ subroutine davidson_diag_mrcc_hs2(dets_in,u_in,dim_in,energies,sze,N_st,N_st_dia ASSERT (sze > 0) ASSERT (Nint > 0) ASSERT (Nint == N_int) - PROVIDE mo_bielec_integrals_in_map + PROVIDE mo_bielec_integrals_in_map allocate(H_jj(sze), S2_jj(sze)) + H_jj(1) = diag_h_mat_elem(dets_in(1,1,1),Nint) + call get_s2(dets_in(1,1,1),dets_in(1,1,1),Nint,S2_jj(1)) !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(sze,H_jj,S2_jj, dets_in,Nint,N_det_ref,delta_ii, & !$OMP idx_ref, istate) & !$OMP PRIVATE(i) - !$OMP DO SCHEDULE(guided) - do i=1,sze + !$OMP DO + do i=2,sze H_jj(i) = diag_h_mat_elem(dets_in(1,1,i),Nint) call get_s2(dets_in(1,1,i),dets_in(1,1,i),Nint,S2_jj(i)) enddo !$OMP END DO - !$OMP DO SCHEDULE(guided) + !$OMP END PARALLEL + do i=1,N_det_ref H_jj(idx_ref(i)) += delta_ii(istate,i) enddo - !$OMP END DO - !$OMP END PARALLEL call davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit,istate) deallocate (H_jj,S2_jj) @@ -1051,7 +1024,7 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i Vt = 0.d0 St = 0.d0 - !$OMP DO SCHEDULE(guided) + !$OMP DO SCHEDULE(static,1) do sh=1,shortcut(0,1) do sh2=sh,shortcut(0,1) exa = 0 @@ -1094,7 +1067,7 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i enddo enddo !$OMP END DO - !$OMP DO SCHEDULE(guided) + !$OMP DO SCHEDULE(static,1) do sh=1,shortcut(0,2) do i=shortcut(sh,2),shortcut(sh+1,2)-1 org_i = sort_idx(i,2) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index e0da2f20..79d139cf 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -771,10 +771,8 @@ END_PROVIDER factor = 1.d0 resold = huge(1.d0) - do k=0,10*hh_nex + do k=0,hh_nex/4 res = 0.d0 - !$OMP PARALLEL default(shared) private(cx, i, a_col, a_coll) reduction(+:res) - !$OMP DO do a_coll = 1, n_exc_active a_col = active_pp_idx(a_coll) cx = 0.d0 @@ -785,21 +783,20 @@ END_PROVIDER res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col)) X(a_col) = X_new(a_col) end do - !$OMP END DO - !$OMP END PARALLEL if (res > resold) then factor = factor * 0.5d0 endif resold = res - if(iand(k, 4095) == 0) then + if(iand(k, 127) == 0) then print *, "res ", k, res end if if(res < 1d-10) exit end do dIj_unique(1:size(X), s) = X(1:size(X)) + print *, "res ", k, res enddo @@ -831,21 +828,23 @@ END_PROVIDER do s=1,N_states norm = 0.d0 - double precision :: f + double precision :: f, g, gmax + gmax = 1.d0*maxval(dabs(psi_non_ref_coef(:,s))) do i=1,N_det_non_ref - if (rho_mrcc(i,s) == 0.d0) then - rho_mrcc(i,s) = 1.d-32 - endif - if (lambda_type == 2) then f = 1.d0 else + if (rho_mrcc(i,s) == 0.d0) then + cycle + endif ! f is such that f.\tilde{c_i} = c_i f = psi_non_ref_coef(i,s) / rho_mrcc(i,s) ! Avoid numerical instabilities - f = min(f,2.d0) - f = max(f,-2.d0) +! g = 1.d0+dabs(gmax / psi_non_ref_coef(i,s) ) + g = 2.d0+100.d0*exp(-20.d0*dabs(psi_non_ref_coef(i,s)/gmax)) + f = min(f, g) + f = max(f,-g) endif norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s) @@ -1087,6 +1086,22 @@ end function end do hh_shortcut(hh_shortcut(0)+1) = s+1 + if (hh_shortcut(0) > N_hh_exists) then + print *, 'Error in ', irp_here + print *, 'hh_shortcut(0) :', hh_shortcut(0) + print *, 'N_hh_exists : ', N_hh_exists + print *, 'Is your active space defined?' + stop + endif + + if (hh_shortcut(hh_shortcut(0)+1)-1 > N_pp_exists) then + print *, 'Error 1 in ', irp_here + print *, 'hh_shortcut(hh_shortcut(0)+1)-1 :', hh_shortcut(hh_shortcut(0)+1)-1 + print *, 'N_pp_exists : ', N_pp_exists + print *, 'Is your active space defined?' + stop + endif + do s=2,4,2 do i=1,hh_shortcut(0) if(hh_exists(s, i) == 0) then @@ -1097,6 +1112,7 @@ end function end if end do + do i=1,hh_shortcut(hh_shortcut(0)+1)-1 if(pp_exists(s, i) == 0) then pp_exists(s-1, i) = 0 diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index c772e2aa..cebe0a44 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -23,7 +23,7 @@ use bitmasks !$OMP PARALLEL DO default(none) schedule(dynamic) & !$OMP shared(psi_det_generators, N_det_generators, hh_exists, pp_exists, N_int, hh_shortcut) & !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc, delta_ii_s2_mrcc, delta_ij_s2_mrcc) & - !$OMP private(h, n, mask, omask, buf, ok, iproc) + !$OMP private(h, n, mask, omask, buf, ok, iproc) do gen= 1, N_det_generators allocate(buf(N_int, 2, N_det_non_ref)) iproc = omp_get_thread_num() + 1 @@ -232,12 +232,6 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen !hIk = hij_mrcc(idx_alpha(k_sd),i_I) ! call i_h_j(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(k_sd)),Nint,hIk) - do i_state=1,N_states - dIK(i_state) = dij(i_I, idx_alpha(k_sd), i_state) - !dIk(i_state) = get_dij(psi_ref(1,1,i_I), psi_non_ref(1,1,idx_alpha(k_sd)), N_int) !!hIk * lambda_mrcc(i_state,idx_alpha(k_sd)) - !dIk(i_state) = psi_non_ref_coef(idx_alpha(k_sd), i_state) / psi_ref_coef(i_I, i_state) - enddo - ! |l> = Exc(k -> alpha) |I> call get_excitation(psi_non_ref(1,1,idx_alpha(k_sd)),tq(1,1,i_alpha),exc,degree,phase,Nint) @@ -250,6 +244,10 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen call apply_excitation(psi_ref(1,1,i_I), exc, tmp_det, ok, Nint) if(.not. ok) cycle + do i_state=1,N_states + dIK(i_state) = dij(i_I, idx_alpha(k_sd), i_state) + enddo + ! do i_state=1,N_states dka(i_state) = 0.d0 @@ -268,12 +266,8 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen loop = .false. if (.not.loop) then call get_excitation(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(l_sd)),exc,degree,phase2,Nint) - hIl = hij_mrcc(idx_alpha(l_sd),i_I) -! call i_h_j(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hIl) do i_state=1,N_states dka(i_state) = dij(i_I, idx_alpha(l_sd), i_state) * phase * phase2 - !dka(i_state) = get_dij(psi_ref(1,1,i_I), psi_non_ref(1,1,idx_alpha(l_sd)), N_int) * phase * phase2 !hIl * lambda_mrcc(i_state,idx_alpha(l_sd)) * phase * phase2 - !dka(i_state) = psi_non_ref_coef(idx_alpha(l_sd), i_state) / psi_ref_coef(i_I, i_state) * phase * phase2 enddo endif @@ -292,7 +286,6 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen k_sd = idx_alpha(l_sd) hla = hij_cache(k_sd) sla = sij_cache(k_sd) -! call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hla) do i_state=1,N_states dIa_hla(i_state,k_sd) = dIa(i_state) * hla dIa_sla(i_state,k_sd) = dIa(i_state) * sla @@ -336,6 +329,7 @@ end integer :: i, j, i_state !mrmode : 1=mrcepa0, 2=mrsc2 add, 3=mrcc + PROVIDE psi_ref_lock if(mrmode == 3) then do i = 1, N_det_ref diff --git a/plugins/mrcepa0/mrcc.irp.f b/plugins/mrcepa0/mrcc.irp.f index a5614942..bb184761 100644 --- a/plugins/mrcepa0/mrcc.irp.f +++ b/plugins/mrcepa0/mrcc.irp.f @@ -5,7 +5,7 @@ program mrsc2sub !mrmode : 1=mrcepa0, 2=mrsc2 add, 3=mrcc mrmode = 3 - + read_wf = .True. SOFT_TOUCH read_wf call set_generators_bitmasks_as_holes_and_particles From 11932f5540c0e2776e3228a66ca1b655d44d9a23 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Mar 2017 12:02:21 +0100 Subject: [PATCH 079/106] Fixed FCI_ZMQ --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 2 +- plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 3 ++- plugins/Full_CI_ZMQ/run_selection_slave.irp.f | 10 +++------- plugins/Full_CI_ZMQ/selection_buffer.irp.f | 2 +- plugins/Full_CI_ZMQ/zmq_selection.irp.f | 15 ++++----------- 5 files changed, 11 insertions(+), 21 deletions(-) diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index a0d1a5ea..3feec96f 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -69,7 +69,7 @@ program fci_zmq n_det_before = N_det to_select = N_det - to_select = max(64-N_det, to_select) + to_select = max(N_det, to_select) to_select = min(to_select, N_det_max-n_det_before) call ZMQ_selection(to_select, pt2) diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 070d3f97..63033f82 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -9,7 +9,7 @@ subroutine run_pt2_slave(thread,iproc,energy) integer :: rc, i integer :: worker_id, task_id(1), ctask, ltask - character*(1000000) :: task + character(len=:), allocatable :: task integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -26,6 +26,7 @@ subroutine run_pt2_slave(thread,iproc,energy) integer :: Nindex allocate(pt2_detail(N_states, N_det), index(N_det)) + allocate(character(len=10000) :: task) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index 5bf00a1d..59b2ba1f 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -41,8 +41,8 @@ subroutine run_selection_slave(thread,iproc,energy) if (done) then ctask = ctask - 1 else - integer :: i_generator, i_generator_start, i_generator_max, step, N - read (task,*) i_generator_start, i_generator_max, step, N + integer :: i_generator, N + read(task,*) i_generator, N if(buf%N == 0) then ! Only first time call create_selection_buffer(N, N*2, buf) @@ -50,11 +50,7 @@ subroutine run_selection_slave(thread,iproc,energy) else if(N /= buf%N) stop "N changed... wtf man??" end if - !print *, "psi_selectors_coef ", psi_selectors_coef(N_det_selectors-5:N_det_selectors, 1) - !call debug_det(psi_selectors(1,1,N_det_selectors), N_int) - do i_generator=i_generator_start,i_generator_max,step - call select_connected(i_generator,energy,pt2,buf,0) - enddo + call select_connected(i_generator,energy,pt2,buf,0) endif if(done .or. ctask == size(task_id)) then diff --git a/plugins/Full_CI_ZMQ/selection_buffer.irp.f b/plugins/Full_CI_ZMQ/selection_buffer.irp.f index f06f9726..d296b399 100644 --- a/plugins/Full_CI_ZMQ/selection_buffer.irp.f +++ b/plugins/Full_CI_ZMQ/selection_buffer.irp.f @@ -27,7 +27,7 @@ subroutine add_to_selection_buffer(b, det, val) if(dabs(val) >= b%mini) then b%cur += 1 - b%det(:,:,b%cur) = det(:,:) + b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2) b%val(b%cur) = val if(b%cur == size(b%val)) then call sort_selection_buffer(b) diff --git a/plugins/Full_CI_ZMQ/zmq_selection.irp.f b/plugins/Full_CI_ZMQ/zmq_selection.irp.f index a5f38691..e033b9c2 100644 --- a/plugins/Full_CI_ZMQ/zmq_selection.irp.f +++ b/plugins/Full_CI_ZMQ/zmq_selection.irp.f @@ -24,17 +24,10 @@ subroutine ZMQ_selection(N_in, pt2) call create_selection_buffer(N, N*2, b) endif - integer :: i_generator, i_generator_start, i_generator_max, step -! step = int(max(1.,10*elec_num/mo_tot_num) - character(len=:), allocatable :: task - allocate(character(len=32*N_det_generators) :: task) - step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) - step = max(1,step) - do i= 1, N_det_generators,step - i_generator_start = i - i_generator_max = min(i+step-1,N_det_generators) - write(task(32*(i-1)+1:32*i),'(I9,X,I9,X,''1'',X,I9,''|'')') i_generator_start, i_generator_max, N + allocate(character(len=20*N_det_generators) :: task) + do i= 1, N_det_generators + write(task(20*(i-1)+1:20*i),'(I9,X,I9,''|'')') i, N end do call add_task_to_taskserver(zmq_to_qp_run_socket,task) deallocate(task) @@ -49,7 +42,7 @@ subroutine ZMQ_selection(N_in, pt2) !$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, 'selection') if (N_in > 0) then - call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN + call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) call copy_H_apply_buffer_to_wf() if (s2_eig) then call make_s2_eigenfunction From 317ca2fbaac1b1f8a63e5efea10a0f30cceacd7c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Mar 2017 12:15:49 +0100 Subject: [PATCH 080/106] Fixed Distributed Davidson of a bitch --- src/Davidson/davidson_parallel.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index b4b79585..eee6deb6 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -256,7 +256,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) if(task_id == 0) exit - read (task,'(3(I9,X))') blockb, blockb2, istep + read (task,*) blockb, blockb2, istep bs = shortcut_(blockb+1,1) - shortcut_(blockb, 1) do i=blockb, shortcut_(0,2), shortcut_(0,1) do j=i, min(i, shortcut_(0,2)) From 8da52b8f59936df11654fa8e0049720d331b37bc Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Mar 2017 22:20:57 +0100 Subject: [PATCH 081/106] Fixed communications --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 65 +++++++++---------- plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 9 +-- plugins/Full_CI_ZMQ/zmq_selection.irp.f | 3 +- src/Davidson/u0Hu0.irp.f | 68 +++++++++----------- src/ZMQ/utils.irp.f | 27 ++++---- 5 files changed, 77 insertions(+), 95 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 745dffac..70ce056f 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -9,7 +9,7 @@ subroutine ZMQ_pt2(pt2,relative_error) implicit none - character*(512) :: task + character(len=64000) :: task integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_to_qp_run_socket2 type(selection_buffer) :: b integer, external :: omp_get_thread_num @@ -62,49 +62,42 @@ subroutine ZMQ_pt2(pt2,relative_error) integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer :: ipos + ipos=1 + do i=1,tbc(0) + if(tbc(i) > fragment_first) then + write(task(ipos:ipos+20),'(I9,X,I9,''|'')') 0, i + ipos += 20 + if (ipos > 64000) then + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos-20))) + ipos=1 + endif + else + do j=1,fragment_count + write(task(ipos:ipos+20),'(I9,X,I9,''|'')') j, i + ipos += 20 + if (ipos > 64000) then + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos-20))) + ipos=1 + endif + end do + end if + end do + if (ipos > 1) then + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos-20))) + endif + call zmq_set_running(zmq_to_qp_run_socket) - !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2, relative_error) NUM_THREADS(nproc+1) & - !$OMP PRIVATE(i,zmq_to_qp_run_socket2,i_generator_end,task,j) - zmq_to_qp_run_socket2 = new_zmq_to_qp_run_socket() - - !$OMP DO SCHEDULE(static,1) - do i=1,min(2000,tbc(0)) - i_generator_end = min(i+generator_per_task-1, tbc(0)) - if(tbc(i) > fragment_first) then - write(task,*) (i_generator_end-i+1), 0, tbc(i:i_generator_end) - call add_task_to_taskserver(zmq_to_qp_run_socket2,task) - else - do j=1,fragment_count - write(task,*) (i_generator_end-i+1), j, tbc(i:i_generator_end) - call add_task_to_taskserver(zmq_to_qp_run_socket2,task) - end do - end if - end do - !$OMP END DO NOWAIT - + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc+1) & + !$OMP PRIVATE(i) i = omp_get_thread_num() if (i==0) then - call zmq_set_running(zmq_to_qp_run_socket) call pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, pt2) - else if (i==1) then - do i=2001,tbc(0) - i_generator_end = min(i+generator_per_task-1, tbc(0)) - if(tbc(i) > fragment_first) then - write(task,*) (i_generator_end-i+1), 0, tbc(i:i_generator_end) - call add_task_to_taskserver(zmq_to_qp_run_socket2,task) - else - do j=1,fragment_count - write(task,*) (i_generator_end-i+1), j, tbc(i:i_generator_end) - call add_task_to_taskserver(zmq_to_qp_run_socket2,task) - end do - end if - end do - call pt2_slave_inproc(1) else call pt2_slave_inproc(i) endif - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket2) !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, 'pt2') tbc(0) = 0 if (pt2(1) /= 0.d0) then diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 63033f82..f6f41ab3 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -9,7 +9,7 @@ subroutine run_pt2_slave(thread,iproc,energy) integer :: rc, i integer :: worker_id, task_id(1), ctask, ltask - character(len=:), allocatable :: task + character*(512) :: task integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -26,7 +26,6 @@ subroutine run_pt2_slave(thread,iproc,energy) integer :: Nindex allocate(pt2_detail(N_states, N_det), index(N_det)) - allocate(character(len=10000) :: task) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) @@ -40,6 +39,7 @@ subroutine run_pt2_slave(thread,iproc,energy) ctask = 1 pt2 = 0d0 pt2_detail = 0d0 + Nindex=1 do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task) @@ -125,7 +125,8 @@ subroutine push_pt2_results(zmq_socket_push, N, index, pt2_detail, task_id, ntas if(rc /= 4*ntask) stop "push" ! Activate is zmq_socket_push is a REQ - rc = f77_zmq_recv( zmq_socket_push, task_id(1), ntask*4, 0) + character*(2) :: ok + rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) end subroutine @@ -155,7 +156,7 @@ subroutine pull_pt2_results(zmq_socket_pull, N, index, pt2_detail, task_id, ntas if(rc /= 4*ntask) stop "pull" ! Activate is zmq_socket_pull is a REP - rc = f77_zmq_send( zmq_socket_pull, task_id(1), ntask*4, 0) + rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0) end subroutine diff --git a/plugins/Full_CI_ZMQ/zmq_selection.irp.f b/plugins/Full_CI_ZMQ/zmq_selection.irp.f index e033b9c2..8aaddc19 100644 --- a/plugins/Full_CI_ZMQ/zmq_selection.irp.f +++ b/plugins/Full_CI_ZMQ/zmq_selection.irp.f @@ -25,12 +25,11 @@ subroutine ZMQ_selection(N_in, pt2) endif character(len=:), allocatable :: task - allocate(character(len=20*N_det_generators) :: task) + task = repeat(' ',20*N_det_generators) do i= 1, N_det_generators write(task(20*(i-1)+1:20*i),'(I9,X,I9,''|'')') i, N end do call add_task_to_taskserver(zmq_to_qp_run_socket,task) - deallocate(task) !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) i = omp_get_thread_num() diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 233919da..b1946a42 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -327,51 +327,43 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ PROVIDE nproc - !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread,sh,i,j, & - !$OMP workload,istep,blockb2,task,ipos,iposmax,send) + character(len=:), allocatable :: task + task = repeat(' ', iposmax) + character(32) :: tmp_task + integer :: ipos, iposmax + iposmax = shortcut_(0,1)+32 + ipos = 1 + do sh=1,shortcut_(0,1),1 + workload = shortcut_(0,1)+dble(shortcut_(sh+1,1) - shortcut_(sh,1))**2 + do i=sh, shortcut_(0,2), shortcut_(0,1) + do j=i, min(i, shortcut_(0,2)) + workload += (shortcut_(j+1,2) - shortcut_(j, 2))**2 + end do + end do + istep = 1+ int(workload*target_workload_inv) + do blockb2=0, istep-1 + write(tmp_task,'(3(I9,X),''|'',X)') sh, blockb2, istep + task = task//tmp_task + ipos += 32 + if (ipos+32 > iposmax) then + call add_task_to_taskserver(handler, trim(task)) + ipos=1 + task = '' + endif + enddo + enddo + if (ipos>1) then + call add_task_to_taskserver(handler, trim(task)) + endif + + !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread) ithread = omp_get_thread_num() if (ithread == 0 ) then - character(len=:), allocatable :: task - character(32) :: tmp_task - integer :: ipos, iposmax - logical :: send - iposmax = shortcut_(0,1)+32 - send = .False. - allocate(character(len=iposmax) :: task) - task = '' - ipos = 1 - do sh=1,shortcut_(0,1),1 - workload = shortcut_(0,1)+dble(shortcut_(sh+1,1) - shortcut_(sh,1))**2 - do i=sh, shortcut_(0,2), shortcut_(0,1) - do j=i, min(i, shortcut_(0,2)) - workload += (shortcut_(j+1,2) - shortcut_(j, 2))**2 - end do - end do - istep = 1+ int(workload*target_workload_inv) - do blockb2=0, istep-1 - write(tmp_task,'(3(I9,X),''|'',X)') sh, blockb2, istep - task = task//tmp_task - ipos += 32 - if (ipos+32 < iposmax) then - send = .True. - else - call add_task_to_taskserver(handler, trim(task)) - ipos=1 - task = '' - send = .False. - endif - enddo - enddo - if (send) call add_task_to_taskserver(handler, trim(task)) - deallocate(task) call zmq_set_running(handler) - !$OMP BARRIER call davidson_run(handler, v_0, s_0, size(v_0,1)) else if (ithread == 1 ) then - !$OMP BARRIER call davidson_miniserver_run (update_dets) else - !$OMP BARRIER call davidson_slave_inproc(ithread) endif !$OMP END PARALLEL diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index aff2707a..8e3a94e5 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -94,7 +94,7 @@ subroutine switch_qp_run_to_master print *, 'This run should be started with the qp_run command' stop -1 endif - qp_run_address = trim(buffer) + qp_run_address = adjustl(buffer) print *, 'Switched to qp_run master : ', trim(qp_run_address) integer :: i @@ -684,26 +684,24 @@ subroutine add_task_to_taskserver(zmq_to_qp_run_socket,task) character*(*), intent(in) :: task integer :: rc, sze - character(len=:), allocatable :: message + character(len=:), allocatable :: message + + message='add_task '//trim(zmq_state)//' '//trim(task) + sze = len(message) + rc = f77_zmq_send(zmq_to_qp_run_socket, message, sze, 0) - sze = len(trim(task))+12+len(trim(zmq_state)) - allocate(character(len=sze) :: message) - write(message,*) 'add_task '//trim(zmq_state)//' '//trim(task) - - rc = f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0) if (rc /= sze) then print *, rc, sze print *, irp_here,': f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)' stop 'error' endif - rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0) + rc = f77_zmq_recv(zmq_to_qp_run_socket, message, sze-1, 0) if (message(1:rc) /= 'ok') then print *, trim(task) print *, 'Unable to add the next task' stop -1 endif - deallocate(message) end @@ -720,7 +718,7 @@ subroutine add_task_to_taskserver_send(zmq_to_qp_run_socket,task) character(len=:), allocatable :: message sze = len(trim(task))+12+len(trim(zmq_state)) - allocate(character(len=sze) :: message) + message = repeat(' ',sze) write(message,*) 'add_task '//trim(zmq_state)//' '//trim(task) rc = f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0) @@ -729,7 +727,6 @@ subroutine add_task_to_taskserver_send(zmq_to_qp_run_socket,task) print *, irp_here,': f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)' stop 'error' endif - deallocate(message) end @@ -797,17 +794,17 @@ subroutine get_task_from_taskserver(zmq_to_qp_run_socket,worker_id,task_id,task) write(message,*) 'get_task '//trim(zmq_state), worker_id sze = len(trim(message)) - rc = f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0) + rc = f77_zmq_send(zmq_to_qp_run_socket, message, sze, 0) if (rc /= sze) then print *, irp_here, ':f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)' stop 'error' endif + message = repeat(' ',512) rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0) - message = trim(message(1:rc)) - read(message,*) reply + read(message(1:rc),*) reply if (trim(reply) == 'get_task_reply') then - read(message,*) reply, task_id + read(message(1:rc),*) reply, task_id rc = 15 do while (message(rc:rc) == ' ') rc += 1 From 9ae8a25ab03ae07407da9b204607821463326754 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Mar 2017 23:14:04 +0100 Subject: [PATCH 082/106] Fixed stoch --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 44 +++++++++++++------- plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 22 +++++----- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 70ce056f..41d62eca 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -63,40 +63,52 @@ subroutine ZMQ_pt2(pt2,relative_error) integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket integer :: ipos + logical :: tasks + tasks = .False. ipos=1 + do i=1,tbc(0) if(tbc(i) > fragment_first) then - write(task(ipos:ipos+20),'(I9,X,I9,''|'')') 0, i + write(task(ipos:ipos+20),'(I9,X,I9,''|'')') 0, tbc(i) ipos += 20 if (ipos > 64000) then call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos-20))) ipos=1 + tasks = .True. endif else do j=1,fragment_count - write(task(ipos:ipos+20),'(I9,X,I9,''|'')') j, i + write(task(ipos:ipos+20),'(I9,X,I9,''|'')') j, tbc(i) ipos += 20 if (ipos > 64000) then call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos-20))) ipos=1 + tasks = .True. endif end do end if end do if (ipos > 1) then call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos-20))) + tasks = .True. endif - call zmq_set_running(zmq_to_qp_run_socket) - !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc+1) & - !$OMP PRIVATE(i) - i = omp_get_thread_num() - if (i==0) then - call pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, pt2) - else - call pt2_slave_inproc(i) - endif - !$OMP END PARALLEL + if (tasks) then + call zmq_set_running(zmq_to_qp_run_socket) + + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc+1) & + !$OMP PRIVATE(i) + i = omp_get_thread_num() + if (i==0) then + call pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, pt2) + else + call pt2_slave_inproc(i) + endif + !$OMP END PARALLEL + + else + pt2(1) = sum(pt2_detail(1,:)) + endif call end_parallel_job(zmq_to_qp_run_socket, 'pt2') tbc(0) = 0 @@ -105,6 +117,7 @@ subroutine ZMQ_pt2(pt2,relative_error) endif end do + end subroutine @@ -160,7 +173,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su type(selection_buffer), intent(inout) :: b - double precision :: pt2_mwen(N_states, N_det_generators) + double precision, allocatable :: pt2_mwen(:,:) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -181,7 +194,8 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su integer, allocatable :: parts_to_get(:) logical, allocatable :: actually_computed(:) - allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators)) + allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators), & + pt2_mwen(N_states, N_det_generators) ) actually_computed(:) = computed(:) parts_to_get(:) = 1 @@ -198,7 +212,7 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_pull = new_zmq_pull_socket() - allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det_generators), index(N_det_generators)) + allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det_generators), index(1)) more = 1 if (time0 < 0.d0) then time0 = omp_get_wtime() diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index f6f41ab3..4dd4374c 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -22,10 +22,10 @@ subroutine run_pt2_slave(thread,iproc,energy) double precision :: pt2(N_states) double precision,allocatable :: pt2_detail(:,:) - integer,allocatable :: index(:) + integer :: index integer :: Nindex - allocate(pt2_detail(N_states, N_det), index(N_det)) + allocate(pt2_detail(N_states, N_det)) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) @@ -37,9 +37,9 @@ subroutine run_pt2_slave(thread,iproc,energy) end if buf%N = 0 ctask = 1 + Nindex=1 pt2 = 0d0 pt2_detail = 0d0 - Nindex=1 do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task) @@ -48,8 +48,7 @@ subroutine run_pt2_slave(thread,iproc,energy) ctask = ctask - 1 else integer :: i_generator, i_i_generator, N, subset - read (task,*) Nindex - read (task,*) Nindex, subset, index(:Nindex) + read (task,*) subset, index !!!!! N=1 @@ -62,7 +61,7 @@ subroutine run_pt2_slave(thread,iproc,energy) if(N /= buf%N) stop "N changed... wtf man??" end if do i_i_generator=1, Nindex - i_generator = index(i_i_generator) + i_generator = index call select_connected(i_generator,energy,pt2_detail(1, i_i_generator),buf,subset) pt2(:) += pt2_detail(:, i_generator) enddo @@ -75,7 +74,6 @@ subroutine run_pt2_slave(thread,iproc,energy) end do if(ctask > 0) then call push_pt2_results(zmq_socket_push, Nindex, index, pt2_detail, task_id(1), ctask) - !print *, "pushed ", index(:Nindex) do i=1,buf%cur call add_to_selection_buffer(buf2, buf%det(1,1,i), buf%val(i)) enddo @@ -104,14 +102,14 @@ subroutine push_pt2_results(zmq_socket_push, N, index, pt2_detail, task_id, ntas integer(ZMQ_PTR), intent(in) :: zmq_socket_push double precision, intent(in) :: pt2_detail(N_states, N_det) - integer, intent(in) :: ntask, N, index(N), task_id(*) + integer, intent(in) :: ntask, N, index, task_id(*) integer :: rc rc = f77_zmq_send( zmq_socket_push, N, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" - rc = f77_zmq_send( zmq_socket_push, index, 4*N, ZMQ_SNDMORE) + rc = f77_zmq_send( zmq_socket_push, index, 4, ZMQ_SNDMORE) if(rc /= 4*N) stop "push" @@ -121,7 +119,7 @@ subroutine push_pt2_results(zmq_socket_push, N, index, pt2_detail, task_id, ntas rc = f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" - rc = f77_zmq_send( zmq_socket_push, task_id(1), ntask*4, 0) + rc = f77_zmq_send( zmq_socket_push, task_id, ntask*4, 0) if(rc /= 4*ntask) stop "push" ! Activate is zmq_socket_push is a REQ @@ -136,14 +134,14 @@ subroutine pull_pt2_results(zmq_socket_pull, N, index, pt2_detail, task_id, ntas implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_pull double precision, intent(inout) :: pt2_detail(N_states, N_det) - integer, intent(out) :: index(N_det) + integer, intent(out) :: index integer, intent(out) :: N, ntask, task_id(*) integer :: rc, rn, i rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) if(rc /= 4) stop "pull" - rc = f77_zmq_recv( zmq_socket_pull, index, 4*N, 0) + rc = f77_zmq_recv( zmq_socket_pull, index, 4, 0) if(rc /= 4*N) stop "pull" rc = f77_zmq_recv( zmq_socket_pull, pt2_detail, N_states*8*N, 0) From 8459b325ee3a6b02d05fd202e32aaa4172004dc6 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 6 Mar 2017 13:18:53 +0100 Subject: [PATCH 083/106] Convergence mrcc --- plugins/MRCC_Utils/mrcc_utils.irp.f | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 79d139cf..8fd51afe 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -790,13 +790,16 @@ END_PROVIDER resold = res if(iand(k, 127) == 0) then - print *, "res ", k, res - end if + print *, k, res, 1.d0 - res/resold + endif - if(res < 1d-10) exit + if ( (res < 1d-10).or.(res/resold > 0.99d0) ) then + exit + endif + end do dIj_unique(1:size(X), s) = X(1:size(X)) - print *, "res ", k, res + print *, k, res, 1.d0 - res/resold enddo From 2edaf7acd2382b1b22c777cb5921d4b3d69d4150 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 6 Mar 2017 13:20:02 +0100 Subject: [PATCH 084/106] Corrected print norm in mrcc --- plugins/MRCC_Utils/mrcc_utils.irp.f | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 79d139cf..466927a9 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -856,24 +856,20 @@ END_PROVIDER f = 1.d0/norm ! f now contains 1/ - norm = 1.d0 - do i=1,N_det_ref - norm = norm - psi_ref_coef(i,s)*psi_ref_coef(i,s) + norm = 0.d0 + do i=1,N_det_non_ref + norm = norm + psi_non_ref_coef(i,s)*psi_non_ref_coef(i,s) enddo ! norm now contains f = dsqrt(f*norm) ! f normalises T.Psi_0 such that (1+T)|Psi> is normalized - norm = norm*f print *, 'norm of |T Psi_0> = ', dsqrt(norm) + norm = norm*f if (dsqrt(norm) > 1.d0) then stop 'Error : Norm of the SD larger than the norm of the reference.' endif - do i=1,N_det_ref - norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s) - enddo - do i=1,N_det_non_ref rho_mrcc(i,s) = rho_mrcc(i,s) * f enddo From eccf1ca93bb8a678fb056003177c37ce19a6bd8a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 6 Mar 2017 13:25:06 +0100 Subject: [PATCH 085/106] Fixed MRCC --- plugins/MRCC_Utils/mrcc_utils.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index b6a16fe0..f7bf1d2b 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -787,7 +787,6 @@ END_PROVIDER if (res > resold) then factor = factor * 0.5d0 endif - resold = res if(iand(k, 127) == 0) then print *, k, res, 1.d0 - res/resold @@ -796,6 +795,7 @@ END_PROVIDER if ( (res < 1d-10).or.(res/resold > 0.99d0) ) then exit endif + resold = res end do dIj_unique(1:size(X), s) = X(1:size(X)) From a0736ce4e1af0a9562f8773dc85c7c9f32f02b76 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 6 Mar 2017 17:32:42 +0100 Subject: [PATCH 086/106] Fixed OCaml Address in use --- ocaml/TaskServer.ml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 6bfdc50e..23e887e6 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -62,7 +62,11 @@ let bind_socket ~socket_type ~socket ~port = | Unix.Unix_error _ -> (Time.pause @@ Time.Span.of_float 1. ; loop (i-1) ) | other_exception -> raise other_exception in loop 60; - ZMQ.Socket.bind socket @@ Printf.sprintf "ipc:///tmp/qp_run:%d" port + let filename = + Printf.sprintf "/tmp/qp_run:%d" port + in + Sys.remove filename; + ZMQ.Socket.bind socket ("ipc://"^filename) let hostname = lazy ( From b4395468a102f8a5a6b31fa7168e01e35a8040ff Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 6 Mar 2017 18:29:39 +0100 Subject: [PATCH 087/106] Optims davidson --- src/Davidson/u0Hu0.irp.f | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index b1946a42..03bf0f00 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -90,7 +90,7 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) enddo !$OMP END DO - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do sh=1,shortcut(0,2) do i=shortcut(sh,2),shortcut(sh+1,2)-1 org_i = sort_idx(i,2) @@ -123,7 +123,7 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) enddo !$OMP END DO - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do sh=1,shortcut(0,1) do sh2=1,shortcut(0,1) if (sh==sh2) cycle @@ -235,13 +235,12 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) enddo !$OMP END DO - !$OMP CRITICAL (u0Hu0) do istate=1,N_st do i=1,n + !$OMP ATOMIC v_0(i,istate) = v_0(i,istate) + vt(istate,i) enddo enddo - !$OMP END CRITICAL (u0Hu0) deallocate(vt,st) !$OMP END PARALLEL @@ -444,7 +443,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo !$OMP END DO - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do sh=1,shortcut(0,2) do i=shortcut(sh,2),shortcut(sh+1,2)-1 org_i = sort_idx(i,2) @@ -477,7 +476,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo !$OMP END DO - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do sh=1,shortcut(0,1) do sh2=1,shortcut(0,1) if (sh==sh2) cycle @@ -588,14 +587,14 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo !$OMP END DO - !$OMP CRITICAL (u0Hu0) do istate=1,N_st do i=1,n + !$OMP ATOMIC v_0(i,istate) = v_0(i,istate) + vt(istate,i) + !$OMP ATOMIC s_0(i,istate) = s_0(i,istate) + st(istate,i) enddo enddo - !$OMP END CRITICAL (u0Hu0) deallocate(vt,st) !$OMP END PARALLEL From d01ed36a2788d2d5e9f82884e7d91425dd27b4ed Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 6 Mar 2017 18:46:04 +0100 Subject: [PATCH 088/106] Fixed OCaml Address in use --- ocaml/TaskServer.ml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 23e887e6..a1625719 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -65,7 +65,11 @@ let bind_socket ~socket_type ~socket ~port = let filename = Printf.sprintf "/tmp/qp_run:%d" port in - Sys.remove filename; + begin + match Sys.file_exists filename with + | `Yes -> Sys.remove filename + | _ -> () + end; ZMQ.Socket.bind socket ("ipc://"^filename) From 0aadde30a0628eacab8fbfbf935558e1394c00d8 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 6 Mar 2017 18:55:53 +0100 Subject: [PATCH 089/106] OMP atomic --- plugins/MRCC_Utils/davidson.irp.f | 7 +++---- plugins/mrcepa0/dressing.irp.f | 10 +++++++--- src/Determinants/s2.irp.f | 3 +-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index c884b3c2..b96e9585 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -477,13 +477,12 @@ subroutine H_u_0_mrcc_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,istate_in,N_st,sze_8) enddo !$OMP END DO - !$OMP CRITICAL do istate=1,N_st do i=n,1,-1 + !$OMP ATOMIC v_0(i,istate) = v_0(i,istate) + vt(i,istate) enddo enddo - !$OMP END CRITICAL deallocate(vt) !$OMP END PARALLEL @@ -1115,14 +1114,14 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i ! End Specific to dressing ! ------------------------ - !$OMP CRITICAL do istate=1,N_st do i=n,1,-1 + !$OMP ATOMIC v_0(i,istate) = v_0(i,istate) + vt(istate,i) + !$OMP ATOMIC s_0(i,istate) = s_0(i,istate) + st(istate,i) enddo enddo - !$OMP END CRITICAL deallocate(vt,st) !$OMP END PARALLEL diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index cebe0a44..2cd85a6c 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -13,6 +13,7 @@ use bitmasks integer(bit_kind),allocatable :: buf(:,:,:) logical :: ok logical, external :: detEq + integer, external :: omp_get_thread_num delta_ij_mrcc = 0d0 delta_ii_mrcc = 0d0 @@ -291,26 +292,30 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen dIa_sla(i_state,k_sd) = dIa(i_state) * sla enddo enddo - call omp_set_lock( psi_ref_lock(i_I) ) do i_state=1,N_states if(dabs(psi_ref_coef(i_I,i_state)).ge.1.d-3)then do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) + !$OMP ATOMIC delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd) + !$OMP ATOMIC delta_ii_(i_state,i_I) = delta_ii_(i_state,i_I) - dIa_hla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) + !$OMP ATOMIC delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + dIa_sla(i_state,k_sd) + !$OMP ATOMIC delta_ii_s2_(i_state,i_I) = delta_ii_s2_(i_state,i_I) - dIa_sla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd) enddo else delta_ii_(i_state,i_I) = 0.d0 do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) + !$OMP ATOMIC delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + 0.5d0*dIa_hla(i_state,k_sd) + !$OMP ATOMIC delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + 0.5d0*dIa_sla(i_state,k_sd) enddo endif enddo - call omp_unset_lock( psi_ref_lock(i_I) ) enddo enddo deallocate (dIa_hla,dIa_sla,hij_cache,sij_cache) @@ -329,7 +334,6 @@ end integer :: i, j, i_state !mrmode : 1=mrcepa0, 2=mrsc2 add, 3=mrcc - PROVIDE psi_ref_lock if(mrmode == 3) then do i = 1, N_det_ref diff --git a/src/Determinants/s2.irp.f b/src/Determinants/s2.irp.f index 7e62befb..a807513c 100644 --- a/src/Determinants/s2.irp.f +++ b/src/Determinants/s2.irp.f @@ -223,13 +223,12 @@ subroutine S2_u_0_nstates(v_0,u_0,n,keys_tmp,Nint,N_st,sze_8) enddo !$OMP END DO NOWAIT - !$OMP CRITICAL do istate=1,N_st do i=n,1,-1 + !$OMP ATOMIC v_0(i,istate) = v_0(i,istate) + vt(i,istate) enddo enddo - !$OMP END CRITICAL deallocate(vt) !$OMP END PARALLEL From 7c8201a950400dbd31dd2c97b2917b8ff01d4676 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 11 Mar 2017 11:15:01 +0100 Subject: [PATCH 090/106] Added DDCI --- plugins/CAS_SD_ZMQ/EZFIO.cfg | 9 ++++- plugins/CAS_SD_ZMQ/ezfio_interface.irp.f | 4 -- plugins/CAS_SD_ZMQ/run_selection_slave.irp.f | 2 - plugins/CAS_SD_ZMQ/selection.irp.f | 42 +++++++++++--------- src/Determinants/EZFIO.cfg | 2 +- 5 files changed, 32 insertions(+), 27 deletions(-) delete mode 100644 plugins/CAS_SD_ZMQ/ezfio_interface.irp.f diff --git a/plugins/CAS_SD_ZMQ/EZFIO.cfg b/plugins/CAS_SD_ZMQ/EZFIO.cfg index 7425c8ba..43905f9e 100644 --- a/plugins/CAS_SD_ZMQ/EZFIO.cfg +++ b/plugins/CAS_SD_ZMQ/EZFIO.cfg @@ -1,10 +1,15 @@ [energy] type: double precision -doc: "Calculated CAS-SD energy" +doc: Calculated CAS-SD energy interface: ezfio [energy_pt2] type: double precision -doc: "Calculated selected CAS-SD energy with PT2 correction" +doc: Calculated selected CAS-SD energy with PT2 correction interface: ezfio +[do_ddci] +type: logical +doc: If true, remove purely inactive double excitations +interface: ezfio,provider,ocaml +default: False diff --git a/plugins/CAS_SD_ZMQ/ezfio_interface.irp.f b/plugins/CAS_SD_ZMQ/ezfio_interface.irp.f deleted file mode 100644 index 8adab518..00000000 --- a/plugins/CAS_SD_ZMQ/ezfio_interface.irp.f +++ /dev/null @@ -1,4 +0,0 @@ -! DO NOT MODIFY BY HAND -! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py -! from file /home/scemama/quantum_package/src/CAS_SD_ZMQ/EZFIO.cfg - diff --git a/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f b/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f index 35b482f0..e200322f 100644 --- a/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f +++ b/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f @@ -50,8 +50,6 @@ subroutine run_selection_slave(thread,iproc,energy) else if(N /= buf%N) stop "N changed... wtf man??" end if - !print *, "psi_selectors_coef ", psi_selectors_coef(N_det_selectors-5:N_det_selectors, 1) - !call debug_det(psi_selectors(1,1,N_det_selectors), N_int) do i_generator=i_generator_start,i_generator_max,step call select_connected(i_generator,energy,pt2,buf) enddo diff --git a/plugins/CAS_SD_ZMQ/selection.irp.f b/plugins/CAS_SD_ZMQ/selection.irp.f index 70230e9e..76ba04d6 100644 --- a/plugins/CAS_SD_ZMQ/selection.irp.f +++ b/plugins/CAS_SD_ZMQ/selection.irp.f @@ -635,20 +635,20 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d use selection_types implicit none - integer, intent(in) :: i_generator, sp, h1, h2 - double precision, intent(in) :: mat(N_states, mo_tot_num, mo_tot_num) - logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num) - double precision, intent(in) :: fock_diag_tmp(mo_tot_num) - double precision, intent(in) :: E0(N_states) - double precision, intent(inout) :: pt2(N_states) + integer, intent(in) :: i_generator, sp, h1, h2 + double precision, intent(in) :: mat(N_states, mo_tot_num, mo_tot_num) + logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num) + double precision, intent(in) :: fock_diag_tmp(mo_tot_num) + double precision, intent(in) :: E0(N_states) + double precision, intent(inout) :: pt2(N_states) type(selection_buffer), intent(inout) :: buf - logical :: ok - integer :: s1, s2, p1, p2, ib, j, istate - integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) - double precision :: e_pert, delta_E, val, Hii, max_e_pert,tmp - double precision, external :: diag_H_mat_elem_fock + logical :: ok + integer :: s1, s2, p1, p2, ib, j, istate + integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) + double precision :: e_pert, delta_E, val, Hii, max_e_pert,tmp + double precision, external :: diag_H_mat_elem_fock - logical, external :: detEq + logical, external :: detEq if(sp == 3) then @@ -670,18 +670,25 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(banned(p1,p2)) cycle if(mat(1, p1, p2) == 0d0) cycle call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) -logical, external :: is_in_wavefunction -if (is_in_wavefunction(det,N_int)) then - cycle -endif + logical, external :: is_in_wavefunction + if (is_in_wavefunction(det,N_int)) then + stop 'is_in_wf' + cycle + endif + if (do_ddci) then + integer, external :: is_a_two_holes_two_particles + if (is_a_two_holes_two_particles(det)) then + cycle + endif + endif Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) max_e_pert = 0d0 do istate=1,N_states delta_E = E0(istate) - Hii - val = mat(istate, p1, p2) + mat(istate, p1, p2) + val = mat(istate, p1, p2) + mat(istate, p1, p2) tmp = dsqrt(delta_E * delta_E + val * val) if (delta_E < 0.d0) then tmp = -tmp @@ -1232,7 +1239,6 @@ subroutine ZMQ_selection(N_in, pt2) endif integer :: i_generator, i_generator_start, i_generator_max, step -! step = int(max(1.,10*elec_num/mo_tot_num) step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) step = max(1,step) diff --git a/src/Determinants/EZFIO.cfg b/src/Determinants/EZFIO.cfg index 0676649e..a68a61a5 100644 --- a/src/Determinants/EZFIO.cfg +++ b/src/Determinants/EZFIO.cfg @@ -38,7 +38,7 @@ default: False type: logical doc: Force the wave function to be an eigenfunction of S^2 interface: ezfio,provider,ocaml -default: False +default: True [threshold_generators] type: Threshold From fce537fea9a4acc2b4b7bd1eff692e48a745b165 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 13 Mar 2017 00:26:21 +0100 Subject: [PATCH 091/106] Introduced PT2stoch --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 5 ++-- plugins/Full_CI_ZMQ/pt2_stoch.irp.f | 1 + plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 6 ++--- .../selection_davidson_slave.irp.f | 24 ++++++++++++------- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 3feec96f..fcc38954 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -104,8 +104,9 @@ program fci_zmq E_CI_before(1:N_states) = CI_energy(1:N_states) double precision :: relative_error relative_error=1.d-3 - !call ZMQ_pt2(pt2,relative_error) - call ZMQ_selection(0, pt2)! pour non-stochastic + pt2 = 0.d0 + call ZMQ_pt2(pt2,relative_error) + !call ZMQ_selection(0, pt2)! pour non-stochastic print *, 'Final step' print *, 'N_det = ', N_det print *, 'N_states = ', N_states diff --git a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f index 71ebf357..8ffd8e60 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch.irp.f @@ -25,6 +25,7 @@ subroutine run SOFT_TOUCH pt2_e0_denominator read_wf endif allocate (pt2(N_states)) + pt2 = 0.d0 threshold_selectors = 1.d0 threshold_generators = 1d0 diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 41d62eca..8846a321 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -46,7 +46,7 @@ subroutine ZMQ_pt2(pt2,relative_error) pt2_detail = 0d0 time0 = omp_get_wtime() - print *, "grep - time - avg - err - n_combs" + print *, "time - avg - err - n_combs" generator_per_task = 1 do while(.true.) @@ -270,13 +270,14 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su avg = E0 + (sumabove(tooth) / Nabove(tooth)) eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) time = omp_get_wtime() - print "(A, 4(E20.13), 4(I9))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) + print "(3(E22.13), 4(I9))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) if (dabs(eqt/avg) < relative_error) then pt2(1) = avg exit pullLoop endif end if end do pullLoop + print "(3(E22.13), 4(I9))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) @@ -422,7 +423,6 @@ subroutine get_filling_teeth(computed, tbc) tbc(k) = j k=k+1 computed(j) = .true. -! print *, "filled ", j, "to reach tooth", last_full, "ending at", first_det_of_teeth(last_full+1) end if end do tbc(0) = k-1 diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index 58f005bc..61b8734e 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -32,6 +32,7 @@ subroutine run_wf zmq_context = f77_zmq_ctx_new () states(1) = 'selection' states(2) = 'davidson' + states(3) = 'pt2' zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() force_update = .True. @@ -54,7 +55,7 @@ subroutine run_wf !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() - call selection_slave_tcp(i, energy) + call run_selection_slave(0,i,energy) !$OMP END PARALLEL print *, 'Selection done' @@ -72,17 +73,24 @@ subroutine run_wf !$OMP END PARALLEL print *, 'Davidson done' + else if (trim(zmq_state) == 'pt2') then + + ! Selection + ! --------- + + print *, 'PT2' + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + + !$OMP PARALLEL PRIVATE(i) + i = omp_get_thread_num() + call run_pt2_slave(0,i,energy) + !$OMP END PARALLEL + print *, 'PT2 done' + endif end do end -subroutine selection_slave_tcp(i,energy) - implicit none - double precision, intent(in) :: energy(N_states) - integer, intent(in) :: i - - call run_selection_slave(0,i,energy) -end From abf9073a693e27f34ee6e71c4da4a900ea165e78 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 13 Mar 2017 00:30:14 +0100 Subject: [PATCH 092/106] Fixed pt2 stoch print --- plugins/Full_CI_ZMQ/pt2_slave.irp.f | 5 +++-- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 10 +++++----- plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 2 +- plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f | 8 +++++--- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_slave.irp.f b/plugins/Full_CI_ZMQ/pt2_slave.irp.f index e45bbc51..c112e040 100644 --- a/plugins/Full_CI_ZMQ/pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_slave.irp.f @@ -63,7 +63,8 @@ subroutine pt2_slave_tcp(i,energy) implicit none double precision, intent(in) :: energy(N_states_diag) integer, intent(in) :: i - - call run_pt2_slave(0,i,energy) + logical :: lstop + lstop = .False. + call run_pt2_slave(0,i,energy,lstop) end diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 8846a321..fd9c1a72 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -27,7 +27,7 @@ subroutine ZMQ_pt2(pt2,relative_error) double precision, external :: omp_get_wtime double precision :: time0, time - allocate(pt2_detail(N_states, N_det_generators), comb(N_det_generators), computed(N_det_generators), tbc(0:size_tbc)) + allocate(pt2_detail(N_states, N_det_generators), comb(N_det_generators/2), computed(N_det_generators), tbc(0:size_tbc)) sumabove = 0d0 sum2above = 0d0 Nabove = 0d0 @@ -105,12 +105,12 @@ subroutine ZMQ_pt2(pt2,relative_error) call pt2_slave_inproc(i) endif !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, 'pt2') else pt2(1) = sum(pt2_detail(1,:)) endif - call end_parallel_job(zmq_to_qp_run_socket, 'pt2') tbc(0) = 0 if (pt2(1) /= 0.d0) then exit @@ -270,14 +270,14 @@ subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, su avg = E0 + (sumabove(tooth) / Nabove(tooth)) eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) time = omp_get_wtime() - print "(3(E22.13), 4(I9))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) if (dabs(eqt/avg) < relative_error) then pt2(1) = avg - exit pullLoop +! exit pullLoop endif + print "(4(G22.13), 4(I9))", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) end if end do pullLoop - print "(3(E22.13), 4(I9))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) + print "(4(G22.13), 4(I9))", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 4dd4374c..452b446b 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -43,7 +43,7 @@ subroutine run_pt2_slave(thread,iproc,energy) do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task) - done = task_id(ctask) == 0 + done = task_id(ctask) == 0 if (done) then ctask = ctask - 1 else diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index 61b8734e..3d10612f 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -75,15 +75,17 @@ subroutine run_wf else if (trim(zmq_state) == 'pt2') then - ! Selection - ! --------- + ! PT2 + ! --- print *, 'PT2' call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + logical :: lstop + lstop = .False. !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() - call run_pt2_slave(0,i,energy) + call run_pt2_slave(0,i,energy,lstop) !$OMP END PARALLEL print *, 'PT2 done' From 7cb17e0a486efbc9aa20a5bb1a5a6a5ca5d7c865 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 13 Mar 2017 12:26:16 +0100 Subject: [PATCH 093/106] Super fast density matrix --- src/Determinants/density_matrix.irp.f | 87 +++++++++++++++++++------ src/Determinants/spindeterminants.irp.f | 48 +++++++++++++- 2 files changed, 113 insertions(+), 22 deletions(-) diff --git a/src/Determinants/density_matrix.irp.f b/src/Determinants/density_matrix.irp.f index ed2f49bd..e0ec6a6b 100644 --- a/src/Determinants/density_matrix.irp.f +++ b/src/Determinants/density_matrix.irp.f @@ -27,25 +27,33 @@ END_PROVIDER double precision :: ck, cl, ckl double precision :: phase integer :: h1,h2,p1,p2,s1,s2, degree + integer(bit_kind) :: tmp_det(N_int,2), tmp_det2(N_int,2) integer :: exc(0:2,2,2),n_occ(2) double precision, allocatable :: tmp_a(:,:,:), tmp_b(:,:,:) + integer :: krow, kcol, lrow, lcol one_body_dm_mo_alpha = 0.d0 one_body_dm_mo_beta = 0.d0 !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(j,k,l,m,occ,ck, cl, ckl,phase,h1,h2,p1,p2,s1,s2, degree,exc, & - !$OMP tmp_a, tmp_b, n_occ)& + !$OMP tmp_a, tmp_b, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2)& !$OMP SHARED(psi_det,psi_coef,N_int,N_states,elec_alpha_num,& !$OMP elec_beta_num,one_body_dm_mo_alpha,one_body_dm_mo_beta,N_det,mo_tot_num_align,& - !$OMP mo_tot_num) + !$OMP mo_tot_num,psi_bilinear_matrix_rows,psi_bilinear_matrix_columns, & + !$OMP psi_bilinear_matrix_transp_rows, psi_bilinear_matrix_transp_columns, & + !$OMP psi_bilinear_matrix_values, psi_bilinear_matrix_transp_values) allocate(tmp_a(mo_tot_num_align,mo_tot_num,N_states), tmp_b(mo_tot_num_align,mo_tot_num,N_states) ) tmp_a = 0.d0 tmp_b = 0.d0 - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do k=1,N_det - call bitstring_to_list_ab(psi_det(1,1,k), occ, n_occ, N_int) + krow = psi_bilinear_matrix_rows(k) + kcol = psi_bilinear_matrix_columns(k) + tmp_det(:,1) = psi_det(:,1, krow) + tmp_det(:,2) = psi_det(:,2, kcol) + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) do m=1,N_states - ck = psi_coef(k,m)*psi_coef(k,m) + ck = psi_bilinear_matrix_values(k,m)*psi_bilinear_matrix_values(k,m) do l=1,elec_alpha_num j = occ(l,1) tmp_a(j,j,m) += ck @@ -55,24 +63,61 @@ END_PROVIDER tmp_b(j,j,m) += ck enddo enddo - do l=1,k-1 - call get_excitation_degree(psi_det(1,1,k),psi_det(1,1,l),degree,N_int) - if (degree /= 1) then - cycle + + l = k+1 + lrow = psi_bilinear_matrix_rows(l) + lcol = psi_bilinear_matrix_columns(l) + do while ( lcol == kcol ) + tmp_det2(:,1) = psi_det(:,1, lrow) + tmp_det2(:,2) = psi_det(:,2, lcol) + call get_excitation_degree(tmp_det,tmp_det2,degree,N_int) + if (degree == 1) then + call get_mono_excitation(psi_det(1,1,k),psi_det(1,1,l),exc,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + do m=1,N_states + ckl = psi_bilinear_matrix_values(k,m)*psi_bilinear_matrix_values(l,m) * phase + if (s1==1) then + tmp_a(h1,p1,m) += ckl + tmp_a(p1,h1,m) += ckl + else + tmp_b(h1,p1,m) += ckl + tmp_b(p1,h1,m) += ckl + endif + enddo endif - call get_mono_excitation(psi_det(1,1,k),psi_det(1,1,l),exc,phase,N_int) - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) - do m=1,N_states - ckl = psi_coef(k,m) * psi_coef(l,m) * phase - if (s1==1) then - tmp_a(h1,p1,m) += ckl - tmp_a(p1,h1,m) += ckl - else - tmp_b(h1,p1,m) += ckl - tmp_b(p1,h1,m) += ckl - endif - enddo + l = l+1 + if (l>N_det) exit + lrow = psi_bilinear_matrix_rows(l) + lcol = psi_bilinear_matrix_columns(l) enddo + + l = k+1 + lrow = psi_bilinear_matrix_transp_rows(l) + lcol = psi_bilinear_matrix_transp_columns(l) + do while ( lrow == krow ) + tmp_det2(:,1) = psi_det(:,1, lrow) + tmp_det2(:,2) = psi_det(:,2, lcol) + call get_excitation_degree(tmp_det,tmp_det2,degree,N_int) + if (degree == 1) then + call get_mono_excitation(psi_det(1,1,k),psi_det(1,1,l),exc,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + do m=1,N_states + ckl = psi_bilinear_matrix_values(k,m)*psi_bilinear_matrix_transp_values(l,m) * phase + if (s1==1) then + tmp_a(h1,p1,m) += ckl + tmp_a(p1,h1,m) += ckl + else + tmp_b(h1,p1,m) += ckl + tmp_b(p1,h1,m) += ckl + endif + enddo + endif + l = l+1 + if (l>N_det) exit + lrow = psi_bilinear_matrix_transp_rows(l) + lcol = psi_bilinear_matrix_transp_columns(l) + enddo + enddo !$OMP END DO NOWAIT !$OMP CRITICAL diff --git a/src/Determinants/spindeterminants.irp.f b/src/Determinants/spindeterminants.irp.f index 2eec0dea..b30f84cb 100644 --- a/src/Determinants/spindeterminants.irp.f +++ b/src/Determinants/spindeterminants.irp.f @@ -393,6 +393,8 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_values, (N_det,N_states) BEGIN_DOC ! Sparse coefficient matrix if the wave function is expressed in a bilinear form : ! D_a^t C D_b +! +! Rows are alpha determinants and columns are beta. END_DOC integer :: i,j,k, l integer(bit_kind) :: tmp_det(N_int,2) @@ -421,10 +423,54 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_values, (N_det,N_states) call isort(to_sort, iorder, N_det) call iset_order(psi_bilinear_matrix_rows,iorder,N_det) call iset_order(psi_bilinear_matrix_columns,iorder,N_det) - call dset_order(psi_bilinear_matrix_values,iorder,N_det) + do l=1,N_states + call dset_order(psi_bilinear_matrix_values(1,l),iorder,N_det) + enddo deallocate(iorder,to_sort) END_PROVIDER + +BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_states) ] +&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows, (N_det) ] +&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_columns, (N_det) ] + use bitmasks + implicit none + BEGIN_DOC +! Sparse coefficient matrix if the wave function is expressed in a bilinear form : +! D_a^t C D_b +! +! Rows are Beta determinants and columns are alpha + END_DOC + integer :: i,j,k,l + + + PROVIDE psi_coef_sorted_bit + + integer, allocatable :: iorder(:), to_sort(:) + allocate(iorder(N_det), to_sort(N_det)) + do l=1,N_states + do k=1,N_det + psi_bilinear_matrix_transp_values (k,l) = psi_bilinear_matrix_values (k,l) + enddo + enddo + do k=1,N_det + psi_bilinear_matrix_transp_columns(k) = psi_bilinear_matrix_columns(k) + psi_bilinear_matrix_transp_rows (k) = psi_bilinear_matrix_rows (k) + i = psi_bilinear_matrix_transp_columns(k) + j = psi_bilinear_matrix_transp_rows (k) + to_sort(k) = N_det_beta_unique * (j-1) + i + iorder(k) = k + enddo + call isort(to_sort, iorder, N_det) + call iset_order(psi_bilinear_matrix_transp_rows,iorder,N_det) + call iset_order(psi_bilinear_matrix_transp_columns,iorder,N_det) + do l=1,N_states + call dset_order(psi_bilinear_matrix_values(1,l),iorder,N_det) + enddo + deallocate(iorder,to_sort) +END_PROVIDER + + BEGIN_PROVIDER [ double precision, psi_bilinear_matrix, (N_det_alpha_unique,N_det_beta_unique,N_states) ] implicit none BEGIN_DOC From b4d6779d8c3a14e2aaf25a0d571e37fcc9010183 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 13 Mar 2017 12:38:22 +0100 Subject: [PATCH 094/106] Super fast density matrix --- src/Determinants/spindeterminants.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Determinants/spindeterminants.irp.f b/src/Determinants/spindeterminants.irp.f index b30f84cb..acc49d50 100644 --- a/src/Determinants/spindeterminants.irp.f +++ b/src/Determinants/spindeterminants.irp.f @@ -465,7 +465,7 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_ call iset_order(psi_bilinear_matrix_transp_rows,iorder,N_det) call iset_order(psi_bilinear_matrix_transp_columns,iorder,N_det) do l=1,N_states - call dset_order(psi_bilinear_matrix_values(1,l),iorder,N_det) + call dset_order(psi_bilinear_matrix_transp_values(1,l),iorder,N_det) enddo deallocate(iorder,to_sort) END_PROVIDER From 010afbc4f6129552364712c0d0b162bfd8069437 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 17 Mar 2017 12:17:05 +0100 Subject: [PATCH 095/106] Perturbative triples deactivated --- plugins/Full_CI_ZMQ/selection.irp.f | 1 - plugins/mrcepa0/dressing.irp.f | 66 ++++++++++++++--------------- 2 files changed, 32 insertions(+), 35 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 95cc30cd..eb5db188 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -536,7 +536,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(mat(1, p1, p2) == 0d0) cycle call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) - Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) max_e_pert = 0d0 diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index 2cd85a6c..8627a8de 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -75,9 +75,9 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen logical :: good, fullMatch integer(bit_kind),allocatable :: tq(:,:,:) - integer :: N_tq, c_ref ,degree + integer :: N_tq, c_ref ,degree1, degree2, degree - double precision :: hIk, hla, hIl, sla, dIk(N_states), dka(N_states), dIa(N_states) + double precision :: hIk, hla, hIl, sla, dIk(N_states), dka(N_states), dIa(N_states), hka double precision, allocatable :: dIa_hla(:,:), dIa_sla(:,:) double precision :: haj, phase, phase2 double precision :: f(N_states), ci_inv(N_states) @@ -100,6 +100,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen !double precision, external :: get_dij, get_dij_index + leng = max(N_det_generators, N_det_non_ref) allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref), sij_cache(N_det_non_ref)) allocate(idx_alpha(0:psi_det_size), degree_alpha(psi_det_size)) @@ -199,8 +200,8 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen ! |I> do i_I=1,N_det_ref ! Find triples and quadruple grand parents - call get_excitation_degree(tq(1,1,i_alpha),psi_ref(1,1,i_I),degree,Nint) - if (degree > 4) then + call get_excitation_degree(tq(1,1,i_alpha),psi_ref(1,1,i_I),degree1,Nint) + if (degree1 > 4) then cycle endif @@ -212,22 +213,14 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen do k_sd=1,idx_alpha(0) ! Loop if lambda == 0 logical :: loop -! loop = .True. -! do i_state=1,N_states -! if (lambda_mrcc(i_state,idx_alpha(k_sd)) /= 0.d0) then -! loop = .False. -! exit -! endif -! enddo -! if (loop) then -! cycle -! endif + + hka = hij_cache(k_sd) call get_excitation_degree(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(k_sd)),degree,Nint) if (degree > 2) then cycle endif - + ! ! !hIk = hij_mrcc(idx_alpha(k_sd),i_I) @@ -235,15 +228,15 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen ! |l> = Exc(k -> alpha) |I> - call get_excitation(psi_non_ref(1,1,idx_alpha(k_sd)),tq(1,1,i_alpha),exc,degree,phase,Nint) - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + call get_excitation(psi_non_ref(1,1,idx_alpha(k_sd)),tq(1,1,i_alpha),exc,degree2,phase,Nint) + call decode_exc(exc,degree2,h1,p1,h2,p2,s1,s2) do k=1,N_int tmp_det(k,1) = psi_ref(k,1,i_I) tmp_det(k,2) = psi_ref(k,2,i_I) enddo logical :: ok call apply_excitation(psi_ref(1,1,i_I), exc, tmp_det, ok, Nint) - if(.not. ok) cycle +! ok = ok .and. ( (degree2 /= 1).and.(degree /=1) ) do i_state=1,N_states dIK(i_state) = dij(i_I, idx_alpha(k_sd), i_state) @@ -253,28 +246,33 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen do i_state=1,N_states dka(i_state) = 0.d0 enddo - do l_sd=k_sd+1,idx_alpha(0) - call get_excitation_degree(tmp_det,psi_non_ref(1,1,idx_alpha(l_sd)),degree,Nint) - if (degree == 0) then - -! loop = .True. -! do i_state=1,N_states -! if (lambda_mrcc(i_state,idx_alpha(l_sd)) /= 0.d0) then -! loop = .False. -! exit -! endif -! enddo - loop = .false. - if (.not.loop) then + + if (ok) then + do l_sd=k_sd+1,idx_alpha(0) + call get_excitation_degree(tmp_det,psi_non_ref(1,1,idx_alpha(l_sd)),degree,Nint) + if (degree == 0) then call get_excitation(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(l_sd)),exc,degree,phase2,Nint) do i_state=1,N_states dka(i_state) = dij(i_I, idx_alpha(l_sd), i_state) * phase * phase2 enddo + exit endif + enddo + + + else + + ! Perturbative triples + double precision :: Delta_E + double precision, external :: diag_H_mat_elem + do i_state=1,N_states + Delta_E = psi_ref_energy_diagonalized(i_state) - diag_H_mat_elem(tq(1,1,i_alpha),N_int) + dka(i_state) = -dabs(hka / Delta_E ) +dka(i_state) = 0.d0 + enddo + + endif - exit - endif - enddo do i_state=1,N_states dIa(i_state) = dIa(i_state) + dIk(i_state) * dka(i_state) enddo From 99bcc9c04a9b5568261516a1e876484bffa6a94a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 17 Mar 2017 18:45:52 +0100 Subject: [PATCH 096/106] Removed OpenMP in pseudos --- plugins/mrcc_selected/ezfio_interface.irp.f | 2 +- .../pot_ao_pseudo_ints.irp.f | 92 +++++++------------ 2 files changed, 32 insertions(+), 62 deletions(-) diff --git a/plugins/mrcc_selected/ezfio_interface.irp.f b/plugins/mrcc_selected/ezfio_interface.irp.f index 47e7cea5..54d993fe 100644 --- a/plugins/mrcc_selected/ezfio_interface.irp.f +++ b/plugins/mrcc_selected/ezfio_interface.irp.f @@ -1,6 +1,6 @@ ! DO NOT MODIFY BY HAND ! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py -! from file /ccc/work/cont003/gen1738/scemama/quantum_package/src/mrcc_selected/EZFIO.cfg +! from file /panfs/panasas/cnt0024/cpq1738/scemama/workdir/quantum_package/src/mrcc_selected/EZFIO.cfg BEGIN_PROVIDER [ double precision, thresh_dressed_ci ] diff --git a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f index b34b201e..fa855113 100644 --- a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f +++ b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f @@ -53,26 +53,20 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu call wall_time(wall_1) call cpu_time(cpu_1) -!write(33,*) 'xxxLOCxxx' -!write(33,*) 'pseudo_klocmax', pseudo_klocmax -!write(33,*) 'pseudo_v_k_transp ', pseudo_v_k_transp -!write(33,*) 'pseudo_n_k_transp ', pseudo_n_k_transp -!write(33,*) 'pseudo_dz_k_transp', pseudo_dz_k_transp -!write(33,*) 'xxxLOCxxx' thread_num = 0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& - !$OMP num_A,num_B,Z,c,n_pt_in, & - !$OMP wall_0,wall_2,thread_num) & - !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& - !$OMP ao_pseudo_integral_local,nucl_num,nucl_charge, & - !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_v_k_transp,pseudo_n_k_transp, pseudo_dz_k_transp,& - !$OMP wall_1) - - !$ thread_num = omp_get_thread_num() - !$OMP DO SCHEDULE (guided) +! !$OMP PARALLEL & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& +! !$OMP num_A,num_B,Z,c,n_pt_in, & +! !$OMP wall_0,wall_2,thread_num) & +! !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& +! !$OMP ao_pseudo_integral_local,nucl_num,nucl_charge, & +! !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_v_k_transp,pseudo_n_k_transp, pseudo_dz_k_transp,& +! !$OMP wall_1) +! +! !$ thread_num = omp_get_thread_num() +! !$OMP DO SCHEDULE (guided) do j = 1, ao_num @@ -109,15 +103,6 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu pseudo_n_k_transp (1,k), & pseudo_dz_k_transp(1,k), & A_center,power_A,alpha,B_center,power_B,beta,C_center) -! write(33,*) i,j,k -! write(33,*) A_center,power_A,alpha,B_center,power_B,beta,C_center, & -! Vloc(pseudo_klocmax, & -! pseudo_v_k_transp (1,k), & -! pseudo_n_k_transp (1,k), & -! pseudo_dz_k_transp(1,k), & -! A_center,power_A,alpha,B_center,power_B,beta,C_center) -! write(33,*) - enddo ao_pseudo_integral_local(i,j) = ao_pseudo_integral_local(i,j) +& ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i)*c @@ -135,8 +120,8 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu endif enddo - !$OMP END DO - !$OMP END PARALLEL +! !$OMP END DO +! !$OMP END PARALLEL END_PROVIDER @@ -165,26 +150,20 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu call wall_time(wall_1) call cpu_time(cpu_1) thread_num = 0 -!write(34,*) 'xxxNONLOCxxx' -!write(34,*) ' pseudo_lmax,pseudo_kmax', pseudo_lmax,pseudo_kmax -!write(34,*) ' pseudo_v_kl_transp(1,0,k)', pseudo_v_kl_transp -!write(34,*) ' pseudo_n_kl_transp(1,0,k)', pseudo_n_kl_transp -!write(34,*) ' pseudo_dz_kl_transp(1,0,k)', pseudo_dz_kl_transp -!write(34,*) 'xxxNONLOCxxx' - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& - !$OMP num_A,num_B,Z,c,n_pt_in, & - !$OMP wall_0,wall_2,thread_num) & - !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& - !$OMP ao_pseudo_integral_non_local,nucl_num,nucl_charge,& - !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_n_kl_transp, pseudo_v_kl_transp, pseudo_dz_kl_transp,& - !$OMP wall_1) - - !$ thread_num = omp_get_thread_num() - - !$OMP DO SCHEDULE (guided) +! !$OMP PARALLEL & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& +! !$OMP num_A,num_B,Z,c,n_pt_in, & +! !$OMP wall_0,wall_2,thread_num) & +! !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& +! !$OMP ao_pseudo_integral_non_local,nucl_num,nucl_charge,& +! !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_n_kl_transp, pseudo_v_kl_transp, pseudo_dz_kl_transp,& +! !$OMP wall_1) +! +! !$ thread_num = omp_get_thread_num() +! +! !$OMP DO SCHEDULE (guided) do j = 1, ao_num @@ -222,15 +201,6 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu pseudo_n_kl_transp(1,0,k), & pseudo_dz_kl_transp(1,0,k), & A_center,power_A,alpha,B_center,power_B,beta,C_center) -! write(34,*) i,j,k -! write(34,*) & -! A_center,power_A,alpha,B_center,power_B,beta,C_center, & -! Vpseudo(pseudo_lmax,pseudo_kmax, & -! pseudo_v_kl_transp(1,0,k), & -! pseudo_n_kl_transp(1,0,k), & -! pseudo_dz_kl_transp(1,0,k), & -! A_center,power_A,alpha,B_center,power_B,beta,C_center) -! write(34,*) '' enddo ao_pseudo_integral_non_local(i,j) = ao_pseudo_integral_non_local(i,j) +& ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i)*c @@ -247,10 +217,10 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu endif endif enddo - - !$OMP END DO - - !$OMP END PARALLEL +! +! !$OMP END DO +! +! !$OMP END PARALLEL END_PROVIDER From 8703a06c4faf7616e5ea276f27f1e1da84bf0f1d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 17 Mar 2017 19:05:30 +0100 Subject: [PATCH 097/106] Fixed pseudos --- src/AO_Basis/aos.irp.f | 5 +- .../pot_ao_pseudo_ints.irp.f | 68 ++++++++++--------- src/Integrals_Monoelec/pseudopot.f90 | 46 +++++-------- 3 files changed, 55 insertions(+), 64 deletions(-) diff --git a/src/AO_Basis/aos.irp.f b/src/AO_Basis/aos.irp.f index 0938d3bd..f0f03fab 100644 --- a/src/AO_Basis/aos.irp.f +++ b/src/AO_Basis/aos.irp.f @@ -182,7 +182,7 @@ integer function ao_power_index(nx,ny,nz) end -BEGIN_PROVIDER [ character*(128), l_to_charater, (0:4)] +BEGIN_PROVIDER [ character*(128), l_to_charater, (0:7)] BEGIN_DOC ! character corresponding to the "L" value of an AO orbital END_DOC @@ -192,6 +192,9 @@ BEGIN_PROVIDER [ character*(128), l_to_charater, (0:4)] l_to_charater(2)='D' l_to_charater(3)='F' l_to_charater(4)='G' + l_to_charater(5)='H' + l_to_charater(6)='I' + l_to_charater(7)='J' END_PROVIDER diff --git a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f index fa855113..7cfd6f9f 100644 --- a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f +++ b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f @@ -51,22 +51,23 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu print*, 'Providing the nuclear electron pseudo integrals (local)' call wall_time(wall_1) + wall_0 = wall_1 call cpu_time(cpu_1) thread_num = 0 -! !$OMP PARALLEL & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& -! !$OMP num_A,num_B,Z,c,n_pt_in, & -! !$OMP wall_0,wall_2,thread_num) & -! !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& -! !$OMP ao_pseudo_integral_local,nucl_num,nucl_charge, & -! !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_v_k_transp,pseudo_n_k_transp, pseudo_dz_k_transp,& -! !$OMP wall_1) -! -! !$ thread_num = omp_get_thread_num() -! !$OMP DO SCHEDULE (guided) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& + !$OMP num_A,num_B,Z,c,n_pt_in, & + !$OMP wall_0,wall_2,thread_num) & + !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& + !$OMP ao_pseudo_integral_local,nucl_num,nucl_charge, & + !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_v_k_transp,pseudo_n_k_transp, pseudo_dz_k_transp,& + !$OMP wall_1) + + !$ thread_num = omp_get_thread_num() + !$OMP DO SCHEDULE (guided) do j = 1, ao_num @@ -120,8 +121,8 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu endif enddo -! !$OMP END DO -! !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL END_PROVIDER @@ -148,23 +149,24 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu print*, 'Providing the nuclear electron pseudo integrals (non-local)' call wall_time(wall_1) + wall_0 = wall_1 call cpu_time(cpu_1) thread_num = 0 -! !$OMP PARALLEL & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& -! !$OMP num_A,num_B,Z,c,n_pt_in, & -! !$OMP wall_0,wall_2,thread_num) & -! !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& -! !$OMP ao_pseudo_integral_non_local,nucl_num,nucl_charge,& -! !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_n_kl_transp, pseudo_v_kl_transp, pseudo_dz_kl_transp,& -! !$OMP wall_1) -! -! !$ thread_num = omp_get_thread_num() -! -! !$OMP DO SCHEDULE (guided) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& + !$OMP num_A,num_B,Z,c,n_pt_in, & + !$OMP wall_0,wall_2,thread_num) & + !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& + !$OMP ao_pseudo_integral_non_local,nucl_num,nucl_charge,& + !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_n_kl_transp, pseudo_v_kl_transp, pseudo_dz_kl_transp,& + !$OMP wall_1) + !$ thread_num = omp_get_thread_num() + + !$OMP DO SCHEDULE (guided) +! do j = 1, ao_num num_A = ao_nucl(j) @@ -217,12 +219,12 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num_align,ao_nu endif endif enddo -! -! !$OMP END DO -! -! !$OMP END PARALLEL - - + + !$OMP END DO + + !$OMP END PARALLEL + + END_PROVIDER BEGIN_PROVIDER [ double precision, pseudo_v_k_transp, (pseudo_klocmax,nucl_num) ] diff --git a/src/Integrals_Monoelec/pseudopot.f90 b/src/Integrals_Monoelec/pseudopot.f90 index d77b3ca0..a69aa42d 100644 --- a/src/Integrals_Monoelec/pseudopot.f90 +++ b/src/Integrals_Monoelec/pseudopot.f90 @@ -15,14 +15,10 @@ double precision function Vps & implicit none integer n_a(3),n_b(3) double precision g_a,g_b,a(3),b(3),c(3) -integer kmax_max,lmax_max -parameter (kmax_max=2,lmax_max=2) -integer lmax,kmax,n_kl(kmax_max,0:lmax_max) -double precision v_kl(kmax_max,0:lmax_max),dz_kl(kmax_max,0:lmax_max) -integer klocmax_max -parameter (klocmax_max=10) -integer klocmax,n_k(klocmax_max) -double precision v_k(klocmax_max),dz_k(klocmax_max) +integer lmax,kmax,n_kl(kmax,0:lmax) +double precision v_kl(kmax,0:lmax),dz_kl(kmax,0:lmax) +integer klocmax,n_k(klocmax) +double precision v_k(klocmax),dz_k(klocmax) double precision Vloc,Vpseudo Vps=Vloc(klocmax,v_k,n_k,dz_k,a,n_a,g_a,b,n_b,g_b,c) & @@ -36,13 +32,10 @@ double precision function Vps_num & implicit none integer n_a(3),n_b(3) double precision g_a,g_b,a(3),b(3),c(3),rmax -integer kmax_max,lmax_max -parameter (kmax_max=2,lmax_max=2) -integer lmax,kmax,n_kl(kmax_max,0:lmax_max) -double precision v_kl(kmax_max,0:lmax_max),dz_kl(kmax_max,0:lmax_max) -integer klocmax_max;parameter (klocmax_max=10) -integer klocmax,n_k(klocmax_max) -double precision v_k(klocmax_max),dz_k(klocmax_max) +integer lmax,kmax,n_kl(kmax,0:lmax) +double precision v_kl(kmax,0:lmax),dz_kl(kmax,0:lmax) +integer klocmax,n_k(klocmax) +double precision v_k(klocmax),dz_k(klocmax) double precision Vloc_num,Vpseudo_num,v1,v2 integer npts,nptsgrid nptsgrid=50 @@ -54,11 +47,9 @@ end double precision function Vloc_num(npts_over,xmax,klocmax,v_k,n_k,dz_k,a,n_a,g_a,b,n_b,g_b,c) implicit none -integer klocmax_max -parameter (klocmax_max=10) integer klocmax -double precision v_k(klocmax_max),dz_k(klocmax_max) -integer n_k(klocmax_max) +double precision v_k(klocmax),dz_k(klocmax) +integer n_k(klocmax) integer npts_over,ix,iy,iz double precision xmax,dx,x,y,z double precision a(3),b(3),c(3),term,r,orb_phi,g_a,g_b,ac(3),bc(3) @@ -705,12 +696,9 @@ end double precision function Vloc(klocmax,v_k,n_k,dz_k,a,n_a,g_a,b,n_b,g_b,c) implicit none -integer klocmax_max,lmax_max,ntot_max -parameter (klocmax_max=10,lmax_max=2) -parameter (ntot_max=10) integer klocmax -double precision v_k(klocmax_max),dz_k(klocmax_max),crochet,bigA -integer n_k(klocmax_max) +double precision v_k(klocmax),dz_k(klocmax),crochet,bigA +integer n_k(klocmax) double precision a(3),g_a,b(3),g_b,c(3),d(3) integer n_a(3),n_b(3),ntotA,ntotB,ntot,m integer i,l,k,ktot,k1,k2,k3,k1p,k2p,k3p @@ -719,6 +707,7 @@ double precision,allocatable :: array_R_loc(:,:,:) double precision,allocatable :: array_coefs(:,:,:,:,:,:) double precision int_prod_bessel_loc,binom_func,accu,prod,ylm,bigI,arg + fourpi=4.d0*dacos(-1.d0) f=fourpi**1.5d0 ac=dsqrt((a(1)-c(1))**2+(a(2)-c(2))**2+(a(3)-c(3))**2) @@ -755,8 +744,8 @@ double precision int_prod_bessel_loc,binom_func,accu,prod,ylm,bigI,arg dreal=2.d0*d2 - allocate (array_R_loc(-2:ntot_max+klocmax_max,klocmax_max,0:ntot_max)) - allocate (array_coefs(0:ntot_max,0:ntot_max,0:ntot_max,0:ntot_max,0:ntot_max,0:ntot_max)) + allocate (array_R_loc(-2:ntot+klocmax,klocmax,0:ntot)) + allocate (array_coefs(0:ntot,0:ntot,0:ntot,0:ntot,0:ntot,0:ntot)) do ktot=-2,ntotA+ntotB+klocmax do l=0,ntot @@ -2111,9 +2100,7 @@ end ! r : Distance between the Atomic Orbital center and the considered point double precision function ylm_orb(l,m,c,a,n_a,g_a,r) implicit none -integer lmax_max,ntot_max -parameter (lmax_max=2) -parameter (ntot_max=14) +integer lmax_max integer l,m double precision a(3),g_a,c(3) double precision prod,binom_func,accu,bigI,ylm,bessel_mod @@ -2131,7 +2118,6 @@ factor=fourpi*dexp(-arg) areal=2.d0*g_a*ac ntotA=n_a(1)+n_a(2)+n_a(3) -if(ntotA.gt.ntot_max)stop 'increase ntot_max' if(ac.eq.0.d0)then ylm_orb=dsqrt(fourpi)*r**ntotA*dexp(-g_a*r**2)*bigI(0,0,l,m,n_a(1),n_a(2),n_a(3)) From 767906a051bcff97e9ca3aa02d5f63f9efd531cf Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 20 Mar 2017 12:11:54 +0100 Subject: [PATCH 098/106] Localized MOs --- plugins/Hartree_Fock/localize_mos.irp.f | 75 +++++++++++++++++++++++ src/MO_Basis/EZFIO.cfg | 8 ++- src/MO_Basis/cholesky_mo.irp.f | 80 +++++++++++++------------ src/MO_Basis/mos.irp.f | 1 + src/Nuclei/nuclei.irp.f | 27 ++++++++- 5 files changed, 152 insertions(+), 39 deletions(-) create mode 100644 plugins/Hartree_Fock/localize_mos.irp.f diff --git a/plugins/Hartree_Fock/localize_mos.irp.f b/plugins/Hartree_Fock/localize_mos.irp.f new file mode 100644 index 00000000..8a665c64 --- /dev/null +++ b/plugins/Hartree_Fock/localize_mos.irp.f @@ -0,0 +1,75 @@ +program localize_mos + implicit none + integer :: rank, i,j,k + double precision, allocatable :: W(:,:) + double precision :: f, f_incr + + allocate (W(ao_num,ao_num)) + + W = 0.d0 + do k=1,elec_beta_num + do j=1,ao_num + do i=1,ao_num + W(i,j) = W(i,j) + mo_coef(i,k) * mo_coef(j,k) + enddo + enddo + enddo + +! call svd_mo(ao_num,elec_beta_num,W, size(W,1), & +! mo_coef(1,1),size(mo_coef,1)) + call cholesky_mo(ao_num,elec_beta_num,W, size(W,1), & + mo_coef(1,1),size(mo_coef,1),1.d-6,rank) + print *, rank + + if (elec_alpha_num>elec_alpha_num) then + W = 0.d0 + do k=elec_beta_num+1,elec_alpha_num + do j=1,ao_num + do i=1,ao_num + W(i,j) = W(i,j) + mo_coef(i,k) * mo_coef(j,k) + enddo + enddo + enddo + +! call svd_mo(ao_num,elec_alpha_num-elec_beta_num,W, size(W,1), & +! mo_coef(1,1),size(mo_coef,1)) + call cholesky_mo(ao_num,elec_alpha_num-elec_beta_num,W, size(W,1), & + mo_coef(1,elec_beta_num+1),size(mo_coef,1),1.d-6,rank) + print *, rank + endif + + W = 0.d0 + do k=elec_alpha_num+1,mo_tot_num + do j=1,ao_num + do i=1,ao_num + W(i,j) = W(i,j) + mo_coef(i,k) * mo_coef(j,k) + enddo + enddo + enddo + +! call svd_mo(ao_num,mo_tot_num-elec_alpha_num,W, size(W,1), & +! mo_coef(1,1),size(mo_coef,1)) + call cholesky_mo(ao_num,mo_tot_num-elec_alpha_num,W, size(W,1), & + mo_coef(1,elec_alpha_num+1),size(mo_coef,1),1.d-6,rank) + print *, rank + mo_label = "Localized" + + TOUCH mo_coef + + W(1:ao_num,1:mo_tot_num) = mo_coef(1:ao_num,1:mo_tot_num) + integer :: iorder(mo_tot_num) + double precision :: s(mo_tot_num), swap(ao_num) + do k=1,mo_tot_num + iorder(k) = k + s(k) = Fock_matrix_diag_mo(k) + enddo + call dsort(s(1),iorder(1),elec_beta_num) + call dsort(s(elec_beta_num+1),iorder(elec_beta_num+1),elec_alpha_num-elec_beta_num) + call dsort(s(elec_alpha_num+1),iorder(elec_alpha_num+1),mo_tot_num-elec_alpha_num) + do k=1,mo_tot_num + mo_coef(1:ao_num,k) = W(1:ao_num,iorder(k)) + print *, k, s(k) + enddo + call save_mos + +end diff --git a/src/MO_Basis/EZFIO.cfg b/src/MO_Basis/EZFIO.cfg index 5aec39e0..368b70a0 100644 --- a/src/MO_Basis/EZFIO.cfg +++ b/src/MO_Basis/EZFIO.cfg @@ -20,7 +20,13 @@ doc: MO occupation numbers interface: ezfio size: (mo_basis.mo_tot_num) +[mo_class] +type: character*(32) +doc: c: core, i: inactive, a: active, v: virtual, d: deleted +interface: ezfio, provider +size: (mo_basis.mo_tot_num) + [ao_md5] type: character*(32) doc: Ao_md5 -interface: ezfio \ No newline at end of file +interface: ezfio diff --git a/src/MO_Basis/cholesky_mo.irp.f b/src/MO_Basis/cholesky_mo.irp.f index 97b6abd2..65184c1e 100644 --- a/src/MO_Basis/cholesky_mo.irp.f +++ b/src/MO_Basis/cholesky_mo.irp.f @@ -1,8 +1,20 @@ subroutine cholesky_mo(n,m,P,LDP,C,LDC,tol_in,rank) implicit none BEGIN_DOC -! Cholesky decomposition of AO Density matrix to -! generate MOs +! Cholesky decomposition of AO Density matrix +! +! n : Number of AOs + +! m : Number of MOs +! +! P(LDP,n) : Density matrix in AO basis +! +! C(LDC,m) : MOs +! +! tol_in : tolerance +! +! rank : Nomber of local MOs (output) +! END_DOC integer, intent(in) :: n,m, LDC, LDP double precision, intent(in) :: P(LDP,n) @@ -15,9 +27,6 @@ subroutine cholesky_mo(n,m,P,LDP,C,LDC,tol_in,rank) integer :: ipiv(n) double precision:: tol double precision, allocatable :: W(:,:), work(:) - !DEC$ ATTRIBUTES ALIGN: 32 :: W - !DEC$ ATTRIBUTES ALIGN: 32 :: work - !DEC$ ATTRIBUTES ALIGN: 32 :: ipiv allocate(W(LDC,n),work(2*n)) tol=tol_in @@ -41,40 +50,37 @@ subroutine cholesky_mo(n,m,P,LDP,C,LDC,tol_in,rank) deallocate(W,work) end -BEGIN_PROVIDER [ double precision, mo_density_matrix, (mo_tot_num_align, mo_tot_num) ] +subroutine svd_mo(n,m,P,LDP,C,LDC) implicit none BEGIN_DOC - ! Density matrix in MO basis - END_DOC - integer :: i,j,k - mo_density_matrix = 0.d0 - do k=1,mo_tot_num - if (mo_occ(k) == 0.d0) then - cycle - endif - do j=1,ao_num - do i=1,ao_num - mo_density_matrix(i,j) = mo_density_matrix(i,j) + & - mo_occ(k) * mo_coef(i,k) * mo_coef(j,k) - enddo - enddo - enddo -END_PROVIDER +! Singular value decomposition of the AO Density matrix +! +! n : Number of AOs -BEGIN_PROVIDER [ double precision, mo_density_matrix_virtual, (mo_tot_num_align, mo_tot_num) ] - implicit none - BEGIN_DOC - ! Density matrix in MO basis (virtual MOs) +! m : Number of MOs +! +! P(LDP,n) : Density matrix in AO basis +! +! C(LDC,m) : MOs +! +! tol_in : tolerance +! +! rank : Nomber of local MOs (output) +! END_DOC - integer :: i,j,k - mo_density_matrix_virtual = 0.d0 - do k=1,mo_tot_num - do j=1,ao_num - do i=1,ao_num - mo_density_matrix_virtual(i,j) = mo_density_matrix_virtual(i,j) + & - (2.d0-mo_occ(k)) * mo_coef(i,k) * mo_coef(j,k) - enddo - enddo - enddo -END_PROVIDER + integer, intent(in) :: n,m, LDC, LDP + double precision, intent(in) :: P(LDP,n) + double precision, intent(out) :: C(LDC,m) + + integer :: info + integer :: i,k + integer :: ipiv(n) + double precision:: tol + double precision, allocatable :: W(:,:), work(:) + + allocate(W(LDC,n),work(2*n)) + call svd(P,LDP,C,LDC,W,size(W,1),m,n) + + deallocate(W,work) +end diff --git a/src/MO_Basis/mos.irp.f b/src/MO_Basis/mos.irp.f index 69abf7b3..19835395 100644 --- a/src/MO_Basis/mos.irp.f +++ b/src/MO_Basis/mos.irp.f @@ -258,3 +258,4 @@ subroutine mix_mo_jk(j,k) enddo end + diff --git a/src/Nuclei/nuclei.irp.f b/src/Nuclei/nuclei.irp.f index c4729713..b4da5fb1 100644 --- a/src/Nuclei/nuclei.irp.f +++ b/src/Nuclei/nuclei.irp.f @@ -169,7 +169,7 @@ END_PROVIDER 'Nuclear repulsion energy') END_PROVIDER -BEGIN_PROVIDER [ character*(128), element_name, (54)] +BEGIN_PROVIDER [ character*(128), element_name, (78)] BEGIN_DOC ! Array of the name of element, sorted by nuclear charge (integer) END_DOC @@ -227,4 +227,29 @@ BEGIN_PROVIDER [ character*(128), element_name, (54)] element_name(52) = 'Te' element_name(53) = 'I' element_name(54) = 'Xe' + element_name(55) = 'Cs' + element_name(56) = 'Ba' + element_name(57) = 'La' + element_name(58) = 'Ce' + element_name(59) = 'Pr' + element_name(60) = 'Nd' + element_name(61) = 'Pm' + element_name(62) = 'Sm' + element_name(63) = 'Eu' + element_name(64) = 'Gd' + element_name(65) = 'Tb' + element_name(66) = 'Dy' + element_name(67) = 'Ho' + element_name(68) = 'Er' + element_name(69) = 'Tm' + element_name(70) = 'Yb' + element_name(71) = 'Lu' + element_name(72) = 'Hf' + element_name(73) = 'Ta' + element_name(74) = 'W' + element_name(75) = 'Re' + element_name(76) = 'Os' + element_name(77) = 'Ir' + element_name(78) = 'Pt' + END_PROVIDER From 57c5892d4787a6c0fc69f9dbf18301f11bef6385 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 20 Mar 2017 16:53:09 +0100 Subject: [PATCH 099/106] Plumbing --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index fd9c1a72..eb706ccb 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -352,7 +352,7 @@ subroutine get_last_full_tooth(computed, last_tooth) last_tooth = 0 combLoop : do i=comb_teeth, 1, -1 - missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-5) ! /32 + missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-12) ! /4096 do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 if(.not.computed(j)) then missing -= 1 From 78198688ef4fc824570454de82cb4f6997a31c09 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 23 Mar 2017 14:48:21 +0100 Subject: [PATCH 100/106] Perturbative Triples --- plugins/mrcepa0/EZFIO.cfg | 6 ++++++ plugins/mrcepa0/dressing.irp.f | 33 +++++++++++++++------------------ 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/plugins/mrcepa0/EZFIO.cfg b/plugins/mrcepa0/EZFIO.cfg index b64637e6..a2dc1bb3 100644 --- a/plugins/mrcepa0/EZFIO.cfg +++ b/plugins/mrcepa0/EZFIO.cfg @@ -14,6 +14,12 @@ type: double precision doc: Calculated energy with PT2 contribution interface: ezfio +[perturbative_triples] +type: logical +doc: Compute perturbative contribution of the Triples +interface: ezfio,provider,ocaml +default: true + [energy] type: double precision doc: Calculated energy diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index 8627a8de..2fd40838 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -191,12 +191,20 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen end do end if + if (perturbative_triples) then + double precision :: Delta_E_inv(N_states) + double precision, external :: diag_H_mat_elem + do i_state=1,N_states + Delta_E_inv(i_state) = 1.d0 / (psi_ref_energy_diagonalized(i_state) - diag_H_mat_elem(tq(1,1,i_alpha),N_int) ) + enddo + endif do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hij_cache(k_sd)) call get_s2(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,sij_cache(k_sd)) enddo + ! |I> do i_I=1,N_det_ref ! Find triples and quadruple grand parents @@ -211,21 +219,13 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen ! |alpha> do k_sd=1,idx_alpha(0) - ! Loop if lambda == 0 - logical :: loop - - hka = hij_cache(k_sd) - + call get_excitation_degree(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(k_sd)),degree,Nint) if (degree > 2) then cycle endif ! - ! - !hIk = hij_mrcc(idx_alpha(k_sd),i_I) - ! call i_h_j(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(k_sd)),Nint,hIk) - ! |l> = Exc(k -> alpha) |I> call get_excitation(psi_non_ref(1,1,idx_alpha(k_sd)),tq(1,1,i_alpha),exc,degree2,phase,Nint) @@ -236,7 +236,9 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen enddo logical :: ok call apply_excitation(psi_ref(1,1,i_I), exc, tmp_det, ok, Nint) -! ok = ok .and. ( (degree2 /= 1).and.(degree /=1) ) + if (perturbative_triples) then + ok = ok .and. ( (degree2 /= 1).and.(degree /=1) ) + endif do i_state=1,N_states dIK(i_state) = dij(i_I, idx_alpha(k_sd), i_state) @@ -259,16 +261,11 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_gen endif enddo + else if (perturbative_triples) then - else - - ! Perturbative triples - double precision :: Delta_E - double precision, external :: diag_H_mat_elem + hka = hij_cache(idx_alpha(k_sd)) do i_state=1,N_states - Delta_E = psi_ref_energy_diagonalized(i_state) - diag_H_mat_elem(tq(1,1,i_alpha),N_int) - dka(i_state) = -dabs(hka / Delta_E ) -dka(i_state) = 0.d0 + dka(i_state) = hka * Delta_E_inv(i_state) enddo endif From 05c88a79ba1d8ade596f7a4acd916db5b6cd2868 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 23 Mar 2017 15:13:11 +0100 Subject: [PATCH 101/106] MRCC optimizations --- src/Determinants/filter_connected.irp.f | 16 +++++++++---- src/Determinants/slater_rules.irp.f | 31 +++++++++++++++---------- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/src/Determinants/filter_connected.irp.f b/src/Determinants/filter_connected.irp.f index b76540f7..ed6aa6d2 100644 --- a/src/Determinants/filter_connected.irp.f +++ b/src/Determinants/filter_connected.irp.f @@ -285,7 +285,9 @@ subroutine create_microlist(minilist, N_minilist, key_mask, microlist, idx_micro do i=1, N_minilist - do j=1,Nint + mobileMask(1,1) = iand(key_mask_neg(1,1), minilist(1,1,i)) + mobileMask(1,2) = iand(key_mask_neg(1,2), minilist(1,2,i)) + do j=2,Nint mobileMask(j,1) = iand(key_mask_neg(j,1), minilist(j,1,i)) mobileMask(j,2) = iand(key_mask_neg(j,2), minilist(j,2,i)) end do @@ -296,7 +298,9 @@ subroutine create_microlist(minilist, N_minilist, key_mask, microlist, idx_micro if(n_element(1) + n_element(2) /= 4) then idx_microlist(cur_microlist(0)) = i - do k=1,Nint + microlist(1,1,cur_microlist(0)) = minilist(1,1,i) + microlist(1,2,cur_microlist(0)) = minilist(1,2,i) + do k=2,Nint microlist(k,1,cur_microlist(0)) = minilist(k,1,i) microlist(k,2,cur_microlist(0)) = minilist(k,2,i) enddo @@ -305,8 +309,10 @@ subroutine create_microlist(minilist, N_minilist, key_mask, microlist, idx_micro do j=1,n_element(1) nt = list(j,1) idx_microlist(cur_microlist(nt)) = i + microlist(1,1,cur_microlist(nt)) = minilist(1,1,i) + microlist(1,2,cur_microlist(nt)) = minilist(1,2,i) ! TODO : Page faults - do k=1,Nint + do k=2,Nint microlist(k,1,cur_microlist(nt)) = minilist(k,1,i) microlist(k,2,cur_microlist(nt)) = minilist(k,2,i) enddo @@ -316,7 +322,9 @@ subroutine create_microlist(minilist, N_minilist, key_mask, microlist, idx_micro do j=1,n_element(2) nt = list(j,2) + mo_tot_num idx_microlist(cur_microlist(nt)) = i - do k=1,Nint + microlist(1,1,cur_microlist(nt)) = minilist(1,1,i) + microlist(1,2,cur_microlist(nt)) = minilist(1,2,i) + do k=2,Nint microlist(k,1,cur_microlist(nt)) = minilist(k,1,i) microlist(k,2,cur_microlist(nt)) = minilist(k,2,i) enddo diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index 7556e2b9..eebf9611 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -925,22 +925,29 @@ subroutine create_minilist(key_mask, fullList, miniList, idx_miniList, N_fullLis N_miniList = 0 + integer :: e_ab + e_ab = n_a+n_b do i=1,N_fullList - e_a = n_a - popcnt(iand(fullList(1, 1, i), key_mask(1, 1))) - e_b = n_b - popcnt(iand(fullList(1, 2, i), key_mask(1, 2))) + e_a = e_ab - popcnt(iand(fullList(1, 1, i), key_mask(1, 1))) & + - popcnt(iand(fullList(1, 2, i), key_mask(1, 2))) do ni=2,nint - e_a -= popcnt(iand(fullList(ni, 1, i), key_mask(ni, 1))) - e_b -= popcnt(iand(fullList(ni, 2, i), key_mask(ni, 2))) + e_a = e_a - popcnt(iand(fullList(ni, 1, i), key_mask(ni, 1))) & + - popcnt(iand(fullList(ni, 2, i), key_mask(ni, 2))) end do - if(e_a + e_b <= 2) then - N_miniList = N_miniList + 1 - do ni=1,Nint - miniList(ni,1,N_miniList) = fullList(ni,1,i) - miniList(ni,2,N_miniList) = fullList(ni,2,i) - enddo - idx_miniList(N_miniList) = i - end if + if(e_a > 2) then + cycle + endif + + N_miniList = N_miniList + 1 + miniList(1,1,N_miniList) = fullList(1,1,i) + miniList(1,2,N_miniList) = fullList(1,2,i) + do ni=2,Nint + miniList(ni,1,N_miniList) = fullList(ni,1,i) + miniList(ni,2,N_miniList) = fullList(ni,2,i) + enddo + idx_miniList(N_miniList) = i + end do end subroutine From 2cd4a513dceed5fae94faf5c65d4222e906ac356 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 23 Mar 2017 15:41:27 +0100 Subject: [PATCH 102/106] Accelerated Davidson MRCC --- plugins/MRCC_Utils/davidson.irp.f | 5 +++-- src/Determinants/filter_connected.irp.f | 16 ++++------------ 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index b96e9585..436b89a4 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -1023,7 +1023,7 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i Vt = 0.d0 St = 0.d0 - !$OMP DO SCHEDULE(static,1) + !$OMP DO SCHEDULE(guided) do sh=1,shortcut(0,1) do sh2=sh,shortcut(0,1) exa = 0 @@ -1066,7 +1066,8 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i enddo enddo !$OMP END DO - !$OMP DO SCHEDULE(static,1) + + !$OMP DO SCHEDULE(guided) do sh=1,shortcut(0,2) do i=shortcut(sh,2),shortcut(sh+1,2)-1 org_i = sort_idx(i,2) diff --git a/src/Determinants/filter_connected.irp.f b/src/Determinants/filter_connected.irp.f index ed6aa6d2..b76540f7 100644 --- a/src/Determinants/filter_connected.irp.f +++ b/src/Determinants/filter_connected.irp.f @@ -285,9 +285,7 @@ subroutine create_microlist(minilist, N_minilist, key_mask, microlist, idx_micro do i=1, N_minilist - mobileMask(1,1) = iand(key_mask_neg(1,1), minilist(1,1,i)) - mobileMask(1,2) = iand(key_mask_neg(1,2), minilist(1,2,i)) - do j=2,Nint + do j=1,Nint mobileMask(j,1) = iand(key_mask_neg(j,1), minilist(j,1,i)) mobileMask(j,2) = iand(key_mask_neg(j,2), minilist(j,2,i)) end do @@ -298,9 +296,7 @@ subroutine create_microlist(minilist, N_minilist, key_mask, microlist, idx_micro if(n_element(1) + n_element(2) /= 4) then idx_microlist(cur_microlist(0)) = i - microlist(1,1,cur_microlist(0)) = minilist(1,1,i) - microlist(1,2,cur_microlist(0)) = minilist(1,2,i) - do k=2,Nint + do k=1,Nint microlist(k,1,cur_microlist(0)) = minilist(k,1,i) microlist(k,2,cur_microlist(0)) = minilist(k,2,i) enddo @@ -309,10 +305,8 @@ subroutine create_microlist(minilist, N_minilist, key_mask, microlist, idx_micro do j=1,n_element(1) nt = list(j,1) idx_microlist(cur_microlist(nt)) = i - microlist(1,1,cur_microlist(nt)) = minilist(1,1,i) - microlist(1,2,cur_microlist(nt)) = minilist(1,2,i) ! TODO : Page faults - do k=2,Nint + do k=1,Nint microlist(k,1,cur_microlist(nt)) = minilist(k,1,i) microlist(k,2,cur_microlist(nt)) = minilist(k,2,i) enddo @@ -322,9 +316,7 @@ subroutine create_microlist(minilist, N_minilist, key_mask, microlist, idx_micro do j=1,n_element(2) nt = list(j,2) + mo_tot_num idx_microlist(cur_microlist(nt)) = i - microlist(1,1,cur_microlist(nt)) = minilist(1,1,i) - microlist(1,2,cur_microlist(nt)) = minilist(1,2,i) - do k=2,Nint + do k=1,Nint microlist(k,1,cur_microlist(nt)) = minilist(k,1,i) microlist(k,2,cur_microlist(nt)) = minilist(k,2,i) enddo From 1ae57b97f8140c711d1514908bb64ec4f881548b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 25 Mar 2017 11:56:08 +0100 Subject: [PATCH 103/106] Temporary fix for parallel davdison --- src/Davidson/davidson_parallel.irp.f | 14 +++++++++----- src/Davidson/diagonalization_hs2.irp.f | 4 ++-- src/Davidson/u0Hu0.irp.f | 3 ++- src/Utils/LinearAlgebra.irp.f | 4 ++-- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index eee6deb6..afb870a2 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -54,6 +54,8 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) org_j = sort_idx_(j,1) call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) +! call i_h_j (sorted_(1,j,1),sorted_(1,i,1),n_int,hij) +! call get_s2(sorted_(1,j,1),sorted_(1,i,1),n_int,s2) if(.not. wrotten(ii)) then wrotten(ii) = .true. idx(ii) = org_i @@ -70,11 +72,15 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) enddo - if (blockb <= shortcut_(0,2)) then + if ( blockb <= shortcut_(0,2) ) then sh=blockb do sh2=sh, shortcut_(0,2), shortcut_(0,1) do i=blockb2+shortcut_(sh2,2),shortcut_(sh2+1,2)-1, istep ii += 1 + if (ii>bs) then + print *, irp_here + stop 'ii>bs' + endif org_i = sort_idx_(i,2) do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1 if(i == j) cycle @@ -88,6 +94,8 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) if(ext == 4) then call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) +! call i_h_j (sorted_(1,j,2),sorted_(1,i,2),n_int,hij) +! call get_s2(sorted_(1,j,2),sorted_(1,i,2),n_int,s2) if(.not. wrotten(ii)) then wrotten(ii) = .true. idx(ii) = org_i @@ -133,10 +141,8 @@ subroutine davidson_collect(N, idx, vt, st , v0t, s0t) integer :: i, j, k - !DIR$ IVDEP do i=1,N k = idx(i) - !DIR$ IVDEP do j=1,N_states_diag v0t(j,k) = v0t(j,k) + vt(j,i) s0t(j,k) = s0t(j,k) + st(j,i) @@ -415,9 +421,7 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LD deallocate(idx,vt,st) integer :: i,j - !DIR$ IVDEP do j=1,N_states_diag - !DIR$ IVDEP do i=1,dav_size v0(i,j) = v0t(j,i) s0(i,j) = s0t(j,i) diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index b50ede7c..f458e32d 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -191,8 +191,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ASSERT (Nint > 0) ASSERT (Nint == N_int) - update_dets = 1 - ! Davidson iterations ! =================== @@ -213,6 +211,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s enddo + update_dets = 1 + do while (.not.converged) do k=1,N_st_diag diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 03bf0f00..fd7efce8 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -339,7 +339,8 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ workload += (shortcut_(j+1,2) - shortcut_(j, 2))**2 end do end do - istep = 1+ int(workload*target_workload_inv) +! istep = 1+ int(workload*target_workload_inv) + istep = 1 do blockb2=0, istep-1 write(tmp_task,'(3(I9,X),''|'',X)') sh, blockb2, istep task = task//tmp_task diff --git a/src/Utils/LinearAlgebra.irp.f b/src/Utils/LinearAlgebra.irp.f index 44a15ddf..2c318688 100644 --- a/src/Utils/LinearAlgebra.irp.f +++ b/src/Utils/LinearAlgebra.irp.f @@ -152,8 +152,8 @@ subroutine ortho_qr(A,LDA,m,n) LWORK=2*WORK(1) deallocate(WORK) allocate(WORK(LWORK)) - call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO ) - call dorgqr(m, n, n, A, LDA, tau, WORK, LWORK, INFO) + call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO ) + call dorgqr(m, n, n, A, LDA, tau, WORK, LWORK, INFO) deallocate(WORK,jpvt,tau) end From 3d21999c7ed3aa6b24f653f2c7c621a5ef54bd75 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 25 Mar 2017 11:57:06 +0100 Subject: [PATCH 104/106] ZMQ checks --- .../selection_davidson_slave.irp.f | 2 +- plugins/mrcc_selected/ezfio_interface.irp.f | 2 +- src/Davidson/davidson_parallel.irp.f | 29 +++++++++++++++++-- src/Davidson/u0Hu0.irp.f | 2 +- 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index 3d10612f..a1e365a4 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -23,7 +23,7 @@ subroutine run_wf integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket double precision :: energy(N_states) - character*(64) :: states(2) + character*(64) :: states(4) integer :: rc, i logical :: force_update diff --git a/plugins/mrcc_selected/ezfio_interface.irp.f b/plugins/mrcc_selected/ezfio_interface.irp.f index 54d993fe..47e7cea5 100644 --- a/plugins/mrcc_selected/ezfio_interface.irp.f +++ b/plugins/mrcc_selected/ezfio_interface.irp.f @@ -1,6 +1,6 @@ ! DO NOT MODIFY BY HAND ! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py -! from file /panfs/panasas/cnt0024/cpq1738/scemama/workdir/quantum_package/src/mrcc_selected/EZFIO.cfg +! from file /ccc/work/cont003/gen1738/scemama/quantum_package/src/mrcc_selected/EZFIO.cfg BEGIN_PROVIDER [ double precision, thresh_dressed_ci ] diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index eee6deb6..0495431d 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -145,13 +145,14 @@ subroutine davidson_collect(N, idx, vt, st , v0t, s0t) end subroutine -subroutine davidson_init(zmq_to_qp_run_socket,u,n0,n,n_st,update_dets) +subroutine davidson_init(zmq_to_qp_run_socket,dets_in,u,n0,n,n_st,update_dets) use f77_zmq implicit none integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket integer, intent(in) :: n0,n, n_st, update_dets double precision, intent(in) :: u(n0,n_st) + integer(bit_kind), intent(in) :: dets_in(N_int,2,n) integer :: i,k @@ -160,8 +161,8 @@ subroutine davidson_init(zmq_to_qp_run_socket,u,n0,n,n_st,update_dets) touch dav_size do i=1,dav_size do k=1,N_int - dav_det(k,1,i) = psi_det(k,1,i) - dav_det(k,2,i) = psi_det(k,2,i) + dav_det(k,1,i) = dets_in(k,1,i) + dav_det(k,2,i) = dets_in(k,2,i) enddo enddo touch dav_det @@ -527,18 +528,40 @@ subroutine davidson_miniserver_get(force_update) rc = f77_zmq_connect(requester,address) rc = f77_zmq_send(requester, 'ut', 2, 0) + rc = f77_zmq_recv(requester, update_dets, 4, 0) + if (rc /= 4) then + print *, irp_here, ': f77_zmq_recv(requester, update_dets, 4, 0)' + print *, irp_here, ': rc = ', rc + endif + rc = f77_zmq_recv(requester, dav_size, 4, 0) + if (rc /= 4) then + print *, irp_here, ': f77_zmq_recv(requester, dav_size, 4, 0)' + print *, irp_here, ': rc = ', rc + endif if (update_dets == 1 .or. force_update) then TOUCH dav_size endif rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states_diag, 0) + if (rc /= 8*dav_size*N_states_diag) then + print *, irp_here, ': f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states_diag, 0)' + print *, irp_here, ': rc = ', rc + endif SOFT_TOUCH dav_ut if (update_dets == 1 .or. force_update) then rc = f77_zmq_send(requester, 'det', 3, 0) rc = f77_zmq_recv(requester, dav_size, 4, 0) + if (rc /= 4) then + print *, irp_here, ': f77_zmq_recv(requester, dav_size, 4, 0)' + print *, irp_here, ': rc = ', rc + endif rc = f77_zmq_recv(requester, dav_det, 16*N_int*dav_size, 0) + if (rc /= 16*N_int*dav_size) then + print *, irp_here, ': f77_zmq_recv(requester, dav_det, 16*N_int*dav_size, 0)' + print *, irp_here, ': rc = ', rc + endif SOFT_TOUCH dav_det endif diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 03bf0f00..de4851f5 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -308,7 +308,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ v_0 = 0.d0 s_0 = 0.d0 - call davidson_init(handler,u_0,size(u_0,1),n,N_st,update_dets) + call davidson_init(handler,keys_tmp,u_0,size(u_0,1),n,N_st,update_dets) ave_workload = 0.d0 do sh=1,shortcut_(0,1) From 650a1a1956faac135e905d430315d674a97a3061 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 26 Mar 2017 23:43:48 +0200 Subject: [PATCH 105/106] Integrals promela --- promela/integrals.pml | 197 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 promela/integrals.pml diff --git a/promela/integrals.pml b/promela/integrals.pml new file mode 100644 index 00000000..9b9e0273 --- /dev/null +++ b/promela/integrals.pml @@ -0,0 +1,197 @@ +#define NPROC 3 +#define BUFSIZE 2 +#define NTASKS 5 + +#define STATE 1 + +mtype = { NONE, OK, WRONG_STATE, TERMINATE, GETPSI, PUTPSI, NEWJOB, ENDJOB, SETRUNNING, + SETWAITING, SETSTOPPED, CONNECT, DISCONNECT, ADDTASK, DELTASK, TASKDONE, GETTASK, + PSI, TASK + } + +typedef rep_message { + mtype m = NONE; + byte value = 0; +} + +typedef req_message { + mtype m = NONE; + byte value = 0; + chan reply = [BUFSIZE] of { rep_message }; +} + +#define send_req( MESSAGE, VALUE ) msg.m=MESSAGE ; msg.value=VALUE ; rep_socket ! msg; msg.reply ? reply + +chan rep_socket = [NPROC] of { req_message }; +chan pull_socket = [NPROC] of { byte }; +chan pair_socket = [NPROC] of { req_message }; +chan task_queue = [NTASKS+2] of { byte }; + + +active proctype qp_run() { + + bit psi = 0; + byte running = 0; + bit state = 0; + bit terminate = 0; + byte ntasks = 0; + req_message msg; + rep_message reply; + byte nclients = 0; + byte task; + + do + :: ( (terminate == 1) && (nclients == 0) && (ntasks == 0) ) -> break + :: else -> + + rep_socket ? msg; + printf("req: "); printm(msg.m); printf("\t%d\n",msg.value); + + if + :: ( msg.m == TERMINATE ) -> + assert (state != 0); + terminate = 1; + reply.m = OK; + + :: ( msg.m == PUTPSI ) -> + assert (state != 0); + assert (psi == 0); + psi = 1; + reply.m = OK; + + :: ( msg.m == GETPSI ) -> + assert (state != 0); + assert (psi == 1); + reply.m = PSI; + + :: ( msg.m == NEWJOB ) -> + state = msg.value + reply.m = OK; + + :: ( msg.m == ADDTASK ) -> + assert (state != 0); + task_queue ! msg.value; + ntasks++; + reply.m = OK; + + :: ( msg.m == GETTASK ) -> + assert (nclients > 0); + assert (state != 0); + if + :: ( task_queue ?[task] ) -> + reply.m = TASK; + task_queue ? reply.value + :: else -> + reply.m = NONE; + reply.value = 255; + fi; + + :: ( msg.m == TASKDONE) -> + assert (state != 0); + assert (nclients > 0); + assert (ntasks > 0); + reply.m = OK; + + :: ( msg.m == DELTASK ) -> + assert (state != 0); + ntasks--; + if + :: (ntasks > 0) -> reply.value = 1; + :: else -> reply.value = 0; + fi; + reply.m = OK; + + :: ( msg.m == CONNECT ) -> + nclients++; + reply.m = OK; + + :: ( msg.m == DISCONNECT ) -> + nclients--; + reply.m = OK; + + fi + msg.reply ! reply + od + +} + + +active proctype master() { + + req_message msg; + rep_message reply; + byte count; + + /* New parallel job */ + send_req( NEWJOB, STATE ); + assert (reply.m == OK); + + /* Add tasks */ + count = 0; + do + :: (count == NTASKS) -> break; + :: else -> + count++; + send_req( ADDTASK, count ); + assert (reply.m == OK); + od + + /* Run collector */ + run collector(); + + /* Run slaves */ + count = 0; + do + :: (count == NPROC) -> break; + :: else -> count++; run slave(); + od + +} + +proctype slave() { + + req_message msg; + rep_message reply; + byte task; + + send_req( CONNECT, 0 ); + assert (reply.m == OK); + + task = 1; + do + :: (task == 255) -> break; + :: else -> + send_req( GETTASK, 0); + if + :: (reply.m == NONE) -> + task = 255; + :: (reply.m == TASK) -> + /* Compute task */ + task = reply.value; + send_req( TASKDONE, task); + assert (reply.m == OK); + pull_socket ! task; + fi + od + send_req( DISCONNECT, 0); + assert (reply.m == OK); + +} + +proctype collector() { + byte task; + req_message msg; + rep_message reply; + bit loop = 1; + do + :: (loop == 0) -> break + :: else -> + pull_socket ? task; + /* Handle result */ + send_req(DELTASK, task); + assert (reply.m == OK); + loop = reply.value; + od + send_req( TERMINATE, 0); + assert (reply.m == OK); +} From b6ea2a8a450bd338aae427f19c2fbd0a9cc8be30 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 27 Mar 2017 13:20:01 +0200 Subject: [PATCH 106/106] Working on promela --- promela/integrals.pml | 111 +++++++++++++++++++++++++++++++++++------- 1 file changed, 93 insertions(+), 18 deletions(-) diff --git a/promela/integrals.pml b/promela/integrals.pml index 9b9e0273..7b05156f 100644 --- a/promela/integrals.pml +++ b/promela/integrals.pml @@ -1,12 +1,10 @@ -#define NPROC 3 +#define NPROC 1 #define BUFSIZE 2 -#define NTASKS 5 - -#define STATE 1 +#define NTASKS 3 mtype = { NONE, OK, WRONG_STATE, TERMINATE, GETPSI, PUTPSI, NEWJOB, ENDJOB, SETRUNNING, SETWAITING, SETSTOPPED, CONNECT, DISCONNECT, ADDTASK, DELTASK, TASKDONE, GETTASK, - PSI, TASK + PSI, TASK, PUTPSI_REPLY, WAITING, RUNNING, STOPPED } typedef rep_message { @@ -16,32 +14,41 @@ typedef rep_message { typedef req_message { mtype m = NONE; + byte state = 0; byte value = 0; chan reply = [BUFSIZE] of { rep_message }; } -#define send_req( MESSAGE, VALUE ) msg.m=MESSAGE ; msg.value=VALUE ; rep_socket ! msg; msg.reply ? reply +#define send_req( MESSAGE, VALUE ) msg.m=MESSAGE ; msg.value=VALUE ; msg.state=state; rep_socket ! msg; msg.reply ? reply chan rep_socket = [NPROC] of { req_message }; chan pull_socket = [NPROC] of { byte }; chan pair_socket = [NPROC] of { req_message }; chan task_queue = [NTASKS+2] of { byte }; +chan pub_socket = [NTASKS+2] of { mtype }; +bit socket_up = 0; +mtype global_state; /* Sent by pub */ active proctype qp_run() { bit psi = 0; - byte running = 0; - bit state = 0; - bit terminate = 0; + bit address_tcp = 0; + bit address_inproc = 0; + bit running = 0; + byte status = 0; + byte state = 0; byte ntasks = 0; req_message msg; rep_message reply; byte nclients = 0; byte task; + socket_up = 1; + running = 1; do - :: ( (terminate == 1) && (nclients == 0) && (ntasks == 0) ) -> break +// :: ( (running == 0) && (nclients == 0) && (ntasks == 0) ) -> break + :: ( running == 0 ) -> break :: else -> rep_socket ? msg; @@ -50,26 +57,40 @@ active proctype qp_run() { if :: ( msg.m == TERMINATE ) -> assert (state != 0); - terminate = 1; + assert (msg.state == state); + running = 0; reply.m = OK; :: ( msg.m == PUTPSI ) -> assert (state != 0); + assert (msg.state == state); assert (psi == 0); psi = 1; - reply.m = OK; + reply.m = PUTPSI_REPLY; :: ( msg.m == GETPSI ) -> assert (state != 0); + assert (msg.state == state); assert (psi == 1); reply.m = PSI; :: ( msg.m == NEWJOB ) -> - state = msg.value + assert (state == 0); + state = msg.value; + pair_socket ! WAITING; + reply.m = OK; + reply.value = state; + + :: ( msg.m == ENDJOB ) -> + assert (state != 0); + assert (msg.state == state); + state = 0; + pair_socket ! WAITING; reply.m = OK; :: ( msg.m == ADDTASK ) -> assert (state != 0); + assert (msg.state == state); task_queue ! msg.value; ntasks++; reply.m = OK; @@ -77,23 +98,28 @@ active proctype qp_run() { :: ( msg.m == GETTASK ) -> assert (nclients > 0); assert (state != 0); + assert (msg.state == state); if :: ( task_queue ?[task] ) -> + pair_socket ! WAITING; reply.m = TASK; task_queue ? reply.value :: else -> + pair_socket ! RUNNING; reply.m = NONE; reply.value = 255; fi; :: ( msg.m == TASKDONE) -> assert (state != 0); + assert (msg.state == state); assert (nclients > 0); assert (ntasks > 0); reply.m = OK; :: ( msg.m == DELTASK ) -> assert (state != 0); + assert (msg.state == state); ntasks--; if :: (ntasks > 0) -> reply.value = 1; @@ -102,16 +128,34 @@ active proctype qp_run() { reply.m = OK; :: ( msg.m == CONNECT ) -> + assert ( state != 0 ) nclients++; reply.m = OK; + reply.value = state; :: ( msg.m == DISCONNECT ) -> + assert ( msg.state == state ) nclients--; reply.m = OK; + :: ( msg.m == STOPPED ) -> + pair_socket ! STOPPED; + reply.m = OK; + + :: ( msg.m == WAITING ) -> + pair_socket ! WAITING; + reply.m = OK; + + :: ( msg.m == RUNNING ) -> + assert ( state != 0 ); + pair_socket ! RUNNING; + reply.m = OK; + fi msg.reply ! reply od + pair_socket ! STOPPED; + socket_up = 0; } @@ -120,10 +164,14 @@ active proctype master() { req_message msg; rep_message reply; + byte state = 0; byte count; + run pub_thread(); + /* New parallel job */ - send_req( NEWJOB, STATE ); + state=1; + send_req( NEWJOB, state ); assert (reply.m == OK); /* Add tasks */ @@ -137,7 +185,7 @@ active proctype master() { od /* Run collector */ - run collector(); + run collector(state); /* Run slaves */ count = 0; @@ -153,9 +201,25 @@ proctype slave() { req_message msg; rep_message reply; byte task; + byte state; - send_req( CONNECT, 0 ); - assert (reply.m == OK); + msg.m=CONNECT; + msg.state = 0; + + if + :: (!socket_up) -> goto exit; + :: else -> skip; + fi + rep_socket ! msg; + + if + :: (!socket_up) -> goto exit; + :: else -> skip; + fi + msg.reply ? reply; + + state = reply.value; + task = 1; do @@ -176,9 +240,10 @@ proctype slave() { send_req( DISCONNECT, 0); assert (reply.m == OK); +exit: skip; } -proctype collector() { +proctype collector(byte state) { byte task; req_message msg; rep_message reply; @@ -195,3 +260,13 @@ proctype collector() { send_req( TERMINATE, 0); assert (reply.m == OK); } + +proctype pub_thread() { + mtype state = WAITING; + do + :: (state == STOPPED) -> break; + :: (pair_socket ? [state]) -> + pair_socket ? state; + global_state = state; + od +}