diff --git a/config/gfortran_avx.cfg b/config/gfortran_avx.cfg index 6672bca1..80bbbec9 100644 --- a/config/gfortran_avx.cfg +++ b/config/gfortran_avx.cfg @@ -10,7 +10,7 @@ # # [COMMON] -FC : gfortran -ffree-line-length-none -I . -mavx +FC : gfortran -ffree-line-length-none -I . -mavx -g LAPACK_LIB : -llapack -lblas IRPF90 : irpf90 IRPF90_FLAGS : --ninja --align=32 diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 47e79b7c..42337258 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -133,7 +133,7 @@ subroutine ZMQ_selection(N, pt2) integer :: i_generator, i_generator_start, i_generator_max, step ! step = int(max(1.,10*elec_num/mo_tot_num) - step = int(10000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) + step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) step = max(1,step) do i= N_det_generators, 1, -step i_generator_start = max(i-step+1,1) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index ff32d56b..a0209cc5 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -1,19 +1,5 @@ use bitmasks -! BEGIN_PROVIDER [ double precision, integral8, (mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) ] -! use bitmasks -! implicit none -! -! integer :: h1, h2 -! -! integral8 = 0d0 -! do h1=1, mo_tot_num -! do h2=1, mo_tot_num -! call get_mo_bielec_integrals_ij(h1, h2 ,mo_tot_num,integral8(1,1,h1,h2),mo_integrals_map) -! end do -! end do -! END_PROVIDER - double precision function integral8(i,j,k,l) implicit none @@ -56,14 +42,12 @@ subroutine get_mask_phase(det, phasemask) integer :: s, ni, i logical :: change -! phasemask = 0_8 phasemask = 0_1 do s=1,2 change = .false. do ni=1,N_int do i=0,bit_kind_size-1 if(BTEST(det(ni, s), i)) change = .not. change -! if(change) phasemask(ni, s) = ibset(phasemask(ni, s), i) if(change) phasemask((ni-1)*bit_kind_size + i + 1, s) = 1_1 end do end do @@ -100,21 +84,6 @@ subroutine select_connected(i_generator,E0,pt2,b) end subroutine -subroutine spot_occupied(mask, bannedOrb) - use bitmasks - implicit none - - integer(bit_kind),intent(in) :: mask(N_int) - logical, intent(inout) :: bannedOrb(mo_tot_num) - integer :: i, ne, list(mo_tot_num) - - call bitstring_to_list(mask, list, ne, N_int) - do i=1, ne - bannedOrb(list(i)) = .true. - end do -end subroutine - - double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2) use bitmasks implicit none @@ -125,16 +94,6 @@ double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2) integer(1) :: np double precision, parameter :: res(0:1) = (/1d0, -1d0/) -! call assert(s1 /= s2 .or. (h1 <= h2 .and. p1 <= p2), irp_here) -! np = 0 -! change = btest(phasemask(1+ishft(h1, -6), s1), iand(h1-1, 63)) -! change = xor(change, btest(phasemask(1+ishft(p1, -6), s1), iand(p1-1, 63))) -! if(xor(change, p1 < h1)) np = 1 -! -! change = btest(phasemask(1+ishft(h2, -6), s2), iand(h2-1, 63)) -! change = xor(change, btest(phasemask(1+ishft(p2, -6), s2), iand(p2-1, 63))) -! if(xor(change, p2 < h2)) np = np + 1 - np = phasemask(h1,s1) + phasemask(p1,s1) + phasemask(h2,s2) + phasemask(p2,s2) if(p1 < h1) np = np + 1_1 if(p2 < h2) np = np + 1_1 diff --git a/plugins/Full_CI_ZMQ/selection_double.irp.f b/plugins/Full_CI_ZMQ/selection_double.irp.f index 3e602c21..977622fd 100644 --- a/plugins/Full_CI_ZMQ/selection_double.irp.f +++ b/plugins/Full_CI_ZMQ/selection_double.irp.f @@ -12,10 +12,17 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p type(selection_buffer), intent(inout) :: buf double precision :: mat(N_states, mo_tot_num, mo_tot_num) - integer :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i - integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2) + integer :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i,j,nt,ii + integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2), pmask(N_int, 2) logical :: fullMatch, ok + integer(bit_kind) :: mobMask(N_int, 2), negMask(N_int, 2) + integer,allocatable :: preinteresting(:), prefullinteresting(:), interesting(:), fullinteresting(:) + integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) + + allocate(minilist(N_int, 2, N_det_selectors), fullminilist(N_int, 2, N_det)) + allocate(preinteresting(0:N_det_selectors), prefullinteresting(0:N_det), interesting(0:N_det_selectors), fullinteresting(0:N_det)) + do k=1,N_int hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) hole (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2)) @@ -30,27 +37,104 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) + + preinteresting(0) = 0 + prefullinteresting(0) = 0 + + do i=1,N_int + negMask(i,1) = not(psi_det_generators(i,1,i_generator)) + negMask(i,2) = not(psi_det_generators(i,2,i_generator)) + end do + + do i=1,N_det + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do - !call assert(psi_det_generators(1,1,i_generator) == psi_det_sorted(1,1,i_generator), "sorted selex") + if(nt <= 4) then + if(i <= N_det_selectors) then + preinteresting(0) += 1 + preinteresting(preinteresting(0)) = i + else if(nt <= 2) then + prefullinteresting(0) += 1 + prefullinteresting(prefullinteresting(0)) = i + end if + end if + end do + + do s1=1,2 - do s2=s1,2 - sp = s1 - if(s1 /= s2) sp = 3 - do i1=N_holes(s1),1,-1 ! Generate low excitations first + do i1=N_holes(s1),1,-1 ! Generate low excitations first + h1 = hole_list(i1,s1) + call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int) + + do i=1,N_int + negMask(i,1) = not(pmask(i,1)) + negMask(i,2) = not(pmask(i,2)) + end do + + interesting(0) = 0 + fullinteresting(0) = 0 + + do ii=1,preinteresting(0) + i = preinteresting(ii) + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt <= 4) then + interesting(0) += 1 + interesting(interesting(0)) = i + minilist(:,:,interesting(0)) = psi_det_sorted(:,:,i) + if(nt <= 2) then + fullinteresting(0) += 1 + fullinteresting(fullinteresting(0)) = i + fullminilist(:,:,fullinteresting(0)) = psi_det_sorted(:,:,i) + end if + end if + end do + + do ii=1,prefullinteresting(0) + i = prefullinteresting(ii) + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt <= 2) then + fullinteresting(0) += 1 + fullinteresting(fullinteresting(0)) = i + fullminilist(:,:,fullinteresting(0)) = psi_det_sorted(:,:,i) + end if + end do + + do s2=s1,2 + sp = s1 + if(s1 /= s2) sp = 3 + ib = 1 if(s1 == s2) ib = i1+1 do i2=N_holes(s2),ib,-1 ! Generate low excitations first - h1 = hole_list(i1,s1) + h2 = hole_list(i2,s2) - call apply_holes(psi_det_generators(1,1,i_generator), s1,h1,s2,h2, mask, ok, N_int) - !call assert(ok, irp_here) + call apply_hole(pmask, s2,h2, mask, ok, N_int) logical :: banned(mo_tot_num, mo_tot_num,2) logical :: bannedOrb(mo_tot_num, 2) banned = .false. - bannedOrb(h1, s1) = .true. - bannedOrb(h2, s2) = .true. + + call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) + + if(fullMatch) cycle bannedOrb(1:mo_tot_num, 1:2) = .true. do s3=1,2 @@ -58,15 +142,9 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p bannedOrb(particle_list(i,s3), s3) = .false. enddo enddo - - - call spot_isinwf(mask, psi_det_sorted, i_generator, N_det, banned, fullMatch) - if(fullMatch) cycle - if(sp /= 2) call spot_occupied(mask(1,1), bannedOrb(1,1)) - if(sp /= 1) call spot_occupied(mask(1,2), bannedOrb(1,2)) - + mat = 0d0 - call splash_pq(mask, sp, psi_det_sorted, i_generator, N_det_selectors, bannedOrb, banned, mat) + call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, mat, buf) enddo enddo @@ -88,14 +166,13 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d double precision, intent(inout) :: pt2(N_states) type(selection_buffer), intent(inout) :: buf logical :: ok - integer :: s1, s2, p1, p2, ib, j + integer :: s1, s2, p1, p2, ib, j, istate integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) - double precision :: e_pert, delta_E, val, Hii + double precision :: e_pert, delta_E, val, Hii, max_e_pert double precision, external :: diag_H_mat_elem_fock logical, external :: detEq - - if(N_states > 1) stop "fill_buffer_double N_states > 1" + if(sp == 3) then s1 = 1 @@ -106,7 +183,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d end if call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) - !call assert(ok, "sosoqs") + do p1=1,mo_tot_num if(bannedOrb(p1, s1)) cycle ib = 1 @@ -116,55 +193,57 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(banned(p1,p2)) cycle if(mat(1, p1, p2) == 0d0) cycle call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) - !call assert(ok, "ododod") - val = mat(1, p1, p2) + Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) - - delta_E = E0(1) - Hii - if (delta_E < 0.d0) then - e_pert = 0.5d0 * (-dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) - else - e_pert = 0.5d0 * ( dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) - endif - pt2(1) += e_pert - if(dabs(e_pert) > buf%mini) then -! do j=1,buf%cur-1 -! if(detEq(buf%det(1,1,j), det, N_int)) then -! print *, "tops" -! print *, i_generator, s1, s2, h1, h2,p1,p2 -! stop -! end if -! end do - call add_to_selection_buffer(buf, det, e_pert) + max_e_pert = 0d0 + + do istate=1,N_states + delta_E = E0(istate) - Hii + val = mat(istate, p1, p2) + if (delta_E < 0.d0) then + e_pert = 0.5d0 * (-dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) + else + e_pert = 0.5d0 * ( dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) + endif + pt2(istate) += e_pert + if(dabs(e_pert) > dabs(max_e_pert)) max_e_pert = e_pert + end do + + if(dabs(max_e_pert) > buf%mini) then + call add_to_selection_buffer(buf, det, max_e_pert) end if end do end do end subroutine -subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat) +subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) use bitmasks implicit none - + + integer, intent(in) :: interesting(0:N_sel) + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) integer, intent(in) :: sp, i_gen, N_sel logical, intent(inout) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num, 2) double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num) - integer :: i, j, k, l, h(0:2,2), p(0:4,2), nt + integer :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) - logical :: bandon - +! logical :: bandon +! +! bandon = .false. mat = 0d0 - bandon = .false. do i=1,N_int negMask(i,1) = not(mask(i,1)) negMask(i,2) = not(mask(i,2)) end do - do i=1, N_sel + do i=1, N_sel ! interesting(0) + !i = interesting(ii) + nt = 0 do j=1,N_int mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) @@ -173,7 +252,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat) end do if(nt > 4) cycle - + do j=1,N_int perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) @@ -185,14 +264,12 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat) call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) - !call assert(nt >= 2, irp_here//"qsd") - if(i < i_gen) then + if(interesting(i) < i_gen) then if(nt == 4) call past_d2(banned, p, sp) if(nt == 3) call past_d1(bannedOrb, p) - !call assert(nt /= 2, "should have been discarded") else - if(i == i_gen) then - bandon = .true. + if(interesting(i) == i_gen) then +! bandon = .true. if(sp == 3) then banned(:,:,2) = transpose(banned(:,:,1)) else @@ -204,15 +281,14 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat) end if end if if(nt == 4) then - call get_d2(det(1,1,i), psi_phasemask(1,1,i), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, i)) + call get_d2(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else if(nt == 3) then - call get_d1(det(1,1,i), psi_phasemask(1,1,i), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, i)) + call get_d1(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else - call get_d0(det(1,1,i), psi_phasemask(1,1,i), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, i)) + call get_d0(det(1,1,i), psi_phasemask(1,1,interesting(i)), bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) end if end if end do - call assert(bandon, "BANDON") end subroutine @@ -241,13 +317,12 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) bant = 1 tip = p(0,1) * p(0,2) - !call assert(p(0,1) + p(0,2) == 4, irp_here//"df") + ma = sp if(p(0,1) > p(0,2)) ma = 1 if(p(0,1) < p(0,2)) ma = 2 mi = mod(ma, 2) + 1 - !print *, "d2 SPtip", SP, tip if(sp == 3) then if(ma == 2) bant = 2 @@ -264,7 +339,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) h2 = h(2, ma) hij = (integral8(p1, p2, h1, h2) - integral8(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2) - !call debug_hij(hij, gen, mask, mi, ma, puti, putj) if(ma == 1) then mat(:, putj, puti) += coefs * hij else @@ -272,7 +346,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end if end do else - !call assert(tip == 4, "df") do i = 1,2 do j = 1,2 puti = p(i, 1) @@ -285,7 +358,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) h2 = h(1,2) hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) - !call debug_hij(hij, gen, mask, 1, 2, puti, putj) mat(:, puti, putj) += coefs * hij end do end do @@ -306,7 +378,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(i1, ma) p2 = p(i2, ma) hij = (integral8(p1, p2, h1, h2) - integral8(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2) - !call debug_hij(hij, gen, mask, ma, ma, puti, putj) mat(:, puti, putj) += coefs * hij end do end do @@ -314,7 +385,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) h1 = h(1, mi) h2 = h(1, ma) p1 = p(1, mi) - !call assert(ma == sp, "dldl") do i=1,3 puti = p(turn3(1,i), ma) putj = p(turn3(2,i), ma) @@ -322,11 +392,9 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(i, ma) hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2) - !call debug_hij(hij, gen, mask, ma, ma, puti, putj) mat(:, min(puti, putj), max(puti, putj)) += coefs * hij end do else ! tip == 4 - !call assert(tip == 4, "qsdf") puti = p(1, sp) putj = p(2, sp) if(.not. banned(puti,putj,1)) then @@ -335,7 +403,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) h1 = h(1, mi) h2 = h(2, mi) hij = (integral8(p1, p2, h1, h2) - integral8(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2) - !call debug_hij(hij, gen, mask,ma,ma, puti, putj) mat(:, puti, putj) += coefs * hij end if end if @@ -343,34 +410,6 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end subroutine -subroutine debug_hij(hij, gen, mask, s1, s2, p1, p2) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int,2), mask(N_int,2) - double precision, intent(in) :: hij - integer, intent(in) :: s1, s2, p1, p2 - integer(bit_kind) :: det(N_int,2) - double precision :: hij_ref, phase_ref - logical :: ok - integer :: degree - integer :: exc(0:2,2,2) - - call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) - !call assert(ok, "nokey") - call i_H_j_phase_out(gen,det,N_int,hij_ref,phase_ref,exc,degree) - if(hij /= hij_ref) then - print *, hij, hij_ref - print *, s1, s2, p1, p2 - call debug_det(gen, N_int) - call debug_det(mask, N_int) - stop - end if - - ! print *, "fourar", hij, hij_ref,s1,s2 -end function - - subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) use bitmasks implicit none @@ -409,11 +448,9 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) mi = turn2(ma) bant = 1 - !print *, "d1 SP", sp, p(0,1)*p(0,2) if(sp == 3) then !move MA - !call assert(p(0,1)*p(0,2) == 2, "ddmmm") if(ma == 2) bant = 2 puti = p(1,mi) hfix = h(1,ma) @@ -424,13 +461,11 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=1, hfix-1 if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle hij = (integral8(p1, p2, putj, hfix)-integral8(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2) - !call debug_hij(hij, gen, mask, mi, ma, puti, putj) tmp_row(1:N_states,putj) += hij * coefs(1:N_states) end do do putj=hfix+1, mo_tot_num if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle hij = (integral8(p1, p2, hfix, putj)-integral8(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2) - !call debug_hij(hij, gen, mask, mi, ma, puti, putj) tmp_row(1:N_states,putj) += hij * coefs(1:N_states) end do @@ -454,11 +489,9 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) tmp_row(:,puti) += hij * coefs end if - !call debug_hij(hij, gen, mask, mi, ma, puti, putj) putj = p2 if(.not. banned(putj,puti,bant)) then hij = integral8(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix) - !call debug_hij(hij, gen, mask, mi, ma, puti, putj) tmp_row2(:,puti) += hij * coefs end if end do @@ -481,13 +514,11 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=1,hfix-1 if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle hij = (integral8(p1, p2, putj, hfix)-integral8(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2) - !call debug_hij(hij, gen, mask, ma, ma, puti, putj) tmp_row(:,putj) += hij * coefs end do do putj=hfix+1,mo_tot_num if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle hij = (integral8(p1, p2, hfix, putj)-integral8(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2) - !call debug_hij(hij, gen, mask, ma, ma, puti, putj) tmp_row(:,putj) += hij * coefs end do @@ -495,7 +526,6 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) mat(:, puti, puti:) += tmp_row(:,puti:) end do else - !call assert(sp == ma, "sp == ma") hfix = h(1,mi) pfix = p(1,mi) p1 = p(1,ma) @@ -507,14 +537,12 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p2 if(.not. banned(puti,putj,1)) then hij = integral8(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1) - !call debug_hij(hij, gen, mask, ma, ma, putj, puti) tmp_row(:,puti) += hij * coefs end if putj = p1 if(.not. banned(puti,putj,1)) then hij = integral8(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2) - !call debug_hij(hij, gen, mask, ma, ma, putj, puti) tmp_row2(:,puti) += hij * coefs end if end do @@ -571,7 +599,6 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) integer :: bant bant = 1 - !print *, "d0 SP", sp if(sp == 3) then ! AB h1 = p(1,1) @@ -583,15 +610,12 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) if(banned(p1, p2, bant)) cycle ! rentable? if(p1 == h1 .or. p2 == h2) then call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) - !call assert(ok, "zsdq") call i_h_j(gen, det, N_int, hij) - mat(:, p1, p2) += coefs(:) * hij else hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2) - !call debug_hij(hij, gen, mask, 1, 2, p1, p2) - mat(:, p1, p2) += coefs(:) * hij end if + mat(:, p1, p2) += coefs(:) * hij end do end do else ! AA BB @@ -605,12 +629,10 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) call i_h_j(gen, det, N_int, hij) - mat(:, puti, putj) += coefs(:) * hij else hij = (integral8(p1, p2, puti, putj) - integral8(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2) - mat(:, puti, putj) += coefs(:) * hij - !call debug_hij(hij, gen, mask, sp, sp, puti, putj) end if + mat(:, puti, putj) += coefs(:) * hij end do end do end if @@ -659,10 +681,11 @@ end subroutine -subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch) +subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) use bitmasks implicit none + integer, intent(in) :: interesting(0:N) integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) integer, intent(in) :: i_gen, N logical, intent(inout) :: banned(mo_tot_num, mo_tot_num) @@ -685,7 +708,7 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch) if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl end do - if(i < i_gen) then + if(interesting(i) < i_gen) then fullMatch = .true. return end if @@ -697,8 +720,6 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch) call bitstring_to_list(myMask(1,1), list(1), na, N_int) call bitstring_to_list(myMask(1,2), list(na+1), nb, N_int) - !call assert(na + nb == 2, "oyo") - !call assert(na == 1 .or. list(1) < list(2), "sqdsmmmm") banned(list(1), list(2)) = .true. end do genl end subroutine diff --git a/plugins/Full_CI_ZMQ/selection_single.irp.f b/plugins/Full_CI_ZMQ/selection_single.irp.f index 77d985af..f107db11 100644 --- a/plugins/Full_CI_ZMQ/selection_single.irp.f +++ b/plugins/Full_CI_ZMQ/selection_single.irp.f @@ -43,14 +43,12 @@ subroutine select_singles(i_gen,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf do i=1, N_holes(sp) h1 = hole_list(i,sp) call apply_hole(psi_det_generators(1,1,i_gen), sp, h1, mask, ok, N_int) - !call assert(ok, irp_here) bannedOrb = .true. do j=1,N_particles(sp) bannedOrb(particle_list(j, sp)) = .false. end do call spot_hasBeen(mask, sp, psi_det_sorted, i_gen, N_det, bannedOrb, fullMatch) if(fullMatch) cycle - call spot_occupied(mask(1,sp), bannedOrb) vect = 0d0 call splash_p(mask, sp, psi_selectors(1,1,i_gen), psi_phasemask(1,1,i_gen), psi_selectors_coef_transp(1,i_gen), N_det_selectors - i_gen + 1, bannedOrb, vect) call fill_buffer_single(i_gen, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) @@ -72,12 +70,11 @@ subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, double precision, intent(inout) :: pt2(N_states) type(selection_buffer), intent(inout) :: buf logical :: ok - integer :: s1, s2, p1, p2, ib + integer :: s1, s2, p1, p2, ib, istate integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) - double precision :: e_pert, delta_E, val, Hii + double precision :: e_pert, delta_E, val, Hii, max_e_pert double precision, external :: diag_H_mat_elem_fock - if(N_states > 1) stop "fill_buffer_single N_states > 1" call apply_hole(psi_det_generators(1,1,i_generator), sp, h1, mask, ok, N_int) @@ -85,18 +82,24 @@ subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, if(bannedOrb(p1)) cycle if(vect(1, p1) == 0d0) cycle call apply_particle(mask, sp, p1, det, ok, N_int) - val = vect(1, p1) + Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) + max_e_pert = 0d0 - delta_E = E0(1) - Hii - if (delta_E < 0.d0) then - e_pert = 0.5d0 * (-dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) - else - e_pert = 0.5d0 * ( dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) - endif - pt2(1) += e_pert - if(dabs(e_pert) > buf%mini) call add_to_selection_buffer(buf, det, e_pert) + do istate=1,N_states + val = vect(istate, p1) + delta_E = E0(istate) - Hii + if (delta_E < 0.d0) then + e_pert = 0.5d0 * (-dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) + else + e_pert = 0.5d0 * ( dsqrt(delta_E * delta_E + 4.d0 * val * val) - delta_E) + endif + pt2(istate) += e_pert + if(dabs(e_pert) > dabs(max_e_pert)) max_e_pert = e_pert + end do + + if(dabs(max_e_pert) > buf%mini) call add_to_selection_buffer(buf, det, max_e_pert) end do end subroutine @@ -179,7 +182,6 @@ subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) p2 = p(turn3_2(2,i), sp) hij = integral8(p1, p2, h1, h2) - integral8(p2, p1, h1, h2) hij *= get_phase_bi(phasemask, sp, sp, h1, p1, h2, p2) - !call debug_hij_mo(hij, gen, mask, sp, puti) vect(:, puti) += hij * coefs end do else if(h(0,sp) == 1) then @@ -193,7 +195,6 @@ subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) pmob = p(turn2(j), sp) hij = integral8(pfix, pmob, hfix, hmob) hij *= get_phase_bi(phasemask, sp, sfix, hmob, pmob, hfix, pfix) - !call debug_hij_mo(hij, gen, mask, sp, puti) vect(:, puti) += hij * coefs end do else @@ -206,7 +207,6 @@ subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) h2 = h(2,sfix) hij = (integral8(p1,p2,h1,h2) - integral8(p2,p1,h1,h2)) hij *= get_phase_bi(phasemask, sfix, sfix, h1, p1, h2, p2) - !call debug_hij_mo(hij, gen, mask, sp, puti) vect(:, puti) += hij * coefs end if end if @@ -248,19 +248,16 @@ subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) if(lbanned(i)) cycle hij = (integral8(p1, p2, i, hole) - integral8(p2, p1, i, hole)) hij *= get_phase_bi(phasemask, sp, sp, i, p1, hole, p2) - !call debug_hij_mo(hij, gen, mask, sp, i) vect(:,i) += hij * coefs end do do i=hole+1,mo_tot_num if(lbanned(i)) cycle hij = (integral8(p1, p2, hole, i) - integral8(p2, p1, hole, i)) hij *= get_phase_bi(phasemask, sp, sp, hole, p1, i, p2) - !call debug_hij_mo(hij, gen, mask, sp, i) vect(:,i) += hij * coefs end do call apply_particle(mask, sp, p2, det, ok, N_int) - !call assert(ok, "OKE223") call i_h_j(gen, det, N_int, hij) vect(:, p2) += hij * coefs else @@ -269,17 +266,13 @@ subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) if(lbanned(i)) cycle hij = integral8(p1, p2, i, hole) hij *= get_phase_bi(phasemask, sp, sh, i, p1, hole, p2) - !call debug_hij_mo(hij, gen, mask, sp, i) vect(:,i) += hij * coefs end do end if call apply_particle(mask, sp, p1, det, ok, N_int) - !call assert(ok, "OKQQE2") call i_h_j(gen, det, N_int, hij) vect(:, p1) += hij * coefs - - !print *, "endouille" end subroutine @@ -303,7 +296,6 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) do i=1,mo_tot_num if(lbanned(i)) cycle call apply_particle(mask, sp, i, det, ok, N_int) - !call assert(ok, "qsdo") call i_h_j(gen, det, N_int, hij) vect(:, i) += hij * coefs end do @@ -360,31 +352,3 @@ end subroutine -subroutine debug_hij_mo(hij, gen, mask, s1, p1) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int,2), mask(N_int,2) - double precision, intent(in) :: hij - integer, intent(in) :: s1, p1 - integer(bit_kind) :: det(N_int,2) - double precision :: hij_ref, phase_ref - logical :: ok - integer :: degree - integer :: exc(0:2,2,2) - logical, external :: detEq - - call apply_particle(mask, s1, p1, det, ok, N_int) - !call assert(ok, "nokey_mo") - !call assert(.not. detEq(det, gen, N_int), "Hii ...") - call i_H_j_phase_out(gen,det,N_int,hij_ref,phase_ref,exc,degree) - if(hij /= hij_ref) then - print *, hij, hij_ref - print *, s1, p1 - call debug_det(gen, N_int) - call debug_det(mask, N_int) - call debug_det(det, N_int) - stop - end if -end function - diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f new file mode 100644 index 00000000..168cdd08 --- /dev/null +++ b/src/Davidson/davidson_parallel.irp.f @@ -0,0 +1,494 @@ + +!brought to you by garniroy inc. + +use bitmasks +use f77_zmq + +subroutine davidson_process(block, N, idx, vt, st) + + implicit none + + + integer , intent(in) :: block + integer , intent(inout) :: N + integer , intent(inout) :: idx(dav_size) + double precision , intent(inout) :: vt(N_states, dav_size) + double precision , intent(inout) :: st(N_states, dav_size) + + integer :: i, j, sh, sh2, exa, ext, org_i, org_j, istate, ni, endi + integer(bit_kind) :: sorted_i(N_int) + double precision :: s2, hij + logical :: wrotten(dav_size) + + + wrotten = .false. + sh = block + + do sh2=1,sh + exa = 0 + do ni=1,N_int + exa = exa + popcnt(xor(version_(ni,sh,1), version_(ni,sh2,1))) + end do + if(exa > 2) then + cycle + end if + + do i=shortcut_(sh,1),shortcut_(sh+1,1)-1 + org_i = sort_idx_(i,1) + if(sh==sh2) then + endi = i-1 + else + endi = shortcut_(sh2+1,1)-1 + end if + do ni=1,N_int + sorted_i(ni) = sorted_(ni,i,1) + enddo + + do j=shortcut_(sh2,1),endi + org_j = sort_idx_(j,1) + ext = exa + do ni=1,N_int + ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1))) + end do + if(ext <= 4) then + call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) ! psi_det + call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) + if(.not. wrotten(org_i)) then + wrotten(org_i) = .true. + vt (:,org_i) = 0d0 + st (:,org_i) = 0d0 + end if + if(.not. wrotten(org_j)) then + wrotten(org_j) = .true. + vt (:,org_j) = 0d0 + st (:,org_j) = 0d0 + end if + do istate=1,N_states + vt (istate,org_i) += hij*dav_ut(istate,org_j) + st (istate,org_i) += s2*dav_ut(istate,org_j) + vt (istate,org_j) += hij*dav_ut(istate,org_i) + st (istate,org_j) += s2*dav_ut(istate,org_i) + enddo + endif + enddo + enddo + enddo + + N = 0 + do i=1, dav_size + if(wrotten(i)) then + N = N+1 + do istate=1,N_states + vt (istate,N) = vt (istate,i) + st (istate,N) = st (istate,i) + idx(N) = i + enddo + end if + end do +end subroutine + + + + +subroutine davidson_collect(block, N, idx, vt, st , v0, s0) + implicit none + + + integer , intent(in) :: block + integer , intent(in) :: N + integer , intent(in) :: idx(N) + double precision , intent(in) :: vt(N_states, N) + double precision , intent(in) :: st(N_states, N) + double precision , intent(inout) :: v0(dav_size, N_states) + double precision , intent(inout) :: s0(dav_size, N_states) + + integer :: i + + do i=1,N + v0(idx(i), :) += vt(:, i) + s0(idx(i), :) += st(:, i) + end do +end subroutine + + +subroutine davidson_init(zmq_to_qp_run_socket) + use f77_zmq + implicit none + + integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket ! zmq_to_qp_run_socket + + touch dav_size dav_det dav_ut + call new_parallel_job(zmq_to_qp_run_socket,"davidson") +end subroutine + + + +subroutine davidson_add_task(zmq_to_qp_run_socket, block) + use f77_zmq + implicit none + + integer(ZMQ_PTR) ,intent(in) :: zmq_to_qp_run_socket + integer ,intent(in) :: block + character*(512) :: task + + + write(task,*) block + call add_task_to_taskserver(zmq_to_qp_run_socket, task) +end subroutine + + + +subroutine davidson_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call davidson_run_slave(1,i) +end + + +subroutine davidson_slave_tcp(i) + implicit none + integer, intent(in) :: i + + call davidson_run_slave(0,i) +end + + + +subroutine davidson_run_slave(thread,iproc) + use f77_zmq + implicit none + + integer, intent(in) :: thread, iproc + + integer :: worker_id, task_id, block + character*(512) :: task + + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_push_socket + integer(ZMQ_PTR) :: zmq_socket_push + + + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_push = new_zmq_push_socket(thread) + call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) + if(worker_id == -1) then + print *, "WORKER -1" + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) + return + end if + + call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) +! print *, "done slavin'" + !call sleep(1) + call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) +end subroutine + + + +subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) + use f77_zmq + implicit none + + integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket + integer(ZMQ_PTR),intent(in) :: zmq_socket_push + integer,intent(in) :: worker_id + integer :: task_id + character*(512) :: task + + + integer :: block + integer :: N + integer , allocatable :: idx(:) + double precision , allocatable :: vt(:,:) + double precision , allocatable :: st(:,:) + + + allocate(idx(dav_size)) + allocate(vt(N_states, dav_size)) + allocate(st(N_states, dav_size)) + + + do + call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) + if(task_id == 0) exit + read (task,*) block + + call davidson_process(block,N, idx, vt, st) + + ! reverse ? + call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) + call davidson_push_results(zmq_socket_push, block, N, idx, vt, st, task_id) + end do +end subroutine + + + +subroutine davidson_push_results(zmq_socket_push, block, N, idx, vt, st, task_id) + use f77_zmq + implicit none + + integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push + integer ,intent(in) :: task_id + + integer ,intent(in) :: block + integer ,intent(in) :: N + integer ,intent(in) :: idx(N) + double precision ,intent(in) :: vt(N_states, N) + double precision ,intent(in) :: st(N_states, N) + integer :: rc + + rc = f77_zmq_send( zmq_socket_push, block, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "davidson_push_results failed to push block" + + rc = f77_zmq_send( zmq_socket_push, N, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "davidson_push_results failed to push N" + + rc = f77_zmq_send( zmq_socket_push, idx, 4*N, ZMQ_SNDMORE) + if(rc /= 4*N) stop "davidson_push_results failed to push idx" + + rc = f77_zmq_send( zmq_socket_push, vt, 8*N_states* N, ZMQ_SNDMORE) + if(rc /= 8*N_states* N) stop "davidson_push_results failed to push vt" + + rc = f77_zmq_send( zmq_socket_push, st, 8*N_states* N, ZMQ_SNDMORE) + if(rc /= 8*N_states* N) stop "davidson_push_results failed to push st" + + rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) + if(rc /= 4) stop "davidson_push_results failed to push task_id" +end subroutine + + + +subroutine davidson_pull_results(zmq_socket_pull, block, N, idx, vt, st, task_id) + use f77_zmq + implicit none + + integer(ZMQ_PTR) ,intent(in) :: zmq_socket_pull + integer ,intent(out) :: task_id + integer ,intent(out) :: block + integer ,intent(out) :: N + integer ,intent(out) :: idx(dav_size) + double precision ,intent(out) :: vt(N_states, dav_size) + double precision ,intent(out) :: st(N_states, dav_size) + + integer :: rc + + rc = f77_zmq_recv( zmq_socket_pull, block, 4, 0) + if(rc /= 4) stop "davidson_push_results failed to pull block" + + rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) + if(rc /= 4) stop "davidson_push_results failed to pull N" + + rc = f77_zmq_recv( zmq_socket_pull, idx, 4*N, 0) + if(rc /= 4*N) stop "davidson_push_results failed to pull idx" + + rc = f77_zmq_recv( zmq_socket_pull, vt, 8*N_states* N, 0) + if(rc /= 8*N_states* N) stop "davidson_push_results failed to pull vt" + + rc = f77_zmq_recv( zmq_socket_pull, st, 8*N_states* N, 0) + if(rc /= 8*N_states* N) stop "davidson_push_results failed to pull st" + + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) + if(rc /= 4) stop "davidson_pull_results failed to pull task_id" +end subroutine + + + +subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0) + use f77_zmq + implicit none + + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + + double precision ,intent(inout) :: v0(dav_size, N_states) + double precision ,intent(inout) :: s0(dav_size, N_states) + + integer :: more, task_id + + + integer :: block + integer :: N + integer , allocatable :: idx(:) + double precision , allocatable :: vt(:,:) + double precision , allocatable :: st(:,:) + integer :: deleted + logical, allocatable :: done(:) + allocate(done(shortcut_(0,1))) + deleted = 0 + done = .false. + + allocate(idx(dav_size)) + allocate(vt(N_states, dav_size)) + allocate(st(N_states, dav_size)) + + more = 1 + + do while (more == 1) + call davidson_pull_results(zmq_socket_pull, block, N, idx, vt, st, task_id) + call davidson_collect(block, N, idx, vt, st , v0, s0) +! done(block) = .true. + call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) + deleted += 1 +! print *, "DELETED", deleted, done + end do +! print *, "done collector" +end subroutine + + +subroutine davidson_run(zmq_to_qp_run_socket , v0, s0) + use f77_zmq + implicit none + + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_collector + integer(ZMQ_PTR), external :: new_zmq_pull_socket + integer(ZMQ_PTR) :: zmq_socket_pull + + integer :: i + integer, external :: omp_get_thread_num + + double precision , intent(inout) :: v0(dav_size, N_states) + double precision , intent(inout) :: s0(dav_size, N_states) + + call zmq_set_running(zmq_to_qp_run_socket) + + zmq_collector = new_zmq_to_qp_run_socket() + zmq_socket_pull = new_zmq_pull_socket() + i = omp_get_thread_num() + + + PROVIDE nproc + + !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(nproc+2) + i = omp_get_thread_num() + if (i==0) then + call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0) + call end_zmq_to_qp_run_socket(zmq_collector) + call end_zmq_pull_socket(zmq_socket_pull) + call davidson_miniserver_end() + else if(i==1) then + call davidson_miniserver_run() + else + call davidson_slave_inproc(i) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, 'davidson') +end subroutine + + + +subroutine davidson_miniserver_run() + use f77_zmq + implicit none + integer(ZMQ_PTR) context + integer(ZMQ_PTR) responder + character*(64) address + character(len=:), allocatable :: buffer + integer rc + + allocate (character(len=20) :: buffer) + address = 'tcp://*:11223' + + context = f77_zmq_ctx_new() + responder = f77_zmq_socket(context, ZMQ_REP) + rc = f77_zmq_bind(responder,address) + + do + rc = f77_zmq_recv(responder, buffer, 5, 0) + if (buffer(1:rc) /= 'end') then + rc = f77_zmq_send (responder, dav_size, 4, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, dav_det, 16*N_int*dav_size, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, dav_ut, 8*dav_size*N_states, 0) + else + rc = f77_zmq_send (responder, "end", 3, 0) + exit + endif + enddo + + rc = f77_zmq_close(responder) + rc = f77_zmq_ctx_destroy(context) +end subroutine + + +subroutine davidson_miniserver_end() + implicit none + use f77_zmq + + integer(ZMQ_PTR) context + integer(ZMQ_PTR) requester + character*(64) address + integer rc + character*(64) buf + + address = trim(qp_run_address)//':11223' + context = f77_zmq_ctx_new() + requester = f77_zmq_socket(context, ZMQ_REQ) + rc = f77_zmq_connect(requester,address) + + rc = f77_zmq_send(requester, "end", 3, 0) + rc = f77_zmq_recv(requester, buf, 3, 0) + rc = f77_zmq_close(requester) + rc = f77_zmq_ctx_destroy(context) +! print *, "closed miniserver" +end subroutine + + +subroutine davidson_miniserver_get() + implicit none + use f77_zmq + + integer(ZMQ_PTR) context + integer(ZMQ_PTR) requester + character*(64) address + character*(20) buffer + integer rc + + address = trim(qp_run_address)//':11223' + + context = f77_zmq_ctx_new() + requester = f77_zmq_socket(context, ZMQ_REQ) + rc = f77_zmq_connect(requester,address) + + rc = f77_zmq_send(requester, "Hello", 5, 0) + rc = f77_zmq_recv(requester, dav_size, 4, 0) + TOUCH dav_size + rc = f77_zmq_recv(requester, dav_det, 16*N_int*dav_size, 0) + rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states, 0) + TOUCH dav_det dav_ut + + rc = f77_zmq_close(requester) + rc = f77_zmq_ctx_destroy(context) + + touch dav_det dav_ut +end subroutine + + + +BEGIN_PROVIDER [ integer(bit_kind), dav_det, (N_int, 2, dav_size) ] +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, dav_ut, (N_states, dav_size) ] +END_PROVIDER + + +BEGIN_PROVIDER [ integer, dav_size ] +END_PROVIDER + + + BEGIN_PROVIDER [ integer, shortcut_, (0:dav_size+1, 2) ] +&BEGIN_PROVIDER [ integer(bit_kind), version_, (N_int, dav_size, 2) ] +&BEGIN_PROVIDER [ integer(bit_kind), sorted_, (N_int, dav_size, 2) ] +&BEGIN_PROVIDER [ integer, sort_idx_, (dav_size, 2) ] + implicit none + call sort_dets_ab_v(dav_det, sorted_(1,1,1), sort_idx_(1,1), shortcut_(0,1), version_(1,1,1), dav_size, N_int) + call sort_dets_ba_v(dav_det, sorted_(1,1,2), sort_idx_(1,2), shortcut_(0,2), version_(1,1,2), dav_size, N_int) +END_PROVIDER \ No newline at end of file diff --git a/src/Davidson/davidson_slave.irp.f b/src/Davidson/davidson_slave.irp.f new file mode 100644 index 00000000..15830b1d --- /dev/null +++ b/src/Davidson/davidson_slave.irp.f @@ -0,0 +1,40 @@ +program davidson_slave + use f77_zmq + implicit none + + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + double precision :: energy(N_states_diag) + character*(64) :: state + +! call provide_everything + call switch_qp_run_to_master + + zmq_context = f77_zmq_ctx_new () + zmq_state = 'davidson' + state = 'Waiting' + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + do + call wait_for_state(zmq_state,state) + if(trim(state) /= "davidson") exit + !print *, 'Getting wave function' + !call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states_diag) + call davidson_miniserver_get() + + integer :: rc, i + + print *, 'Davidson slave running' + + !$OMP PARALLEL PRIVATE(i) + i = omp_get_thread_num() + call davidson_slave_tcp(i) + !$OMP END PARALLEL + end do +end + +! subroutine provide_everything +! PROVIDE mo_bielec_integrals_in_map psi_det_sorted_bit N_states zmq_context +! end subroutine diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index daef3ee5..8e2ef3f6 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -43,7 +43,8 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) double precision, intent(in) :: H_jj(n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) double precision :: hij - double precision, allocatable :: vt(:,:), ut(:,:) + double precision, allocatable :: vt(:,:) + double precision, allocatable :: ut(:,:) integer :: i,j,k,l, jj,ii integer :: i0, j0 @@ -55,9 +56,10 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) integer :: N_st_8 integer, external :: align_double - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut + !!!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut - N_st_8 = align_double(N_st) + if(N_st /= N_states) stop "H_u_0_nstates N_st /= N_states" + N_st_8 = N_states ! align_double(N_st) ASSERT (Nint > 0) ASSERT (Nint == N_int) @@ -163,7 +165,7 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) v_0(i,istate) += H_jj(i) * u_0(i,istate) enddo enddo - deallocate (shortcut, sort_idx, sorted, version, ut) + !deallocate (shortcut, sort_idx, sorted, version, ut) end BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ] @@ -175,11 +177,9 @@ BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ] END_PROVIDER - - - subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) use bitmasks + use f77_zmq implicit none BEGIN_DOC ! Computes v_0 = H|u_0> and s_0 = S^2 |u_0> @@ -196,7 +196,8 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) double precision, intent(in) :: H_jj(n), S2_jj(n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) double precision :: hij,s2 - double precision, allocatable :: vt(:,:), ut(:,:), st(:,:) + double precision, allocatable :: vt(:,:), st(:,:) + double precision, allocatable :: ut(:,:) integer :: i,j,k,l, jj,ii integer :: i0, j0 @@ -208,9 +209,12 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) integer :: N_st_8 integer, external :: align_double - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut - - N_st_8 = align_double(N_st) + !!!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut + + integer(ZMQ_PTR) :: handler + + if(N_st /= N_states .or. sze_8 < N_det) stop "assert fail in H_S2_u_0_nstates" + N_st_8 = N_st !! align_double(N_st) ASSERT (Nint > 0) ASSERT (Nint == N_int) @@ -222,15 +226,28 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) v_0 = 0.d0 s_0 = 0.d0 - + + if(n /= N_det) stop "n /= N_det" + do i=1,n do istate=1,N_st - ut(istate,i) = u_0(i,istate) + ut(istate,i) = u_0(i,istate) enddo enddo - call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint) call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint) + + dav_size = n + touch dav_size + dav_det = psi_det + dav_ut = ut + + call davidson_init(handler) + do sh=shortcut(0,1),1,-1 + call davidson_add_task(handler, sh) + enddo + + call davidson_run(handler, v_0, s_0) !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& @@ -239,50 +256,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) Vt = 0.d0 St = 0.d0 - !$OMP DO SCHEDULE(dynamic) - do sh=1,shortcut(0,1) - do sh2=sh,shortcut(0,1) - exa = 0 - do ni=1,Nint - exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1))) - end do - if(exa > 2) then - cycle - end if - - do i=shortcut(sh,1),shortcut(sh+1,1)-1 - org_i = sort_idx(i,1) - if(sh==sh2) then - endi = i-1 - else - endi = shortcut(sh2+1,1)-1 - end if - do ni=1,Nint - sorted_i(ni) = sorted(ni,i,1) - enddo - - do j=shortcut(sh2,1),endi - org_j = sort_idx(j,1) - ext = exa - do ni=1,Nint - ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) - end do - if(ext <= 4) then - call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) - call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) - do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) - st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) - enddo - endif - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0,2) do i=shortcut(sh,2),shortcut(sh+1,2)-1 @@ -326,6 +299,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate) enddo enddo - deallocate (shortcut, sort_idx, sorted, version, ut) + deallocate (shortcut, sort_idx, sorted, version) end