diff --git a/configure b/configure index 171bb377..346d9804 100755 --- a/configure +++ b/configure @@ -494,9 +494,9 @@ def create_ninja_and_rc(l_installed): 'function qp_prepend_export () {', 'eval "value_1="\${$1}""', 'if [[ -z $value_1 ]] ; then', - ' echo "${1}=${2}:"', + ' echo "${2}:"', 'else', - ' echo "${1}=${2}:${value_1}"', + ' echo "${2}:${value_1}"', 'fi', '}', 'export PYTHONPATH=$(qp_prepend_export "PYTHONPATH" "${QP_EZFIO}/Python":"${QP_PYTHON}")', diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 8f682eef..506b1639 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -208,7 +208,7 @@ let end_job msg program_state rep_socket pair_socket = address_tcp = None; address_inproc = None; running = true; - accepting_clients = false; + accepting_clients = false; data = StringHashtbl.create (); } @@ -625,7 +625,7 @@ let get_data msg program_state rep_socket = let value = match StringHashtbl.find program_state.data key with | Some value -> value - | None -> String.make 1 (Char.of_int_exn 0) + | None -> "\000" in Message.GetDataReply (Message.GetDataReply_msg.create ~value) |> Message.to_string_list diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 36ad63b2..8ebbdff7 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -293,7 +293,6 @@ subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_ parts_to_get(index(i)) -= 1 if(parts_to_get(index(i)) < 0) then print *, i, index(i), parts_to_get(index(i)) - print *, "PARTS ??" print *, parts_to_get stop "PARTS ??" end if @@ -320,7 +319,12 @@ subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_ end do integer, external :: zmq_abort - + double precision :: E0, avg, prop + + call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove) + firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1 + call get_first_tooth(actually_computed, tooth) + if (firstTBDcomb > Ncomb) then if (zmq_abort(zmq_to_qp_run_socket) == -1) then call sleep(1) @@ -330,12 +334,6 @@ subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_ endif exit pullLoop endif - - double precision :: E0, avg, prop - call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove) - firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1 - if(Nabove(1) < 5d0) cycle - call get_first_tooth(actually_computed, tooth) E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) if (tooth <= comb_teeth) then @@ -348,7 +346,7 @@ subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_ eqt = 0.d0 endif call wall_time(time) - if ( (dabs(eqt/avg) < relative_error) .or. (dabs(eqt) < absolute_error) ) then + if ( ((dabs(eqt/avg) < relative_error) .or. (dabs(eqt) < absolute_error)) .and. Nabove(tooth) >= 10) then ! Termination pt2(pt2_stoch_istate) = avg error(pt2_stoch_istate) = eqt @@ -367,13 +365,29 @@ subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_ endif end if end do pullLoop - - E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) - prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) - E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop - pt2(pt2_stoch_istate) = E0 + (sumabove(tooth) / Nabove(tooth)) + + if(tooth == comb_teeth+1) then + pt2(pt2_stoch_istate) = sum(pt2_detail(pt2_stoch_istate,:)) + error(pt2_stoch_istate) = 0d0 + else + E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) + prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) + prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) + E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop + pt2(pt2_stoch_istate) = E0 + (sumabove(tooth) / Nabove(tooth)) + error(pt2_stoch_istate) = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) + end if + +!======= +! +! E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) +! prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) +! prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) +! E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop +! pt2(pt2_stoch_istate) = E0 + (sumabove(tooth) / Nabove(tooth)) +! +!>>>>>>> master call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call sort_selection_buffer(b) end subroutine @@ -422,7 +436,7 @@ subroutine get_first_tooth(computed, first_teeth) integer, intent(out) :: first_teeth integer :: i, first_det - first_det = 1 + first_det = N_det_generators+1+1 first_teeth = 1 do i=first_det_of_comb, N_det_generators if(.not.(computed(i))) then @@ -431,7 +445,7 @@ subroutine get_first_tooth(computed, first_teeth) end if end do - do i=comb_teeth, 1, -1 + do i=comb_teeth+1, 1, -1 if(first_det_of_teeth(i) < first_det) then first_teeth = i exit @@ -570,7 +584,7 @@ END_PROVIDER comb_step = 1d0/dfloat(comb_teeth) first_det_of_comb = 1 do i=1,N_det_generators - if(pt2_weight(i)/norm_left < .25d0*comb_step) then + if(pt2_weight(i)/norm_left < .5d0*comb_step) then first_det_of_comb = i exit end if diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index bc88c23c..8ffe502c 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -25,12 +25,6 @@ subroutine run_pt2_slave(thread,iproc,energy) integer :: n_tasks, k, n_tasks_max integer, allocatable :: i_generator(:), subset(:) -!if (mpi_master) then -! do i=1,N_det_generators -! print '(I6,X,100(I10,X))' ,i, psi_det_generators(:,:,i) -! enddo -!endif - n_tasks_max = N_det_generators/100+1 allocate(task_id(n_tasks_max), task(n_tasks_max)) allocate(pt2(N_states,n_tasks_max), i_generator(n_tasks_max), subset(n_tasks_max)) @@ -67,23 +61,24 @@ subroutine run_pt2_slave(thread,iproc,energy) read (task(k),*) subset(k), i_generator(k) enddo - double precision :: time0, time1 - call wall_time(time0) +! double precision :: time0, time1 +! call wall_time(time0) do k=1,n_tasks pt2(:,k) = 0.d0 buf%cur = 0 call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k)) enddo - call wall_time(time1) - +! call wall_time(time1) +! integer, external :: tasks_done_to_taskserver if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then done = .true. endif call push_pt2_results(zmq_socket_push, i_generator, pt2, task_id, n_tasks) - ! Try to adjust n_tasks around 5 second per job - n_tasks = min(n_tasks,int( 5.d0*dble(n_tasks) / (time1 - time0 + 1.d-9)))+1 +! ! Try to adjust n_tasks around 5 second per job +! n_tasks = min(n_tasks,int( 5.d0*dble(n_tasks) / (time1 - time0 + 1.d-9)))+1 + n_tasks = n_tasks+1 end do integer, external :: disconnect_from_taskserver diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index 52b75a3a..09f7974c 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -97,8 +97,9 @@ subroutine run_selection_slave_new(thread,iproc,energy) pt2(:,:) = 0d0 buf%cur = 0 - ! Try to adjust n_tasks around 5 second per job - n_tasks = min(n_tasks,int( 5.d0 * dble(n_tasks) / (time1 - time0 + 1.d-9)))+1 +! ! Try to adjust n_tasks around 5 second per job +! n_tasks = min(n_tasks,int( 5.d0 * dble(n_tasks) / (time1 - time0 + 1.d-9)))+1 + n_tasks = n_tasks+1 end do integer, external :: disconnect_from_taskserver diff --git a/plugins/Full_CI_ZMQ/zmq_selection.irp.f b/plugins/Full_CI_ZMQ/zmq_selection.irp.f index 6bcc548a..b3a87e95 100644 --- a/plugins/Full_CI_ZMQ/zmq_selection.irp.f +++ b/plugins/Full_CI_ZMQ/zmq_selection.irp.f @@ -60,7 +60,8 @@ subroutine ZMQ_selection(N_in, pt2) task = ' ' do i= 1, N_det_generators - if (i>ishft(N_det_generators,-2)) then +! /!\ Fragments don't work +! if (i>-ishft(N_det_generators,-2)) then write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') 0, i, N ipos += 30 if (ipos > 63970) then @@ -69,18 +70,18 @@ subroutine ZMQ_selection(N_in, pt2) endif ipos=1 endif - else - do j=1,fragment_count - write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, i, N - ipos += 30 - if (ipos > 63970) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - ipos=1 - endif - end do - endif +! else +! do j=1,fragment_count +! write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, i, N +! ipos += 30 +! if (ipos > 63970) then +! if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then +! stop 'Unable to add task to task server' +! endif +! ipos=1 +! endif +! end do +! endif enddo if (ipos > 1) then if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then diff --git a/plugins/Generators_CAS/generators.irp.f b/plugins/Generators_CAS/generators.irp.f index 426306dc..4be8c061 100644 --- a/plugins/Generators_CAS/generators.irp.f +++ b/plugins/Generators_CAS/generators.irp.f @@ -22,6 +22,9 @@ END_PROVIDER BEGIN_PROVIDER [ integer(bit_kind), psi_det_generators, (N_int,2,psi_det_size) ] &BEGIN_PROVIDER [ double precision, psi_coef_generators, (psi_det_size,N_states) ] +&BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_gen, (N_int,2,psi_det_size) ] +&BEGIN_PROVIDER [ double precision, psi_coef_sorted_gen, (psi_det_size,N_states) ] +&BEGIN_PROVIDER [ integer, psi_det_sorted_gen_order, (psi_det_size) ] implicit none BEGIN_DOC ! For Single reference wave functions, the generator is the @@ -30,19 +33,36 @@ END_PROVIDER integer :: i, k, l, m logical :: good integer, external :: number_of_holes,number_of_particles + integer, allocatable :: nongen(:) + integer :: inongen + inongen = 0 + + allocate(nongen(N_det)) + m=0 do i=1,N_det good = ( number_of_holes(psi_det_sorted(1,1,i)) ==0).and.(number_of_particles(psi_det_sorted(1,1,i))==0 ) if (good) then m = m+1 + psi_det_sorted_gen_order(i) = m do k=1,N_int psi_det_generators(k,1,m) = psi_det_sorted(k,1,i) psi_det_generators(k,2,m) = psi_det_sorted(k,2,i) enddo psi_coef_generators(m,:) = psi_coef_sorted(i,:) + else + inongen += 1 + nongen(inongen) = i endif enddo + psi_det_sorted_gen(:,:,:N_det_generators) = psi_det_generators(:,:,:N_det_generators) + psi_coef_sorted_gen(:N_det_generators, :) = psi_coef_generators(:N_det_generators, :) + do i=1,inongen + psi_det_sorted_gen_order(nongen(i)) = N_det_generators+i + psi_det_sorted_gen(:,:,N_det_generators+i) = psi_det_sorted(:,:,nongen(i)) + psi_coef_sorted_gen(N_det_generators+i, :) = psi_coef_sorted(nongen(i),:) + end do END_PROVIDER BEGIN_PROVIDER [ integer, size_select_max] diff --git a/plugins/Generators_full/generators.irp.f b/plugins/Generators_full/generators.irp.f index 835897bd..c40ba2d4 100644 --- a/plugins/Generators_full/generators.irp.f +++ b/plugins/Generators_full/generators.irp.f @@ -13,7 +13,7 @@ BEGIN_PROVIDER [ integer, N_det_generators ] N_det_generators = N_det do i=1,N_det norm = norm + psi_average_norm_contrib_sorted(i) - if (norm > threshold_generators) then + if (norm >= threshold_generators) then N_det_generators = i exit endif @@ -35,6 +35,22 @@ END_PROVIDER END_PROVIDER + BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_gen, (N_int,2,psi_det_size) ] +&BEGIN_PROVIDER [ double precision, psi_coef_sorted_gen, (psi_det_size,N_states) ] +&BEGIN_PROVIDER [ integer, psi_det_sorted_gen_order, (psi_det_size) ] + + implicit none + BEGIN_DOC + ! For Single reference wave functions, the generator is the + ! Hartree-Fock determinant + END_DOC + integer :: i, k + psi_det_sorted_gen = psi_det_sorted + psi_coef_sorted_gen = psi_coef_sorted + psi_det_sorted_gen_order = psi_det_sorted_order +END_PROVIDER + + BEGIN_PROVIDER [integer, degree_max_generators] implicit none BEGIN_DOC diff --git a/plugins/Hartree_Fock/diagonalize_fock.irp.f b/plugins/Hartree_Fock/diagonalize_fock.irp.f index b3003985..bc874514 100644 --- a/plugins/Hartree_Fock/diagonalize_fock.irp.f +++ b/plugins/Hartree_Fock/diagonalize_fock.irp.f @@ -1,4 +1,4 @@ - BEGIN_PROVIDER [ double precision, diagonal_Fock_matrix_mo, (ao_num) ] + BEGIN_PROVIDER [ double precision, diagonal_Fock_matrix_mo, (mo_tot_num) ] &BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_tot_num) ] implicit none BEGIN_DOC diff --git a/plugins/Hartree_Fock/localize_mos.irp.f b/plugins/Hartree_Fock/localize_mos.irp.f index 8a665c64..27c97ddb 100644 --- a/plugins/Hartree_Fock/localize_mos.irp.f +++ b/plugins/Hartree_Fock/localize_mos.irp.f @@ -21,7 +21,7 @@ program localize_mos mo_coef(1,1),size(mo_coef,1),1.d-6,rank) print *, rank - if (elec_alpha_num>elec_alpha_num) then + if (elec_alpha_num>elec_beta_num) then W = 0.d0 do k=elec_beta_num+1,elec_alpha_num do j=1,ao_num diff --git a/plugins/Psiref_Utils/psi_ref_utils.irp.f b/plugins/Psiref_Utils/psi_ref_utils.irp.f index c59bbd9f..a63b0ded 100644 --- a/plugins/Psiref_Utils/psi_ref_utils.irp.f +++ b/plugins/Psiref_Utils/psi_ref_utils.irp.f @@ -9,7 +9,7 @@ use bitmasks ! function. END_DOC call sort_dets_by_det_search_key(N_det_ref, psi_ref, psi_ref_coef, & - psi_ref_sorted_bit, psi_ref_coef_sorted_bit) + psi_ref_sorted_bit, psi_ref_coef_sorted_bit, N_states) END_PROVIDER @@ -152,7 +152,7 @@ END_PROVIDER ! function. END_DOC call sort_dets_by_det_search_key(N_det_ref, psi_non_ref, psi_non_ref_coef, & - psi_non_ref_sorted_bit, psi_non_ref_coef_sorted_bit) + psi_non_ref_sorted_bit, psi_non_ref_coef_sorted_bit, N_states) END_PROVIDER diff --git a/plugins/QMC/qp_convert_qmcpack_to_ezfio.py b/plugins/QMC/qp_convert_qmcpack_to_ezfio.py index b6237476..100b9a6e 100755 --- a/plugins/QMC/qp_convert_qmcpack_to_ezfio.py +++ b/plugins/QMC/qp_convert_qmcpack_to_ezfio.py @@ -60,7 +60,7 @@ beta = ezfio.get_electrons_elec_beta_num() print "elec_alpha_num", alpha print "elec_beta_num", beta print "elec_tot_num", alpha + beta -print "spin_multiplicity", 2 * (alpha - beta) + 1 +print "spin_multiplicity", (alpha - beta) + 1 l_label = ezfio.get_nuclei_nucl_label() l_charge = ezfio.get_nuclei_nucl_charge() @@ -133,7 +133,7 @@ d_gms_order ={ 0:["s"], 1:[ "x", "y", "z" ], 2:[ "xx", "yy", "zz", "xy", "xz", "yz" ], 3:[ "xxx", "yyy", "zzz", "xxy", "xxz", "yyx", "yyz", "zzx", "zzy", "xyz"], - 4: ["xxxx", "yyyy", "zzzz", "xxxy", "xxxz", "yyyx", "yyyz", "zzzx", "zzzy", "xxyy", "xxzz", "yyzz", "xxyz", "yyxz", "zzxy", "xxxx", "yyyy", "zzzz", "xxxy", "xxxz", "yyyx", "yyyz", "zzzx", "zzzy", "xxyy", "xxzz", "yyzz", "xxyz", "yyxz","zzxy"] } + 4:[ "xxxx", "yyyy", "zzzz", "xxxy", "xxxz", "yyyx", "yyyz", "zzzx", "zzzy", "xxyy", "xxzz", "yyzz", "xxyz", "yyxz", "zzxy"] } def compare_gamess_style(item1, item2): n1,n2 = map(len,(item1,item2)) diff --git a/plugins/dress_zmq/NEEDED_CHILDREN_MODULES b/plugins/dress_zmq/NEEDED_CHILDREN_MODULES index 55f8f738..96b2cfdc 100644 --- a/plugins/dress_zmq/NEEDED_CHILDREN_MODULES +++ b/plugins/dress_zmq/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -Selectors_full Generators_full ZMQ +ZMQ diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index 190a94ad..261966be 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -20,15 +20,15 @@ subroutine alpha_callback(delta_ij_loc, i_generator, subset,iproc) end subroutine -BEGIN_PROVIDER [ integer, psi_from_sorted, (N_det) ] +BEGIN_PROVIDER [ integer, psi_from_sorted_gen, (N_det) ] implicit none integer :: i,inpsisor - psi_from_sorted = 0 + psi_from_sorted_gen = 0 do i=1,N_det - psi_from_sorted(psi_det_sorted_order(i)) = i - inpsisor = psi_det_sorted_order(i) + psi_from_sorted_gen(psi_det_sorted_gen_order(i)) = i + inpsisor = psi_det_sorted_gen_order(i) if(inpsisor <= 0) stop "idx_non_ref_from_sorted" end do END_PROVIDER @@ -84,8 +84,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index !hole (k,1) = iand(psi_det_generators(k,1,i_generator), full_ijkl_bitmask(k)) !hole (k,2) = iand(psi_det_generators(k,2,i_generator), full_ijkl_bitmask(k)) !particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), full_ijkl_bitmask(k)) - !particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), full_ijkl_bitmask(k)) - + !particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), full_ijkl_bitmask(k)) enddo integer :: N_holes(2), N_particles(2) @@ -100,10 +99,10 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index allocate (indices(N_det), & exc_degree(max(N_det_alpha_unique,N_det_beta_unique))) - PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique - PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order - PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns - PROVIDE psi_bilinear_matrix_transp_order + !PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + !PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order + !PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + !PROVIDE psi_bilinear_matrix_transp_order k=1 do i=1,N_det_alpha_unique @@ -118,7 +117,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index do l_a=psi_bilinear_matrix_columns_loc(j), psi_bilinear_matrix_columns_loc(j+1)-1 i = psi_bilinear_matrix_rows(l_a) if (nt + exc_degree(i) <= 4) then - indices(k) = psi_det_sorted_order(psi_bilinear_matrix_order(l_a)) + indices(k) = psi_det_sorted_gen_order(psi_bilinear_matrix_order(l_a)) k=k+1 endif enddo @@ -137,7 +136,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index i = psi_bilinear_matrix_transp_columns(l_a) if (exc_degree(i) < 3) cycle if (nt + exc_degree(i) <= 4) then - indices(k) = psi_det_sorted_order( & + indices(k) = psi_det_sorted_gen_order( & psi_bilinear_matrix_order( & psi_bilinear_matrix_transp_order(l_a))) k=k+1 @@ -161,15 +160,15 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index negMask(i,1) = not(psi_det_generators(i,1,i_generator)) negMask(i,2) = not(psi_det_generators(i,2,i_generator)) end do - if(psi_det_generators(1,1,i_generator) /= psi_det_sorted(1,1,i_generator)) stop "gen <> sorted" + if(psi_det_generators(1,1,i_generator) /= psi_det_sorted_gen(1,1,i_generator)) stop "gen <> sorted" do k=1,nmax i = indices(k) - mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i)) - mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i)) + mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_gen(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_gen(1,2,i)) nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) do j=2,N_int - mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted_gen(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted_gen(j,2,i)) nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) end do @@ -178,8 +177,8 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index preinteresting(0) += 1 preinteresting(preinteresting(0)) = i do j=1,N_int - preinteresting_det(j,1,preinteresting(0)) = psi_det_sorted(j,1,i) - preinteresting_det(j,2,preinteresting(0)) = psi_det_sorted(j,2,i) + preinteresting_det(j,1,preinteresting(0)) = psi_det_sorted_gen(j,1,i) + preinteresting_det(j,2,preinteresting(0)) = psi_det_sorted_gen(j,2,i) enddo else if(nt <= 2) then prefullinteresting(0) += 1 @@ -288,13 +287,13 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index do ii=1,prefullinteresting(0) i = prefullinteresting(ii) nt = 0 - mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i)) - mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i)) + mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_gen(1,1,i)) + mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_gen(1,2,i)) nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) if (nt > 2) cycle do j=N_int,2,-1 - mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i)) - mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i)) + mobMask(j,1) = iand(negMask(j,1), psi_det_sorted_gen(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), psi_det_sorted_gen(j,2,i)) nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) if (nt > 2) exit end do @@ -302,11 +301,11 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index if(nt <= 2) then fullinteresting(0) += 1 fullinteresting(fullinteresting(0)) = i - fullminilist(1,1,fullinteresting(0)) = psi_det_sorted(1,1,i) - fullminilist(1,2,fullinteresting(0)) = psi_det_sorted(1,2,i) + fullminilist(1,1,fullinteresting(0)) = psi_det_sorted_gen(1,1,i) + fullminilist(1,2,fullinteresting(0)) = psi_det_sorted_gen(1,2,i) do j=2,N_int - fullminilist(j,1,fullinteresting(0)) = psi_det_sorted(j,1,i) - fullminilist(j,2,fullinteresting(0)) = psi_det_sorted(j,2,i) + fullminilist(j,1,fullinteresting(0)) = psi_det_sorted_gen(j,1,i) + fullminilist(j,2,fullinteresting(0)) = psi_det_sorted_gen(j,2,i) enddo end if end do @@ -391,7 +390,7 @@ subroutine alpha_callback_mask(delta_ij_loc, i_gen, sp, mask, bannedOrb, banned, allocate(abuf(siz), labuf(N_det), putten(N_det), det_minilist(N_int, 2, N_det)) do i=1,siz - abuf(i) = psi_from_sorted(rabuf(i)) + abuf(i) = psi_from_sorted_gen(rabuf(i)) end do putten = .false. diff --git a/plugins/dress_zmq/dress_general.irp.f b/plugins/dress_zmq/dress_general.irp.f index 731011e3..b99eb1d7 100644 --- a/plugins/dress_zmq/dress_general.irp.f +++ b/plugins/dress_zmq/dress_general.irp.f @@ -10,11 +10,10 @@ subroutine run_dressing(N_st,energy) integer :: iteration integer :: n_it_dress_max - double precision :: thresh_dress + double precision :: thresh_dress, dummy thresh_dress = thresh_dressed_ci n_it_dress_max = n_it_max_dressed_ci - if(n_it_dress_max == 1) then do j=1,N_states do i=1,N_det @@ -30,13 +29,23 @@ subroutine run_dressing(N_st,energy) delta_E = 1.d0 iteration = 0 do iteration=1,n_it_dress_max + N_det_delta_ij = N_det + touch N_det_delta_ij print *, '===============================================' print *, 'Iteration', iteration, '/', n_it_dress_max print *, '===============================================' print *, '' E_old = sum(psi_energy(:)) + print *, 'Variational energy ' do i=1,N_st - call write_double(6,ci_energy_dressed(i),"Energy") + print *, i, psi_energy(i)+nuclear_repulsion + enddo + !print *, "DELTA IJ", delta_ij(1,1,1) + PROVIDE delta_ij_tmp + if(.true.) call delta_ij_done() + print *, 'Dressed energy ' + do i=1,N_st + print *, i, ci_energy_dressed(i) enddo call diagonalize_ci_dressed E_new = sum(psi_energy(:)) @@ -52,8 +61,16 @@ subroutine run_dressing(N_st,energy) exit endif enddo - call write_double(6,ci_energy_dressed(1),"Final energy") + print *, 'Variational energy ' + do i=1,N_st + print *, i, psi_energy(i)+nuclear_repulsion + enddo + print *, 'Dressed energy ' + do i=1,N_st + print *, i, ci_energy_dressed(i)+nuclear_repulsion + enddo endif - energy(1:N_st) = ci_energy_dressed(1:N_st) + + if(.true.) energy(1:N_st) = 0d0 ! ci_energy_dressed(1:N_st) end diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index c7633b91..6de3e2da 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -6,6 +6,10 @@ subroutine dress_slave read_wf = .False. distributed_davidson = .False. SOFT_TOUCH read_wf distributed_davidson + + threshold_selectors = 1.d0 + threshold_generators = 1d0 + call provide_everything call switch_qp_run_to_master call run_wf @@ -24,7 +28,12 @@ subroutine run_wf double precision :: energy(N_states_diag) character*(64) :: states(1) integer :: rc, i - + integer, external :: zmq_get_dvector, zmq_get_N_det_generators + integer, external :: zmq_get_psi, zmq_get_N_det_selectors + integer, external :: zmq_get_N_states_diag + double precision :: tmp + + call provide_everything zmq_context = f77_zmq_ctx_new () @@ -33,34 +42,36 @@ subroutine run_wf zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() do - call wait_for_states(states,zmq_state,1) - - if(trim(zmq_state) == 'Stopped') then + if(zmq_state(:7) == 'Stopped') then exit - else if (trim(zmq_state) == 'dress') then - - ! Selection + else if (zmq_state(:5) == 'dress') then + ! Dress ! --------- - - print *, 'dress' - call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + !call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle + !TOUCH psi_det + if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle + if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle + if (zmq_get_dvector(zmq_to_qp_run_socket,1,'dress_stoch_istate',tmp,1) == -1) cycle + dress_stoch_istate = int(tmp) + psi_energy(1:N_states) = energy(1:N_states) + TOUCH psi_energy dress_stoch_istate state_average_weight PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique - PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order - - !$OMP PARALLEL PRIVATE(i) - i = omp_get_thread_num() - call dress_slave_tcp(i, energy) - !$OMP END PARALLEL - print *, 'dress done' - + !!$OMP PARALLEL PRIVATE(i) + !i = omp_get_thread_num() +! call dress_slave_tcp(i+1, energy) + call dress_slave_tcp(0, energy) + !!$OMP END PARALLEL endif - end do end @@ -70,6 +81,6 @@ subroutine dress_slave_tcp(i,energy) integer, intent(in) :: i logical :: lstop lstop = .False. - call run_dress_slave(0,i,energy,lstop) + call run_dress_slave(0,i,energy) end diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 6bee7256..6b7bf396 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -4,13 +4,14 @@ BEGIN_PROVIDER [ integer, fragment_first ] END_PROVIDER -subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) +subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) use f77_zmq implicit none + integer, intent(in) :: lndet character(len=64000) :: task - + character(len=3200) :: temp integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull integer, external :: omp_get_thread_num double precision, intent(in) :: E(N_states), relative_error @@ -27,9 +28,9 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) double precision :: time integer, external :: add_task_to_taskserver double precision :: state_average_weight_save(N_states) - - - allocate(delta(N_states,N_det), delta_s2(N_det,N_states)) + task(:) = CHAR(0) + temp(:) = CHAR(0) + allocate(delta(N_states,N_det), delta_s2(N_states, N_det)) state_average_weight_save(:) = state_average_weight(:) do dress_stoch_istate=1,N_states SOFT_TOUCH dress_stoch_istate @@ -37,14 +38,14 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) state_average_weight(dress_stoch_istate) = 1.d0 TOUCH state_average_weight + !provide psi_coef_generators provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral dress_weight psi_selectors - + !print *, dress_e0_denominator print *, '========== ================= ================= =================' print *, ' Samples Energy Stat. Error Seconds ' print *, '========== ================= ================= =================' - - + call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull, 'dress') integer, external :: zmq_put_psi @@ -52,6 +53,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) integer, external :: zmq_put_N_det_selectors integer, external :: zmq_put_dvector integer, external :: zmq_set_running + if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then stop 'Unable to put psi on ZMQ server' endif @@ -64,55 +66,87 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',dress_e0_denominator,size(dress_e0_denominator)) == -1) then stop 'Unable to put energy on ZMQ server' endif - + if (zmq_put_dvector(zmq_to_qp_run_socket,1,"state_average_weight",state_average_weight,N_states) == -1) then + stop 'Unable to put state_average_weight on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,"dress_stoch_istate",real(dress_stoch_istate,8),1) == -1) then + stop 'Unable to put dress_stoch_istate on ZMQ server' + endif + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket - integer :: ipos + integer :: ipos, sz + integer :: block(1), block_i, cur_tooth_reduce, ntas + logical :: flushme + block = 0 + block_i = 0 + cur_tooth_reduce = 0 ipos=1 - do i=1,N_dress_jobs - if(dress_jobs(i) > fragment_first) then - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, dress_jobs(i) - ipos += 20 - if (ipos > 63980) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - - ipos=1 - endif - else - do j=1,fragment_count - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, dress_jobs(i) - ipos += 20 - if (ipos > 63980) then + ntas = 0 + do i=1,N_dress_jobs+1 + flushme = (i==N_dress_jobs+1 .or. block_i == size(block) .or. block_i >=cur_tooth_reduce ) + if(.not. flushme) flushme = (tooth_reduce(dress_jobs(i)) == 0 .or. tooth_reduce(dress_jobs(i)) /= cur_tooth_reduce) + + if(flushme .and. block_i > 0) then + if(block(1) > fragment_first) then + ntas += 1 + write(temp, '(I9,1X,60(I9,1X))') 0, block(:block_i) + sz = len(trim(temp))+1 + temp(sz:sz) = '|' + !write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, dress_jobs(i) + write(task(ipos:ipos+sz), *) temp(:sz) + !ipos += 20 + ipos += sz+1 + if (ipos > 63000 .or. i==N_dress_jobs+1) then if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then stop 'Unable to add task to task server' endif + ipos=1 endif - end do + else + if(block_i /= 1) stop "reduced fragmented dets" + do j=1,fragment_count + ntas += 1 + write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, block(1) + ipos += 20 + if (ipos > 63000 .or. i==N_dress_jobs+1) then + ntas += 1 + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + ipos=1 + endif + end do + end if + block_i = 0 + block = 0 + end if + + if(i /= N_dress_jobs+1) then + cur_tooth_reduce = tooth_reduce(dress_jobs(i)) + block_i += 1 + block(block_i) = dress_jobs(i) end if end do - if (ipos > 1) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - endif if (zmq_set_running(zmq_to_qp_run_socket) == -1) then print *, irp_here, ': Failed in zmq_set_running' endif - !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc+1) & + call omp_set_nested(.true.) + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) & !$OMP PRIVATE(i) i = omp_get_thread_num() if (i==0) then call dress_collector(zmq_socket_pull,E, relative_error, delta, delta_s2, dress,& - dress_stoch_istate) + dress_stoch_istate) else call dress_slave_inproc(i) endif !$OMP END PARALLEL + call omp_set_nested(.false.) delta_out(dress_stoch_istate,1:N_det) = delta(dress_stoch_istate,1:N_det) - delta_s2_out(dress_stoch_istate,1:N_det) = delta_s2_out(dress_stoch_istate,1:N_det) + delta_s2_out(dress_stoch_istate,1:N_det) = delta_s2(dress_stoch_istate,1:N_det) call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'dress') print *, '========== ================= ================= =================' @@ -140,7 +174,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, implicit none - integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer, intent(in) :: istate @@ -150,7 +183,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, intent(out) :: delta(N_states, N_det) double precision, intent(out) :: delta_s2(N_states, N_det) - double precision, allocatable :: delta_loc(:,:,:), delta_det(:,:,:,:) + double precision, allocatable :: delta_loc(:,:,:) double precision, allocatable :: dress_detail(:,:) double precision :: dress_mwen(N_states) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket @@ -162,127 +195,93 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer :: i, j, k, i_state, N integer :: task_id, ind double precision, save :: time0 = -1.d0 - double precision :: time, timeLast, old_tooth + double precision :: time double precision, external :: omp_get_wtime - integer :: cur_cp, old_cur_cp - integer, allocatable :: parts_to_get(:) - logical, allocatable :: actually_computed(:) - integer :: total_computed - + integer :: cur_cp, last_cp + integer :: delta_loc_cur, is, N_buf(3) + integer, allocatable :: int_buf(:), agreg_for_cp(:) + double precision, allocatable :: double_buf(:) + integer(bit_kind), allocatable :: det_buf(:,:,:) + integer, external :: zmq_delete_tasks + last_cp = 10000000 + allocate(agreg_for_cp(N_cp)) + agreg_for_cp = 0 + allocate(int_buf(N_dress_int_buffer), double_buf(N_dress_double_buffer), det_buf(N_int,2,N_dress_det_buffer)) + delta_loc_cur = 1 + delta = 0d0 delta_s2 = 0d0 - allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det)) allocate(delta_loc(N_states, N_det, 2)) - dress_detail = 0d0 - delta_det = 0d0 + dress_detail = -1000d0 cp = 0d0 - total_computed = 0 character*(512) :: task - - allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators)) - - dress_mwen =0.d0 - - parts_to_get(:) = 1 - if(fragment_first > 0) then - do i=1,fragment_first - parts_to_get(i) = fragment_count - enddo - endif - - actually_computed = .false. zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() more = 1 if (time0 < 0.d0) then call wall_time(time0) endif - timeLast = time0 - cur_cp = 0 - old_cur_cp = 0 - logical :: loop + logical :: loop, floop + + floop = .true. loop = .true. pullLoop : do while (loop) - call pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id) - dress_mwen(:) = 0d0 - - !!!!! A VERIFIER !!!!! - do i_state=1,N_states - do i=1, N_det - dress_mwen(i_state) += delta_loc(i_state, i, 1) * psi_coef(i, i_state) - end do - end do - - dress_detail(:, ind) += dress_mwen(:) - do j=1,N_cp !! optimizable - if(cps(ind, j) > 0d0) then - if(tooth_of_det(ind) < cp_first_tooth(j)) stop "coef on supposedely deterministic det" - double precision :: fac - integer :: toothMwen - logical :: fracted - fac = cps(ind, j) / cps_N(j) * dress_weight_inv(ind) * comb_step - cp(1:N_states,1:N_det,j,1) += delta_loc(1:N_states,1:N_det,1) * fac - cp(1:N_states,1:N_det,j,2) += delta_loc(1:N_states,1:N_det,2) * fac - end if - end do - toothMwen = tooth_of_det(ind) - fracted = (toothMwen /= 0) - if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) - - if(fracted) then - delta_det(1:N_states,1:N_det,toothMwen-1, 1) = delta_det(1:N_states,1:N_det,toothMwen-1, 1) + delta_loc(1:N_states,1:N_det,1) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,1:N_det,toothMwen-1, 2) = delta_det(1:N_states,1:N_det,toothMwen-1, 2) + delta_loc(1:N_states,1:N_det,2) * (1d0-fractage(toothMwen)) - delta_det(1:N_states,1:N_det,toothMwen , 1) = delta_det(1:N_states,1:N_det,toothMwen , 1) + delta_loc(1:N_states,1:N_det,1) * (fractage(toothMwen)) - delta_det(1:N_states,1:N_det,toothMwen , 2) = delta_det(1:N_states,1:N_det,toothMwen , 2) + delta_loc(1:N_states,1:N_det,2) * (fractage(toothMwen)) - else - delta_det(1:N_states,1:N_det,toothMwen , 1) = delta_det(1:N_states,1:N_det,toothMwen , 1) + delta_loc(1:N_states,1:N_det,1) - delta_det(1:N_states,1:N_det,toothMwen , 2) = delta_det(1:N_states,1:N_det,toothMwen , 2) + delta_loc(1:N_states,1:N_det,2) + call pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, dress_mwen) + if(floop) then + call wall_time(time) + time0 = time + floop = .false. end if - - parts_to_get(ind) -= 1 - if(parts_to_get(ind) == 0) then - actually_computed(ind) = .true. - total_computed += 1 + if(cur_cp == -1 .and. ind == N_det_generators) then + call wall_time(time) end if - - - integer, external :: zmq_delete_tasks - if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then + + if(cur_cp == -1) then + call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then stop 'Unable to delete tasks' - endif - if(more == 0) loop = .false. - - time = omp_get_wtime() - - if((time - timeLast > 2d0) .or. (.not. loop)) then - timeLast = time - cur_cp = N_cp - - do i=1,N_det_generators - if(.not. actually_computed(dress_jobs(i))) then - if(i /= 1) then - cur_cp = done_cp_at(i-1) - else - cur_cp = 0 - end if - exit - end if + endif + if(more == 0) loop = .false. !stop 'loop = .false.' !!!!!!!!!!!!!!!! + dress_detail(:, ind) = dress_mwen(:) + !print *, "DETAIL", ind, dress_mwen + else if(cur_cp > 0) then + if(ind == 0) cycle + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + do i=1,N_det + cp(:,i,cur_cp,1) += delta_loc(:,i,1) end do - if(cur_cp == 0) cycle pullLoop + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + do i=1,N_det + cp(:,i,cur_cp,2) += delta_loc(:,i,2) + end do + !$OMP END PARALLEL DO + agreg_for_cp(cur_cp) += ind + !print *, agreg_for_cp(cur_cp), ind, needed_by_cp(cur_cp), cur_cp + if(agreg_for_cp(cur_cp) > needed_by_cp(cur_cp)) then + stop "too much results..." + end if + if(agreg_for_cp(cur_cp) /= needed_by_cp(cur_cp)) cycle + + call wall_time(time) + last_cp = cur_cp double precision :: su, su2, eqt, avg, E0, val integer, external :: zmq_abort su = 0d0 su2 = 0d0 - + !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i, val) SHARED(comb, dress_detail, & + !$OMP cur_cp,istate,cps_N) REDUCTION(+:su) REDUCTION(+:su2) do i=1, int(cps_N(cur_cp)) call get_comb_val(comb(i), dress_detail, cur_cp, val, istate) su += val su2 += val*val end do + !$OMP END PARALLEL DO + avg = su / cps_N(cur_cp) eqt = dsqrt( ((su2 / cps_N(cur_cp)) - avg*avg) / cps_N(cur_cp) ) E0 = sum(dress_detail(istate, :first_det_of_teeth(cp_first_tooth(cur_cp))-1)) @@ -290,44 +289,25 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, E0 = E0 + dress_detail(istate, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp))) end if - - call wall_time(time) - if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. total_computed == N_det_generators) then + !print '(I2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' + print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', cps_N(cur_cp), avg+E0+E(istate), eqt, time-time0, '' + if ((dabs(eqt/(avg+E0+E(istate))) < relative_error .and. cps_N(cur_cp) >= 10)) then ! Termination - print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' + print *, "TERMINATE" if (zmq_abort(zmq_to_qp_run_socket) == -1) then call sleep(1) if (zmq_abort(zmq_to_qp_run_socket) == -1) then print *, irp_here, ': Error in sending abort signal (2)' endif - endif - else - if (cur_cp > old_cur_cp) then - old_cur_cp = cur_cp - print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' - endif + endif endif end if end do pullLoop - if(total_computed == N_det_generators) then - delta (1:N_states,1:N_det) = 0d0 - delta_s2(1:N_states,1:N_det) = 0d0 - do i=comb_teeth+1,0,-1 - delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1) - delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2) - end do - else + delta(:,:) = cp(:,:,last_cp,1) + delta_s2(:,:) = cp(:,:,last_cp,2) - delta (1:N_states,1:N_det) = cp(1:N_states,1:N_det,cur_cp,1) - delta_s2(1:N_states,1:N_det) = cp(1:N_states,1:N_det,cur_cp,2) - do i=cp_first_tooth(cur_cp)-1,0,-1 - delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1) - delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2) - end do - - end if - dress(istate) = E(istate)+E0 + dress(istate) = E(istate)+E0+avg call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) end subroutine @@ -368,8 +348,8 @@ end function ! ! gen_per_cp : number of generators per checkpoint END_DOC - comb_teeth = 64 - N_cps_max = 256 + comb_teeth = min(1+N_det/10,10) + N_cps_max = 16 gen_per_cp = (N_det_generators / N_cps_max) + 1 END_PROVIDER @@ -378,25 +358,51 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, cps_N, (N_cps_max) ] &BEGIN_PROVIDER [ integer, cp_first_tooth, (N_cps_max) ] &BEGIN_PROVIDER [ integer, done_cp_at, (N_det_generators) ] +&BEGIN_PROVIDER [ integer, done_cp_at_det, (N_det_generators) ] +&BEGIN_PROVIDER [ integer, needed_by_cp, (0:N_cps_max) ] &BEGIN_PROVIDER [ double precision, cps, (N_det_generators, N_cps_max) ] &BEGIN_PROVIDER [ integer, N_dress_jobs ] &BEGIN_PROVIDER [ integer, dress_jobs, (N_det_generators) ] &BEGIN_PROVIDER [ double precision, comb, (N_det_generators) ] +&BEGIN_PROVIDER [ integer, tooth_reduce, (N_det_generators) ] implicit none - logical, allocatable :: computed(:) + logical, allocatable :: computed(:), comp_filler(:) integer :: i, j, last_full, dets(comb_teeth) + integer :: k, l, cur_cp, under_det(comb_teeth+1) + integer :: cp_limit(N_cps_max) integer, allocatable :: iorder(:), first_cp(:) + integer, allocatable :: filler(:) + integer :: nfiller, lfiller, cfiller + logical :: fracted + + integer :: first_suspect + provide psi_coef_generators + first_suspect = 1 + allocate(filler(n_det_generators)) allocate(iorder(N_det_generators), first_cp(N_cps_max+1)) allocate(computed(N_det_generators)) + allocate(comp_filler(N_det_generators)) first_cp = 1 cps = 0d0 cur_cp = 1 done_cp_at = 0 - + done_cp_at_det = 0 + needed_by_cp = 0 + comp_filler = .false. computed = .false. + cps_N = 1d0 + tooth_reduce = 0 + integer :: fragsize + fragsize = N_det_generators / ((N_cps_max-1+1)*(N_cps_max-1+2)/2) + + do i=1,N_cps_max + cp_limit(i) = fragsize * i * (i+1) / 2 + end do + cp_limit(N_cps_max) = N_det*2 + N_dress_jobs = first_det_of_comb - 1 do i=1, N_dress_jobs dress_jobs(i) = i @@ -404,13 +410,18 @@ END_PROVIDER end do l=first_det_of_comb + call random_seed(put=(/321,654,65,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/)) call RANDOM_NUMBER(comb) + lfiller = 1 + nfiller = 1 do i=1,N_det_generators + !print *, i, N_dress_jobs comb(i) = comb(i) * comb_step !DIR$ FORCEINLINE call add_comb(comb(i), computed, cps(1, cur_cp), N_dress_jobs, dress_jobs) - if(N_dress_jobs / gen_per_cp > (cur_cp-1) .or. N_dress_jobs == N_det_generators) then + !if(N_dress_jobs / gen_per_cp > (cur_cp-1) .or. N_dress_jobs == N_det_generators) then + if(N_dress_jobs > cp_limit(cur_cp) .or. N_dress_jobs == N_det_generators) then first_cp(cur_cp+1) = N_dress_jobs done_cp_at(N_dress_jobs) = cur_cp cps_N(cur_cp) = dfloat(i) @@ -419,17 +430,69 @@ END_PROVIDER cur_cp += 1 end if - if (N_dress_jobs == N_det_generators) exit + if (N_dress_jobs == N_det_generators) then + exit + end if end if - do while (computed(l)) - l=l+1 - enddo - k=N_dress_jobs+1 - dress_jobs(k) = l - computed(l) = .True. - N_dress_jobs = k + + !!!!!!!!!!!!!!!!!!!!!!!! + if(.TRUE.) then + do l=first_suspect,N_det_generators + if((.not. computed(l))) then + N_dress_jobs+=1 + dress_jobs(N_dress_jobs) = l + computed(l) = .true. + first_suspect = l + exit + end if + end do + + if (N_dress_jobs == N_det_generators) exit + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ELSE + !!!!!!!!!!!!!!!!!!!!!!!!!!!! + do l=first_suspect,N_det_generators + if((.not. computed(l)) .and. (.not. comp_filler(l))) exit + end do + first_suspect = l + if(l > N_det_generators) cycle + + cfiller = tooth_of_det(l)-1 + if(cfiller > lfiller) then + do j=1,nfiller-1 + if(.not. computed(filler(j))) then + k=N_dress_jobs+1 + dress_jobs(k) = filler(j) + N_dress_jobs = k + end if + computed(filler(j)) = .true. + end do + nfiller = 2 + filler(1) = l + lfiller = cfiller + else + filler(nfiller) = l + nfiller += 1 + end if + comp_filler(l) = .True. + end if + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! enddo + + + do j=1,nfiller-1 + if(.not. computed(filler(j)))then + k=N_dress_jobs+1 + dress_jobs(k) = filler(j) + N_dress_jobs = k + end if + computed(filler(j)) = .true. + end do + + N_cp = cur_cp + if(N_dress_jobs /= N_det_generators .or. N_cp > N_cps_max) then print *, N_dress_jobs, N_det_generators, N_cp, N_cps_max stop "error in jobs creation" @@ -439,8 +502,10 @@ END_PROVIDER do i=1,N_dress_jobs if(done_cp_at(i) /= 0) cur_cp = done_cp_at(i) done_cp_at(i) = cur_cp + done_cp_at_det(dress_jobs(i)) = cur_cp + needed_by_cp(cur_cp) += 1 end do - + under_det = 0 cp_first_tooth = 0 @@ -461,13 +526,41 @@ END_PROVIDER end if end do end do - cps(:, N_cp) = 0d0 cp_first_tooth(N_cp) = comb_teeth+1 + + do i=1,N_det_generators + do j=N_cp,2,-1 + cps(i,j) -= cps(i,j-1) + end do + end do iorder = -1 - do i=1,N_cp-1 - call isort(dress_jobs(first_cp(i)+1:first_cp(i+1)),iorder,first_cp(i+1)-first_cp(i)) + + cps(:, N_cp) = 0d0 + + iloop : do i=fragment_first+1,N_det_generators + k = tooth_of_det(i) + if(k == 0) cycle + if (i == first_det_of_teeth(k)) cycle + + do j=1,N_cp + if(cps(i, j) /= 0d0) cycle iloop + end do + + tooth_reduce(i) = k + end do iloop + + do i=1,N_det_generators + if(tooth_reduce(dress_jobs(i)) == 0) dress_jobs(i) = dress_jobs(i)+N_det*2 end do + + do i=1,N_cp-1 + call isort(dress_jobs(first_cp(i)+1),iorder,first_cp(i+1)-first_cp(i)-1) + end do + + do i=1,N_det_generators + if(dress_jobs(i) > N_det) dress_jobs(i) = dress_jobs(i) - N_det*2 + end do END_PROVIDER @@ -527,7 +620,6 @@ subroutine add_comb(com, computed, cp, N, tbc) !DIR$ FORCEINLINE call get_comb(com, dets) - k=N+1 do i = 1, comb_teeth l = dets(i) @@ -588,10 +680,11 @@ END_PROVIDER norm_left = 1d0 comb_step = 1d0/dfloat(comb_teeth) + !print *, "comb_step", comb_step first_det_of_comb = 1 - do i=1,min(100,N_det_generators) + do i=1,N_det_generators ! min(100,N_det_generators) + first_det_of_comb = i if(dress_weight(i)/norm_left < comb_step) then - first_det_of_comb = i exit end if norm_left -= dress_weight(i) diff --git a/plugins/dress_zmq/dress_zmq_routines.irp.f b/plugins/dress_zmq/dress_zmq_routines.irp.f index bde2c6d8..dc47eb20 100644 --- a/plugins/dress_zmq/dress_zmq_routines.irp.f +++ b/plugins/dress_zmq/dress_zmq_routines.irp.f @@ -2,6 +2,8 @@ subroutine dress_zmq() implicit none double precision, allocatable :: energy(:) allocate (energy(N_states)) + threshold_selectors = 1.d0 + threshold_generators = 1d0 read_wf = .True. SOFT_TOUCH read_wf diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index 928c72c5..bf2ab207 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -63,8 +63,20 @@ BEGIN_PROVIDER [ double precision, dress_norm_acc, (0:N_det, N_states) ] END_PROVIDER +BEGIN_PROVIDER [ integer , N_det_delta_ij ] + implicit none + N_det_delta_ij = 1 +END_PROVIDER -BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det,2) ] +BEGIN_PROVIDER [ double precision, delta_ij, (N_states, N_det, 2) ] + implicit none + if(.true.) then + delta_ij(:,:N_det_delta_ij, :) = delta_ij_tmp(:,:,:) + endif + delta_ij(:,N_det_delta_ij+1:,:) = 0d0 +END_PROVIDER + +BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] use bitmasks implicit none @@ -72,29 +84,35 @@ BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det,2) ] double precision, allocatable :: dress(:), del(:,:), del_s2(:,:) double precision :: E_CI_before(N_states), relative_error -! double precision, save :: errr = 0d0 - allocate(dress(N_states), del(N_states, N_det), del_s2(N_states, N_det)) - - delta_ij = 0d0 - - E_CI_before(:) = psi_energy(:) + nuclear_repulsion - threshold_selectors = 1.d0 - threshold_generators = 1d0 -! if(errr /= 0d0) then -! errr = errr / 2d0 -! else -! errr = 1d-4 -! end if - relative_error = 1.d-4 - call write_double(6,relative_error,"Convergence of the stochastic algorithm") - - call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error)) - delta_ij(:,:,1) = del(:,:) - delta_ij(:,:,2) = del_s2(:,:) - - deallocate(dress, del, del_s2) + ! prevents re-providing if delta_ij_tmp is + ! just being copied + if(N_det_delta_ij == N_det) then + allocate(dress(N_states), del(N_states, N_det_delta_ij), del_s2(N_states, N_det_delta_ij)) + + delta_ij_tmp = 0d0 + + E_CI_before(:) = psi_energy(:) + nuclear_repulsion + threshold_selectors = 1.d0 + threshold_generators = 1.d0 + SOFT_TOUCH threshold_selectors threshold_generators + ! if(errr /= 0d0) then + ! errr = errr / 2d0 + ! else + ! errr = 1d-4 + ! end if + relative_error = 1.d-2 + + call write_double(6,relative_error,"Convergence of the stochastic algorithm") + + call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error), N_det_delta_ij) + delta_ij_tmp(:,:,1) = del(:,:) + delta_ij_tmp(:,:,2) = del_s2(:,:) + + + deallocate(dress, del, del_s2) + end if END_PROVIDER diff --git a/plugins/dress_zmq/dressing_vector.irp.f b/plugins/dress_zmq/dressing_vector.irp.f index e098cf78..98c69cbf 100644 --- a/plugins/dress_zmq/dressing_vector.irp.f +++ b/plugins/dress_zmq/dressing_vector.irp.f @@ -9,16 +9,17 @@ integer :: i,ii,k,j, l double precision :: f, tmp double precision, external :: u_dot_v - + logical, external :: detEq + dressing_column_h(:,:) = 0.d0 dressing_column_s(:,:) = 0.d0 do k=1,N_states do j = 1, n_det dressing_column_h(j,k) = delta_ij(k,j,1) - dressing_column_s(j,k) = delta_ij(k,j,2) + dressing_column_s(j,k) = delta_ij(k,j,2) +! print *, j, delta_ij(k,j,:) enddo enddo - END_PROVIDER diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 0311d2ed..8801cb3f 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -1,3 +1,5 @@ +use bitmasks + BEGIN_PROVIDER [ integer, fragment_count ] implicit none BEGIN_DOC @@ -7,16 +9,17 @@ BEGIN_PROVIDER [ integer, fragment_count ] END_PROVIDER -subroutine run_dress_slave(thread,iproc,energy) +subroutine run_dress_slave(thread,iproce,energy) use f77_zmq + use omp_lib implicit none double precision, intent(in) :: energy(N_states_diag) - integer, intent(in) :: thread, iproc + integer, intent(in) :: thread, iproce integer :: rc, i, subset, i_generator integer :: worker_id, task_id, ctask, ltask - character*(512) :: task + character*(5120) :: task integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -30,63 +33,257 @@ subroutine run_dress_slave(thread,iproc,energy) integer :: ind double precision,allocatable :: delta_ij_loc(:,:,:) - double precision :: div(N_states) integer :: h,p,n,i_state logical :: ok - allocate(delta_ij_loc(N_states,N_det,2)) + integer, allocatable :: int_buf(:) + double precision, allocatable :: double_buf(:) + integer(bit_kind), allocatable :: det_buf(:,:,:) + integer :: N_buf(3) + logical :: last + !integer, external :: omp_get_thread_num + double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:) + integer :: toothMwen + logical :: fracted + double precision :: fac + + +! if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" + allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) + allocate(cp(N_states, N_det, N_cp, 2)) + delta_det = 0d9 + cp = 0d0 + + + task(:) = CHAR(0) + + + + integer :: iproc, cur_cp, done_for(0:N_cp) + integer, allocatable :: tasks(:) + integer :: lastCp(Nproc) + integer :: lastSent, lastSendable + logical :: send + integer(kind=OMP_LOCK_KIND) :: lck_det(0:comb_teeth+1) + integer(kind=OMP_LOCK_KIND) :: lck_sto(0:N_cp+1) + + do i=0,N_cp+1 + call omp_init_lock(lck_sto(i)) + end do + do i=0,comb_teeth+1 + call omp_init_lock(lck_det(i)) + end do + + lastCp = 0 + lastSent = 0 + send = .false. + done_for = 0 + + double precision :: hij, sij + !call i_h_j_s2(psi_det(1,1,1),psi_det(1,1,2),N_int,hij, sij) + + hij = dress_E0_denominator(1) !PROVIDE BEFORE OMP PARALLEL + + !$OMP PARALLEL DEFAULT(SHARED) & + !$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) & + !$OMP PRIVATE(lastSendable, toothMwen, fracted, fac) & + !$OMP PRIVATE(i, cur_cp, send, i_generator, subset, iproc, N_buf) & + !$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) if(worker_id == -1) then - print *, "WORKER -1" call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) - return + stop "WORKER -1" end if - do i=1,N_states - div(i) = psi_coef(dressed_column_idx(i), i) - end do + + + iproc = omp_get_thread_num()+1 + allocate(int_buf(N_dress_int_buffer)) + allocate(double_buf(N_dress_double_buffer)) + allocate(det_buf(N_int, 2, N_dress_det_buffer)) + allocate(delta_ij_loc(N_states,N_det,2)) do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) - + task = task//" 0" + if(task_id == 0) exit + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if(task_id /= 0) then read (task,*) subset, i_generator - delta_ij_loc = 0d0 - call alpha_callback(delta_ij_loc, i_generator, subset, iproc) + !$OMP ATOMIC + done_for(done_cp_at_det(i_generator)) += 1 + ! print *, "IGEN", i_generator, done_cp_at_det(i_generator) + delta_ij_loc(:,:,:) = 0d0 + call generator_start(i_generator, iproc) + call alpha_callback(delta_ij_loc, i_generator, subset, iproc) + call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc) + + do i=1,N_cp + fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step + if(fac == 0d0) cycle + call omp_set_lock(lck_sto(i)) + cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac) + cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac) + call omp_unset_lock(lck_sto(i)) + end do + + + toothMwen = tooth_of_det(i_generator) + fracted = (toothMwen /= 0) + if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen)) + if(fracted) then + call omp_set_lock(lck_det(toothMwen)) + call omp_set_lock(lck_det(toothMwen-1)) + delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen)) + delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen)) + delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen)) + delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen)) + call omp_unset_lock(lck_det(toothMwen)) + call omp_unset_lock(lck_det(toothMwen-1)) + else + call omp_set_lock(lck_det(toothMwen)) + delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) + delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) + call omp_unset_lock(lck_det(toothMwen)) + end if + call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) - call push_dress_results(zmq_socket_push, i_generator, delta_ij_loc, task_id) - else - exit + lastCp(iproc) = done_cp_at_det(i_generator) end if + + !$OMP CRITICAL + send = .false. + lastSendable = N_cp*2 + do i=1,Nproc + lastSendable = min(lastCp(i), lastSendable) + end do + lastSendable -= 1 + if(lastSendable > lastSent .or. (lastSendable == N_cp-1 .and. lastSent /= N_cp-1)) then + lastSent = lastSendable + cur_cp = lastSent + send = .true. + end if + !$OMP END CRITICAL + + if(send) then + N_buf = (/0,1,0/) + + delta_ij_loc = 0d0 + if(cur_cp < 1) stop "cur_cp < 1" + do i=1,cur_cp + delta_ij_loc(:,:,1) += cp(:,:,i,1) + delta_ij_loc(:,:,2) += cp(:,:,i,2) + end do + + delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) / cps_N(cur_cp) + do i=cp_first_tooth(cur_cp)-1,0,-1 + delta_ij_loc(:,:,1) = delta_ij_loc(:,:,1) +delta_det(:,:,i,1) + delta_ij_loc(:,:,2) = delta_ij_loc(:,:,2) +delta_det(:,:,i,2) + end do + call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1) + end if + + if(task_id == 0) exit end do + call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) - call end_zmq_push_socket(zmq_socket_push,thread) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) + !$OMP END PARALLEL + + do i=0,N_cp+1 + call omp_destroy_lock(lck_sto(i)) + end do + do i=0,comb_teeth+1 + call omp_destroy_lock(lck_det(i)) + end do end subroutine -subroutine push_dress_results(zmq_socket_push, ind, delta_loc, task_id) + + +subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id) use f77_zmq implicit none - + + integer, parameter :: sendt = 4 integer(ZMQ_PTR), intent(in) :: zmq_socket_push - double precision, intent(in) :: delta_loc(N_states, N_det, 2) - integer, intent(in) :: ind, task_id - integer :: rc, i + double precision, intent(inout) :: delta_loc(N_states, N_det, 2) + real(kind=4), allocatable :: delta_loc4(:,:,:) + double precision, intent(in) :: double_buf(*) + integer, intent(in) :: int_buf(*) + integer(bit_kind), intent(in) :: det_buf(N_int, 2, *) + integer, intent(in) :: N_bufi(3) + integer :: N_buf(3) + integer, intent(in) :: ind, cur_cp, task_id + integer :: rc, i, j, k, l + double precision :: contrib(N_states) + real(sendt), allocatable :: r4buf(:,:,:) + + rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + rc = f77_zmq_send( zmq_socket_push, cur_cp, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" - rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" + if(cur_cp /= -1) then + allocate(r4buf(N_states, N_det, 2)) + do i=1,2 + do j=1,N_det + do k=1,N_states + r4buf(k,j,i) = real(delta_loc(k,j,i), sendt) + end do + end do + end do + rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,1), sendt*N_states*N_det, ZMQ_SNDMORE) + if(rc /= sendt*N_states*N_det) stop "push" - rc = f77_zmq_send( zmq_socket_push, delta_loc, 8*N_states*N_det*2, ZMQ_SNDMORE) - if(rc /= 8*N_states*N_det*2) stop "push" - - rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) - if(rc /= 4) stop "push" + rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,2), sendt*N_states*N_det, ZMQ_SNDMORE) + if(rc /= sendt*N_states*N_det) stop "push" + else + contrib = 0d0 + do i=1,N_det + contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :) + end do + + rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE) + if(rc /= 8*N_states) stop "push" + + N_buf = N_bufi + !N_buf = (/0,1,0/) + + rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) + if(rc /= 4*3) stop "push5" + + if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" + if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" + if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" + + + if(N_buf(1) > 0) then + rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE) + if(rc /= 4*N_buf(1)) stop "push6" + end if + + if(N_buf(2) > 0) then + rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE) + if(rc /= 8*N_buf(2)) stop "push8" + end if + + if(N_buf(3) > 0) then + rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE) + if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10" + end if + + rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) + if(rc /= 4) stop "push11" + end if ! Activate is zmq_socket_push is a REQ IRP_IF ZMQ_PUSH @@ -98,25 +295,80 @@ IRP_ENDIF end subroutine -subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id) +BEGIN_PROVIDER [ real(4), real4buf, (N_states, N_det, 2) ] + +END_PROVIDER + + +subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib) use f77_zmq implicit none + integer, parameter :: sendt = 4 integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + integer, intent(out) :: cur_cp double precision, intent(inout) :: delta_loc(N_states, N_det, 2) + double precision, intent(out) :: double_buf(*), contrib(N_states) + integer, intent(out) :: int_buf(*) + integer(bit_kind), intent(out) :: det_buf(N_int, 2, *) integer, intent(out) :: ind integer, intent(out) :: task_id - integer :: rc, i - + integer :: rc, i, j, k + integer, intent(out) :: N_buf(3) rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) - if(rc /= 4) stop "pull" + if(rc /= 4) stop "pulla" + + rc = f77_zmq_recv( zmq_socket_pull, cur_cp, 4, 0) + if(rc /= 4) stop "pulla" + + - rc = f77_zmq_recv( zmq_socket_pull, delta_loc, N_states*8*N_det*2, 0) - if(rc /= 8*N_states*N_det*2) stop "pull" - rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) - if(rc /= 4) stop "pull" + if(cur_cp /= -1) then + + rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,1), N_states*sendt*N_det, 0) + if(rc /= sendt*N_states*N_det) stop "pullc" + + rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,2), N_states*sendt*N_det, 0) + if(rc /= sendt*N_states*N_det) stop "pulld" + + do i=1,2 + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,k) + do j=1,N_det + do k=1,N_states + delta_loc(k,j,i) = real(real4buf(k,j,i), 8) + end do + end do + end do + else + rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0) + if(rc /= 8*N_states) stop "pullc" + + rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0) + if(rc /= 4*3) stop "pull" + if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" + if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" + if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" + + if(N_buf(1) > 0) then + rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0) + if(rc /= 4*N_buf(1)) stop "pull1" + end if + + if(N_buf(2) > 0) then + rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0) + if(rc /= 8*N_buf(2)) stop "pull2" + end if + + if(N_buf(3) > 0) then + rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0) + if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3" + end if + + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) + if(rc /= 4) stop "pull4" + end if ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index fc4592ea..ad557d1a 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -581,7 +581,7 @@ END_PROVIDER double precision, allocatable :: mrcc(:) double precision :: E_CI_before!, relative_error - double precision, save :: target_error = 1d-4 + double precision, save :: target_error = 2d-2 allocate(mrcc(N_states)) @@ -594,11 +594,10 @@ END_PROVIDER threshold_selectors = 1.d0 threshold_generators = 1d0 if(target_error /= 0d0) then - target_error = target_error / 2d0 ! (-mrcc_E0_denominator(1) + mrcc_previous_E(1)) / 1d1 + target_error = target_error * 0.5d0 ! (-mrcc_E0_denominator(1) + mrcc_previous_E(1)) / 1d1 else target_error = 1d-4 end if - target_error = 0d0 call ZMQ_mrcc(E_CI_before, mrcc, delta_ij_mrcc_zmq, delta_ij_s2_mrcc_zmq, abs(target_error)) mrcc_previous_E(:) = mrcc_E0_denominator(:) diff --git a/plugins/shiftedbk/EZFIO.cfg b/plugins/shiftedbk/EZFIO.cfg index c8dbb19e..be4998dd 100644 --- a/plugins/shiftedbk/EZFIO.cfg +++ b/plugins/shiftedbk/EZFIO.cfg @@ -1,22 +1,44 @@ -[energy] -type: double precision -doc: Calculated energy -interface: ezfio - -[thresh_dressed_ci] -type: Threshold -doc: Threshold on the convergence of the dressed CI energy -interface: ezfio,provider,ocaml -default: 1.e-5 - -[n_it_max_dressed_ci] -type: Strictly_positive_int -doc: Maximum number of dressed CI iterations -interface: ezfio,provider,ocaml -default: 10 - [h0_type] type: Perturbation doc: Type of zeroth-order Hamiltonian [ EN | Barycentric ] interface: ezfio,provider,ocaml default: EN +[energy] +type: double precision +doc: Calculated Selected FCI energy +interface: ezfio + +[energy_pt2] +type: double precision +doc: Calculated FCI energy + PT2 +interface: ezfio + +[iterative_save] +type: integer +doc: Save data at each iteration : 1(Append) | 2(Overwrite) | 3(NoSave) +interface: ezfio,ocaml +default: 2 + +[n_iter] +interface: ezfio +doc: number of iterations +type:integer + +[n_det_iter] +interface: ezfio +doc: number of determinants at iteration +type: integer +size: (full_ci_zmq.n_iter) + +[energy_iter] +interface: ezfio +doc: The energy without a pt2 correction for n_det +type: double precision +size: (determinants.n_states,full_ci_zmq.n_iter) + +[pt2_iter] +interface: ezfio +doc: The pt2 correction for n_det +type: double precision +size: (determinants.n_states,full_ci_zmq.n_iter) + diff --git a/plugins/shiftedbk/NEEDED_CHILDREN_MODULES b/plugins/shiftedbk/NEEDED_CHILDREN_MODULES index c3290687..0a06e986 100644 --- a/plugins/shiftedbk/NEEDED_CHILDREN_MODULES +++ b/plugins/shiftedbk/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -dress_zmq DavidsonDressed +dress_zmq DavidsonDressed Selectors_full Generators_CAS diff --git a/plugins/shiftedbk/selection_buffer.irp.f b/plugins/shiftedbk/selection_buffer.irp.f new file mode 100644 index 00000000..8b140666 --- /dev/null +++ b/plugins/shiftedbk/selection_buffer.irp.f @@ -0,0 +1,244 @@ + +subroutine create_selection_buffer(N, siz_, res) + use selection_types + implicit none + + integer, intent(in) :: N, siz_ + type(selection_buffer), intent(out) :: res + + integer :: siz + siz = max(siz_,1) + allocate(res%det(N_int, 2, siz), res%val(siz)) + + res%val(:) = 0d0 + res%det(:,:,:) = 0_8 + res%N = N + res%mini = 0d0 + res%cur = 0 +end subroutine + +subroutine delete_selection_buffer(b) + use selection_types + implicit none + type(selection_buffer), intent(inout) :: b + if (associated(b%det)) then + deallocate(b%det) + endif + if (associated(b%val)) then + deallocate(b%val) + endif +end + + +subroutine add_to_selection_buffer(b, det, val) + use selection_types + implicit none + + type(selection_buffer), intent(inout) :: b + integer(bit_kind), intent(in) :: det(N_int, 2) + double precision, intent(in) :: val + integer :: i + + if(b%N > 0 .and. val <= b%mini) then + b%cur += 1 + b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2) + b%val(b%cur) = val + if(b%cur == size(b%val)) then + call sort_selection_buffer(b) + end if + end if +end subroutine + +subroutine merge_selection_buffers(b1, b2) + use selection_types + implicit none + BEGIN_DOC +! Merges the selection buffers b1 and b2 into b2 + END_DOC + type(selection_buffer), intent(inout) :: b1 + type(selection_buffer), intent(inout) :: b2 + integer(bit_kind), pointer :: detmp(:,:,:) + double precision, pointer :: val(:) + integer :: i, i1, i2, k, nmwen + if (b1%cur == 0) return + do while (b1%val(b1%cur) > b2%mini) + b1%cur = b1%cur-1 + if (b1%cur == 0) then + return + endif + enddo + nmwen = min(b1%N, b1%cur+b2%cur) + allocate( val(size(b1%val)), detmp(N_int, 2, size(b1%det,3)) ) + i1=1 + i2=1 + do i=1,nmwen + if ( (i1 > b1%cur).and.(i2 > b2%cur) ) then + exit + else if (i1 > b1%cur) then + val(i) = b2%val(i2) + detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2) + detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2) + i2=i2+1 + else if (i2 > b2%cur) then + val(i) = b1%val(i1) + detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1) + detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1) + i1=i1+1 + else + if (b1%val(i1) <= b2%val(i2)) then + val(i) = b1%val(i1) + detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1) + detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1) + i1=i1+1 + else + val(i) = b2%val(i2) + detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2) + detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2) + i2=i2+1 + endif + endif + enddo + deallocate(b2%det, b2%val) + do i=nmwen+1,b2%N + val(i) = 0.d0 + detmp(1:N_int,1:2,i) = 0_bit_kind + enddo + b2%det => detmp + b2%val => val + b2%mini = min(b2%mini,b2%val(b2%N)) + b2%cur = nmwen +end + + +subroutine sort_selection_buffer(b) + use selection_types + implicit none + + type(selection_buffer), intent(inout) :: b + integer, allocatable :: iorder(:) + integer(bit_kind), pointer :: detmp(:,:,:) + integer :: i, nmwen + logical, external :: detEq + if (b%N == 0 .or. b%cur == 0) return + nmwen = min(b%N, b%cur) + + allocate(iorder(b%cur), detmp(N_int, 2, size(b%det,3))) + do i=1,b%cur + iorder(i) = i + end do + call dsort(b%val, iorder, b%cur) + do i=1, nmwen + detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i)) + detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i)) + end do + deallocate(b%det,iorder) + b%det => detmp + b%mini = min(b%mini,b%val(b%N)) + b%cur = nmwen +end subroutine + + + +subroutine truncate_to_mini(b) + use selection_types + implicit none + + type(selection_buffer), intent(inout) :: b + + do + if(b%cur == 0) exit + if(b%val(b%cur) <= b%mini) exit + b%cur -= 1 + end do +end subroutine + + + + + +subroutine unique_selection_buffer(b) + use selection_types + implicit none + BEGIN_DOC +! Removes duplicate determinants in the selection buffer + END_DOC + type(selection_buffer), intent(inout) :: b + integer, allocatable :: iorder(:) + integer(bit_kind), pointer :: detmp(:,:,:) + double precision, pointer :: val(:) + integer :: i,j,k + integer(bit_kind), allocatable :: bit_tmp(:) + logical,allocatable :: duplicate(:) + + logical :: found_duplicates + integer*8, external :: det_search_key + + if (b%N == 0 .or. b%cur == 0) return + allocate (duplicate(b%cur), val(size(b%val)), detmp(N_int, 2, size(b%val)), bit_tmp(b%cur)) + call sort_dets_by_det_search_key(b%cur, b%det, b%val, detmp, val, 1) + + deallocate(b%det, b%val) + do i=b%cur+1,b%N + val(i) = 0.d0 + detmp(1:N_int,1:2,i) = 0_bit_kind + enddo + b%det => detmp + b%val => val + + do i=1,b%cur + bit_tmp(i) = det_search_key(b%det(1,1,i),N_int) + duplicate(i) = .False. + enddo + + do i=1,b%cur-1 + if (duplicate(i)) then + cycle + endif + j = i+1 + do while (bit_tmp(j)==bit_tmp(i)) + if (duplicate(j)) then + j += 1 + if (j > b%cur) then + exit + else + cycle + endif + endif + duplicate(j) = .True. + do k=1,N_int + if ( (b%det(k,1,i) /= b%det(k,1,j) ) & + .or. (b%det(k,2,i) /= b%det(k,2,j) ) ) then + duplicate(j) = .False. + exit + endif + enddo + j += 1 + if (j > b%cur) then + exit + endif + enddo + enddo + + found_duplicates = .False. + do i=1,b%cur + if (duplicate(i)) then + found_duplicates = .True. + exit + endif + enddo + + if (found_duplicates) then + k=0 + do i=1,N_det + if (.not.duplicate(i)) then + k += 1 + b%det(:,:,k) = b%det(:,:,i) + b%val(k) = b%val(i) + endif + enddo + b%cur = k + endif + deallocate (duplicate,bit_tmp) +end + + diff --git a/plugins/shiftedbk/selection_types.f90 b/plugins/shiftedbk/selection_types.f90 new file mode 100644 index 00000000..29e48524 --- /dev/null +++ b/plugins/shiftedbk/selection_types.f90 @@ -0,0 +1,9 @@ +module selection_types + type selection_buffer + integer :: N, cur + integer(8) , pointer :: det(:,:,:) + double precision, pointer :: val(:) + double precision :: mini + endtype +end module + diff --git a/plugins/shiftedbk/shifted_bk.irp.f b/plugins/shiftedbk/shifted_bk.irp.f index b7a2a1ce..df971b4f 100644 --- a/plugins/shiftedbk/shifted_bk.irp.f +++ b/plugins/shiftedbk/shifted_bk.irp.f @@ -3,7 +3,14 @@ program shifted_bk BEGIN_DOC ! TODO END_DOC + + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order - call diagonalize_CI() + + !call diagonalize_CI() call dress_zmq() end + diff --git a/plugins/shiftedbk/shifted_bk_iter.irp.f b/plugins/shiftedbk/shifted_bk_iter.irp.f new file mode 100644 index 00000000..429efa4b --- /dev/null +++ b/plugins/shiftedbk/shifted_bk_iter.irp.f @@ -0,0 +1,159 @@ +program shifted_bk + implicit none + integer :: i,j,k + double precision, allocatable :: pt2(:) + integer :: degree + integer :: n_det_before + double precision :: threshold_davidson_in + + allocate (pt2(N_states)) + + double precision :: hf_energy_ref + logical :: has + double precision :: relative_error, absolute_error + integer :: N_states_p + character*(512) :: fmt + + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order + + + pt2 = -huge(1.e0) + threshold_davidson_in = threshold_davidson + threshold_davidson = threshold_davidson_in * 100.d0 + SOFT_TOUCH threshold_davidson + + call diagonalize_CI_dressed + call save_wavefunction + + call ezfio_has_hartree_fock_energy(has) + if (has) then + call ezfio_get_hartree_fock_energy(hf_energy_ref) + else + hf_energy_ref = ref_bitmask_energy + endif + + if (N_det > N_det_max) then + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + N_det = N_det_max + soft_touch N_det psi_det psi_coef + call diagonalize_CI_dressed + call save_wavefunction + N_states_p = min(N_det,N_states) + endif + + n_det_before = 0 + + character*(8) :: pt2_string + double precision :: threshold_selectors_save, threshold_generators_save + threshold_selectors_save = threshold_selectors + threshold_generators_save = threshold_generators + double precision :: error(N_states), energy(N_states) + error = 0.d0 + + threshold_selectors = 1.d0 + threshold_generators = 1d0 + + if (.True.) then + pt2_string = '(sh-Bk) ' + do while ( (N_det < N_det_max) ) + write(*,'(A)') '--------------------------------------------------------------------------------' + + N_det_delta_ij = N_det + + do i=1,N_states + energy(i) = psi_energy(i)+nuclear_repulsion + enddo + + PROVIDE delta_ij_tmp + call delta_ij_done() + + call diagonalize_ci_dressed + do i=1,N_states + pt2(i) = ci_energy_dressed(i) - energy(i) + enddo + + N_states_p = min(N_det,N_states) + + print *, '' + print '(A,I12)', 'Summary at N_det = ', N_det + print '(A)', '-----------------------------------' + print *, '' + print *, '' + + write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' + write(*,fmt) + write(fmt,*) '(12X,', N_states_p, '(6X,A7,1X,I6,10X))' + write(*,fmt) ('State',k, k=1,N_states_p) + write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' + write(*,fmt) + write(fmt,*) '(A12,', N_states_p, '(1X,F14.8,15X))' + write(*,fmt) '# E ', energy(1:N_states_p) + if (N_states_p > 1) then + write(*,fmt) '# Excit. (au)', energy(1:N_states_p)-energy(1) + write(*,fmt) '# Excit. (eV)', (energy(1:N_states_p)-energy(1))*27.211396641308d0 + endif + write(fmt,*) '(A12,', 2*N_states_p, '(1X,F14.8))' + write(*,fmt) '# PT2'//pt2_string, (pt2(k), error(k), k=1,N_states_p) + write(*,'(A)') '#' + write(*,fmt) '# E+PT2 ', (energy(k)+pt2(k),error(k), k=1,N_states_p) + if (N_states_p > 1) then + write(*,fmt) '# Excit. (au)', ( (energy(k)+pt2(k)-energy(1)-pt2(1)), & + dsqrt(error(k)*error(k)+error(1)*error(1)), k=1,N_states_p) + write(*,fmt) '# Excit. (eV)', ( (energy(k)+pt2(k)-energy(1)-pt2(1))*27.211396641308d0, & + dsqrt(error(k)*error(k)+error(1)*error(1))*27.211396641308d0, k=1,N_states_p) + endif + write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' + write(*,fmt) + print *, '' + + print *, 'N_det = ', N_det + print *, 'N_states = ', N_states + + do k=1, N_states_p + print*,'State ',k + print *, 'PT2 = ', pt2(k) + print *, 'E = ', energy(k) + print *, 'E+PT2'//pt2_string//' = ', energy(k)+pt2(k) + enddo + + print *, '-----' + if(N_states.gt.1)then + print *, 'Variational Energy difference (au | eV)' + do i=2, N_states_p + print*,'Delta E = ', (energy(i) - energy(1)), & + (energy(i) - energy(1)) * 27.211396641308d0 + enddo + print *, '-----' + print*, 'Variational + perturbative Energy difference (au | eV)' + do i=2, N_states_p + print*,'Delta E = ', (energy(i)+ pt2(i) - (energy(1) + pt2(1))), & + (energy(i)+ pt2(i) - (energy(1) + pt2(1))) * 27.211396641308d0 + enddo + endif + call ezfio_set_shiftedbk_energy_pt2(energy(1)+pt2(1)) +! call dump_fci_iterations_value(N_det,energy,pt2) + + n_det_before = N_det + + PROVIDE psi_coef + PROVIDE psi_det + PROVIDE psi_det_sorted + + if (N_det >= N_det_max) then + threshold_davidson = threshold_davidson_in + end if + call save_wavefunction + call ezfio_set_shiftedbk_energy(energy(1)) + call ezfio_set_shiftedbk_energy_pt2(ci_energy_dressed(1)) + enddo + endif + + + + +end + diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 80383e39..bab8490d 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -1,46 +1,249 @@ +use selection_types + + + BEGIN_PROVIDER [ double precision, global_sum_alpha2, (N_states) ] +&BEGIN_PROVIDER [ double precision, slave_sum_alpha2, (N_states, Nproc) ] + global_sum_alpha2 = 0d0 + slave_sum_alpha2 = 0d0 +END_PROVIDER + + BEGIN_PROVIDER [ double precision, fock_diag_tmp_, (2,mo_tot_num+1,Nproc) ] -&BEGIN_PROVIDER [ integer, current_generator_, (Nproc) ] +&BEGIN_PROVIDER [ integer, n_det_add ] &BEGIN_PROVIDER [ double precision, a_h_i, (N_det, Nproc) ] &BEGIN_PROVIDER [ double precision, a_s2_i, (N_det, Nproc) ] +&BEGIN_PROVIDER [ type(selection_buffer), sb, (Nproc) ] +&BEGIN_PROVIDER [ type(selection_buffer), global_sb ] +&BEGIN_PROVIDER [ type(selection_buffer), mini_sb ] +&BEGIN_PROVIDER [ double precision, N_det_increase_factor ] + implicit none + fock_diag_tmp_(:,:,:) = 0.d0 + integer :: i + + N_det_increase_factor = dble(N_states) + + + n_det_add = max(1, int(float(N_det) * N_det_increase_factor)) + call create_selection_buffer(n_det_add, n_det_add*2, global_sb) + call create_selection_buffer(n_det_add, n_det_add*2, mini_sb) + do i=1,Nproc + call create_selection_buffer(n_det_add, n_det_add*2, sb(i)) + end do + a_h_i = 0d0 + a_s2_i = 0d0 +END_PROVIDER + + + BEGIN_PROVIDER [ integer, N_dress_int_buffer ] +&BEGIN_PROVIDER [ integer, N_dress_double_buffer ] +&BEGIN_PROVIDER [ integer, N_dress_det_buffer ] implicit none - current_generator_(:) = 0 - fock_diag_tmp_(:,:,:) = 0.d0 - a_h_i = 0d0 - a_s2_i = 0d0 - END_PROVIDER + N_dress_int_buffer = 1 + N_dress_double_buffer = n_det_add+N_states + N_dress_det_buffer = n_det_add +END_PROVIDER +subroutine generator_done(i_gen, int_buf, double_buf, det_buf, N_buf, iproc) + implicit none + integer, intent(in) :: i_gen, iproc + integer, intent(out) :: int_buf(N_dress_int_buffer), N_buf(3) + double precision, intent(out) :: double_buf(N_dress_double_buffer) + integer(bit_kind), intent(out) :: det_buf(N_int, 2, N_dress_det_buffer) + integer :: i + int_buf(:) = 0 + + call sort_selection_buffer(sb(iproc)) + + if(sb(iproc)%cur > 0) then + !$OMP CRITICAL + call merge_selection_buffers(sb(iproc), mini_sb) + !call sort_selection_buffer(mini_sb) + do i=1,Nproc + mini_sb%mini = min(sb(i)%mini, mini_sb%mini) + end do + do i=1,Nproc + sb(i)%mini = mini_sb%mini + end do + !$OMP END CRITICAL + end if + call truncate_to_mini(sb(iproc)) + det_buf(:,:,:sb(iproc)%cur) = sb(iproc)%det(:,:,:sb(iproc)%cur) + double_buf(:sb(iproc)%cur) = sb(iproc)%val(:sb(iproc)%cur) + double_buf(sb(iproc)%cur+1:sb(iproc)%cur+N_states) = slave_sum_alpha2(:,iproc) + N_buf(1) = 1 + N_buf(2) = sb(iproc)%cur+N_states + N_buf(3) = sb(iproc)%cur + + sb(iproc)%cur = 0 + slave_sum_alpha2(:,iproc) = 0d0 +end subroutine -subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minilist, det_minilist, n_minilist, alpha, iproc) - use bitmasks - implicit none + +subroutine generator_start(i_gen, iproc) + implicit none + integer, intent(in) :: i_gen, iproc + integer :: i + + call build_fock_tmp(fock_diag_tmp_(1,1,iproc),psi_det_generators(1,1,i_gen),N_int) +end subroutine + + +subroutine dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) + use bitmasks + implicit none + + integer, intent(in) :: ind, N_buf(3) + integer, intent(in) :: int_buf(*) + double precision, intent(in) :: double_buf(*) + integer(bit_kind), intent(in) :: det_buf(N_int,2,*) + integer :: i + + do i=1,N_buf(3) + call add_to_selection_buffer(global_sb, det_buf(1,1,i), double_buf(i)) + end do + if(N_buf(3) + N_states /= N_buf(2)) stop "buf size" + !$OMP CRITICAL + global_sum_alpha2(:) += double_buf(N_buf(3)+1:N_buf(2)) + !$OMP END CRITICAL +end subroutine + + +subroutine delta_ij_done() + use bitmasks + implicit none + integer :: i, old_det_gen + integer(bit_kind), allocatable :: old_generators(:,:,:) + + allocate(old_generators(N_int, 2, N_det_generators)) + old_generators(:,:,:) = psi_det_generators(:,:,:N_det_generators) + old_det_gen = N_det_generators + + + ! Add buffer only when the last state is computed + call unique_selection_buffer(global_sb) + call sort_selection_buffer(global_sb) + call fill_H_apply_buffer_no_selection(global_sb%cur,global_sb%det,N_int,0) + call copy_H_apply_buffer_to_wf() + if (s2_eig.or.(N_states > 1) ) then + call make_s2_eigenfunction + endif + call undress_with_alpha(old_generators, old_det_gen, psi_det(1,1,N_det_delta_ij+1), N_det-N_det_delta_ij) + call save_wavefunction + +end subroutine + + +subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: alpha(N_int,2,n_alpha) + integer, intent(in) :: n_alpha + integer, allocatable :: minilist(:) + integer(bit_kind), allocatable :: det_minilist(:,:,:) + double precision, allocatable :: delta_ij_loc(:,:,:,:) + integer :: exc(0:2,2,2), h1, h2, p1, p2, s1, s2 + integer :: i, j, k, ex, n_minilist, iproc, degree + double precision :: haa, contrib, phase, c_alpha(N_states,Nproc), s_c_alpha(N_states) + logical :: ok + integer, external :: omp_get_thread_num + + integer,intent(in) :: old_det_gen + integer(bit_kind), intent(in) :: old_generators(N_int, 2, old_det_gen) + + allocate(minilist(N_det_delta_ij), det_minilist(N_int, 2, N_det_delta_ij), delta_ij_loc(N_states, N_det_delta_ij, 2, Nproc)) + + c_alpha = 0d0 + delta_ij_loc = 0d0 + + !$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC) PRIVATE(i, j, iproc, n_minilist, ex) & + !$OMP PRIVATE(det_minilist, minilist, haa, contrib, s_c_alpha) & + !$OMP PRIVATE(exc, h1, h2, p1, p2, s1, s2, phase, degree, ok) + do i=n_alpha,1,-1 + iproc = omp_get_thread_num()+1 + if(mod(i,10000) == 0) print *, "UNDRESSING", i, "/", n_alpha, iproc + n_minilist = 0 + ok = .false. + + do j=1, old_det_gen + call get_excitation_degree(alpha(1,1,i), old_generators(1,1,j), ex, N_int) + if(ex <= 2) then + call get_excitation(old_generators(1,1,j), alpha(1,1,i), exc,degree,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + ok = (mo_class(h1)(1:1) == 'A' .or. mo_class(h1)(1:1) == 'I') .and. & + (mo_class(p1)(1:1) == 'A' .or. mo_class(p1)(1:1) == 'V') + if(ok .and. degree == 2) then + ok = (mo_class(h2)(1:1) == 'A' .or. mo_class(h2)(1:1) == 'I') .and. & + (mo_class(p2)(1:1) == 'A' .or. mo_class(p2)(1:1) == 'V') + end if + if(ok) exit + end if + end do + + if(.not. ok) cycle + + do j=1, N_det_delta_ij + call get_excitation_degree(alpha(1,1,i), psi_det(1,1,j), ex, N_int) + if(ex <= 2) then + n_minilist += 1 + det_minilist(:,:,n_minilist) = psi_det(:,:,j) + minilist(n_minilist) = j + end if + end do + call i_h_j(alpha(1,1,i), alpha(1,1,i), N_int, haa) + call dress_with_alpha_(N_states, N_det_delta_ij, N_int, delta_ij_loc(1,1,1,iproc), & + minilist, det_minilist, n_minilist, alpha(1,1,i), haa, contrib, s_c_alpha, iproc) + + c_alpha(:,iproc) += s_c_alpha(:)**2 + end do + !$OMP END PARALLEL DO + + do i=2,Nproc + delta_ij_loc(:,:,:,1) += delta_ij_loc(:,:,:,i) + c_alpha(:,1) += c_alpha(:,i) + end do + + + delta_ij_tmp(:,:,1) -= delta_ij_loc(:,:,1,1) + delta_ij_tmp(:,:,2) -= delta_ij_loc(:,:,2,1) + + !print *, "SUM ALPHA2 PRE", global_sum_alpha2 + !global_sum_alpha2(:) -= c_alpha(:,1) + print *, "SUM C_ALPHA^2 =", global_sum_alpha2(:) + !print *, "*** DRESSINS DIVIDED BY 1+SUM C_ALPHA^2 ***" + !do i=1,N_states + ! delta_ij_tmp(i,:,:) = delta_ij_tmp(i,:,:) / (1d0 + global_sum_alpha2(i)) + !end do + global_sum_alpha2 = 0d0 +end subroutine + + +subroutine dress_with_alpha_(Nstates,Ndet,Nint,delta_ij_loc,minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc) + use bitmasks + implicit none BEGIN_DOC !delta_ij_loc(:,:,1) : dressing column for H !delta_ij_loc(:,:,2) : dressing column for S2 - !i_gen : generator index in psi_det_generators - !minilist : indices of determinants connected to alpha ( in psi_det_sorted ) + !minilist : indices of determinants connected to alpha ( in psi_det ) !n_minilist : size of minilist !alpha : alpha determinant END_DOC - integer, intent(in) :: Nint, Ndet, Nstates, n_minilist, iproc, i_gen + integer, intent(in) :: Nint, Ndet, Nstates, n_minilist, iproc integer(bit_kind), intent(in) :: alpha(Nint,2), det_minilist(Nint, 2, n_minilist) integer,intent(in) :: minilist(n_minilist) double precision, intent(inout) :: delta_ij_loc(Nstates,N_det,2) - double precision :: haa, hij, sij + double precision, intent(out) :: contrib, c_alpha(N_states) + double precision,intent(in) :: haa + double precision :: hij, sij double precision, external :: diag_H_mat_elem_fock integer :: i,j,k,l,m, l_sd double precision :: hdress, sdress - double precision :: de, a_h_psi(Nstates), c_alpha + double precision :: de, a_h_psi(Nstates) a_h_psi = 0d0 - if(current_generator_(iproc) /= i_gen) then - current_generator_(iproc) = i_gen - call build_fock_tmp(fock_diag_tmp_(1,1,iproc),psi_det_generators(1,1,i_gen),N_int) - end if - - haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) do l_sd=1,n_minilist call i_h_j_s2(alpha,det_minilist(1,1,l_sd),N_int,hij, sij) @@ -51,16 +254,22 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili end do end do + contrib = 0d0 do i=1,Nstates de = dress_E0_denominator(i) - haa if(DABS(de) < 1D-5) cycle - c_alpha = a_h_psi(i) / de - + c_alpha(i) = a_h_psi(i) / de + contrib = min(contrib, c_alpha(i) * a_h_psi(i)) + do l_sd=1,n_minilist - hdress = c_alpha * a_h_i(l_sd, iproc) - sdress = c_alpha * a_s2_i(l_sd, iproc) + hdress = c_alpha(i) * a_h_i(l_sd, iproc) + sdress = c_alpha(i) * a_s2_i(l_sd, iproc) + !if(c_alpha(i) * a_s2_i(l_sd, iproc) > 1d-1) then + ! call debug_det(det_minilist(1,1,l_sd), N_int) + ! call debug_det(alpha,N_int) + !end if delta_ij_loc(i, minilist(l_sd), 1) += hdress delta_ij_loc(i, minilist(l_sd), 2) += sdress end do @@ -68,6 +277,37 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili end subroutine +subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minilist, det_minilist, n_minilist, alpha, iproc) + use bitmasks + implicit none + BEGIN_DOC + !delta_ij_loc(:,:,1) : dressing column for H + !delta_ij_loc(:,:,2) : dressing column for S2 + !i_gen : generator index in psi_det_generators + !minilist : indices of determinants connected to alpha ( in psi_det ) + !n_minilist : size of minilist + !alpha : alpha determinant + END_DOC + integer, intent(in) :: Nint, Ndet, Nstates, n_minilist, iproc, i_gen + integer(bit_kind), intent(in) :: alpha(Nint,2), det_minilist(Nint, 2, n_minilist) + integer,intent(in) :: minilist(n_minilist) + double precision, intent(inout) :: delta_ij_loc(Nstates,N_det,2) + double precision, external :: diag_H_mat_elem_fock + double precision :: haa, contrib, c_alpha(N_states) + + + + haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) + + call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc) + + slave_sum_alpha2(:,iproc) += c_alpha(:)**2 + if(contrib < sb(iproc)%mini) then + call add_to_selection_buffer(sb(iproc), alpha, contrib) + end if +end subroutine + + BEGIN_PROVIDER [ logical, initialize_E0_denominator ] implicit none BEGIN_DOC diff --git a/plugins/shiftedbk/shifted_bk_slave.irp.f b/plugins/shiftedbk/shifted_bk_slave.irp.f index 27787cf2..901940ed 100644 --- a/plugins/shiftedbk/shifted_bk_slave.irp.f +++ b/plugins/shiftedbk/shifted_bk_slave.irp.f @@ -1,8 +1,15 @@ -program bk_slave +program shifted_bk implicit none BEGIN_DOC ! Helper subroutine to compute the dress in distributed mode. END_DOC - call dress_slave + + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order + + !call diagonalize_CI() + call dress_slave() end diff --git a/scripts/ezfio_interface/qp_convert_output_to_ezfio.py b/scripts/ezfio_interface/qp_convert_output_to_ezfio.py index ab9d7c65..8d24ad85 100755 --- a/scripts/ezfio_interface/qp_convert_output_to_ezfio.py +++ b/scripts/ezfio_interface/qp_convert_output_to_ezfio.py @@ -272,7 +272,7 @@ def write_ezfio(res, filename): # # INPUT - # {% for lanel,zcore, l_block in l_atom $} + # {% for label,zcore, l_block in l_atom $} # #local l_block l=0} # {label} GEN {zcore} {len(l_block)-1 #lmax_block} # {% for l_param in l_block%} @@ -337,6 +337,7 @@ def write_ezfio(res, filename): l_max_block = max(len(i) for i in matrix_unlocal_unpad) k_max = max([len(item) for row in matrix_unlocal_unpad for item in row]) + matrix_unlocal_semipaded = [[pad(item, k_max, [0., 2, 0.]) for item in row] for row in matrix_unlocal_unpad] empty_row = [[0., 2, 0.] for k in range(l_max_block)] diff --git a/src/Bitmask/bitmasks.irp.f b/src/Bitmask/bitmasks.irp.f index 4eac3e0a..efb39119 100644 --- a/src/Bitmask/bitmasks.irp.f +++ b/src/Bitmask/bitmasks.irp.f @@ -559,7 +559,7 @@ END_PROVIDER n_core_inact_act_orb = 0 do i = 1, N_int reunion_of_core_inact_act_bitmask(i,1) = ior(reunion_of_core_inact_bitmask(i,1),cas_bitmask(i,1,1)) - reunion_of_core_inact_act_bitmask(i,2) = ior(reunion_of_core_inact_bitmask(i,2),cas_bitmask(i,1,1)) + reunion_of_core_inact_act_bitmask(i,2) = ior(reunion_of_core_inact_bitmask(i,2),cas_bitmask(i,2,1)) n_core_inact_act_orb +=popcnt(reunion_of_core_inact_act_bitmask(i,1)) enddo END_PROVIDER diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index 8530fa64..d01c80ff 100644 --- a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -321,21 +321,24 @@ end subroutine END_DOC call sort_dets_by_det_search_key(N_det, psi_det, psi_coef, & - psi_det_sorted_bit, psi_coef_sorted_bit) + psi_det_sorted_bit, psi_coef_sorted_bit, N_states) END_PROVIDER -subroutine sort_dets_by_det_search_key(Ndet, det_in, coef_in, det_out, coef_out) +subroutine sort_dets_by_det_search_key(Ndet, det_in, coef_in, det_out, coef_out, N_st) use bitmasks implicit none - integer, intent(in) :: Ndet + integer, intent(in) :: Ndet, N_st integer(bit_kind), intent(in) :: det_in (N_int,2,psi_det_size) - double precision , intent(in) :: coef_in(psi_det_size,N_states) + double precision , intent(in) :: coef_in(psi_det_size,N_st) integer(bit_kind), intent(out) :: det_out (N_int,2,psi_det_size) - double precision , intent(out) :: coef_out(psi_det_size,N_states) + double precision , intent(out) :: coef_out(psi_det_size,N_st) BEGIN_DOC ! Determinants are sorted are sorted according to their det_search_key. ! Useful to accelerate the search of a random determinant in the wave ! function. + ! + ! /!\ The first dimension of coef_out and coef_in need to be psi_det_size + ! END_DOC integer :: i,j,k integer, allocatable :: iorder(:) @@ -356,7 +359,7 @@ subroutine sort_dets_by_det_search_key(Ndet, det_in, coef_in, det_out, coef_out) det_out(j,1,i) = det_in(j,1,iorder(i)) det_out(j,2,i) = det_in(j,2,iorder(i)) enddo - do k=1,N_states + do k=1,N_st coef_out(i,k) = coef_in(iorder(i),k) enddo enddo diff --git a/src/Determinants/psi_cas.irp.f b/src/Determinants/psi_cas.irp.f index 591843f7..58a71fbc 100644 --- a/src/Determinants/psi_cas.irp.f +++ b/src/Determinants/psi_cas.irp.f @@ -54,7 +54,7 @@ END_PROVIDER ! function. END_DOC call sort_dets_by_det_search_key(N_det_cas, psi_cas, psi_cas_coef, & - psi_cas_sorted_bit, psi_cas_coef_sorted_bit) + psi_cas_sorted_bit, psi_cas_coef_sorted_bit, N_states) END_PROVIDER @@ -107,7 +107,7 @@ END_PROVIDER ! function. END_DOC call sort_dets_by_det_search_key(N_det_cas, psi_non_cas, psi_non_cas_coef, & - psi_non_cas_sorted_bit, psi_non_cas_coef_sorted_bit) + psi_non_cas_sorted_bit, psi_non_cas_coef_sorted_bit, N_states) END_PROVIDER diff --git a/src/Determinants/s2.irp.f b/src/Determinants/s2.irp.f index 273b8352..95339664 100644 --- a/src/Determinants/s2.irp.f +++ b/src/Determinants/s2.irp.f @@ -190,7 +190,7 @@ subroutine S2_u_0_nstates(v_0,u_0,n,keys_tmp,Nint,N_st,sze_8) vt = 0.d0 do sh=1,shortcut(0,1) - !$OMP DO + !$OMP DO SCHEDULE(static,1) do sh2=sh,shortcut(0,1) exa = 0 do ni=1,Nint diff --git a/src/Determinants/spindeterminants.irp.f b/src/Determinants/spindeterminants.irp.f index 1f076057..ffa7c269 100644 --- a/src/Determinants/spindeterminants.irp.f +++ b/src/Determinants/spindeterminants.irp.f @@ -649,7 +649,7 @@ subroutine create_wf_of_psi_bilinear_matrix(truncate) do k=1,N_states psi_coef_sorted_bit(idx,k) = psi_bilinear_matrix(i,j,k) !$OMP ATOMIC - norm(k) += psi_bilinear_matrix(i,j,k) + norm(k) += psi_bilinear_matrix(i,j,k)*psi_bilinear_matrix(i,j,k) enddo endif enddo diff --git a/src/MO_Basis/ao_ortho_canonical.irp.f b/src/MO_Basis/ao_ortho_canonical.irp.f index ab93f94e..1cc3444e 100644 --- a/src/MO_Basis/ao_ortho_canonical.irp.f +++ b/src/MO_Basis/ao_ortho_canonical.irp.f @@ -115,6 +115,7 @@ END_PROVIDER ao_num,ao_ortho_canonical_coef,size(ao_ortho_canonical_coef,1), & ao_ortho_canonical_num) + else double precision, allocatable :: S(:,:) @@ -134,13 +135,6 @@ END_PROVIDER S, size(S,1), & 0.d0, ao_ortho_canonical_coef, size(ao_ortho_canonical_coef,1)) -!integer :: j -!do i=1,ao_num -! do j=1,ao_num -! print *, i,j, ao_ortho_canonical_coef(i,j) -! enddo -!enddo -!stop deallocate(S) endif END_PROVIDER diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index 97fded04..1f775413 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -323,7 +323,7 @@ IRP_ENDIF stop 'Unable to set ZMQ_LINGER on push socket' endif - rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_SNDHWM,3,4) + rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_SNDHWM,5,4) if (rc /= 0) then stop 'Unable to set ZMQ_SNDHWM on push socket' endif