diff --git a/data/pseudo/tm b/data/pseudo/tm new file mode 100644 index 00000000..531445f8 --- /dev/null +++ b/data/pseudo/tm @@ -0,0 +1,32 @@ +Ag GEN 36 2 +4 +11.074 1 1.712 +-166.201 2 1.391 +255.676 2 1.194 +-91.757 2 1.033 +3 +11.074 1 0.897 +-22.6472 2 1.226 +16.8557 2 0.9789 +4 +9.524 1 12.668 +227.659 2 1.662 +-363.576 2 1.4 +150.286 2 1.205 + +Au GEN 68 2 +4 +10.881 1 2.286 +-97.386 2 1.088 +270.134 2 1.267 +-171.733 2 1.499 +3 +10.721 1 1.38 +-63.222 2 1.111 +60.634 2 0.987 +4 +9.383 1 11. +225.822 2 1.66 +286.233 2 1.342 +-497.561 2 1.437 + diff --git a/data/pseudo/tn_df b/data/pseudo/tn_df index 2ba941be..988312b0 100644 --- a/data/pseudo/tn_df +++ b/data/pseudo/tn_df @@ -780,6 +780,27 @@ Ar GEN 10 2 -1386.79918148 2 4.23753203 1350.57102634 2 6.12344921 - +Ag GEN 36 2 + 6 + 11.00000000 1 7.02317516 + 178.71479273 2 1.36779344 + -206.54166000 2 1.85990342 + 92.80009949 2 2.70385827 + -91.80009949 2 1.21149868 + 77.25492677 3 2.46247055 + 6 + -19159.46923372 2 2.56205947 + 19178.09022506 2 3.28075183 + -19956.12207989 2 3.86486918 + 12405.48540805 2 2.42437953 + -8569.95659418 2 5.14643113 + 16121.59197935 2 4.79642660 + 6 + -1054.66284551 2 1.92427691 + 1072.38275494 2 1.94184452 + -1.15533162 2 27.95704514 + 88.48945385 2 1.25545336 + -0.36033231 2 10.04954095 + -85.97371403 2 1.49011553 diff --git a/ocaml/Element.ml b/ocaml/Element.ml index 6bc2de4e..df85663f 100644 --- a/ocaml/Element.ml +++ b/ocaml/Element.ml @@ -9,6 +9,7 @@ type t = |Li|Be |B |C |N |O |F |Ne |Na|Mg |Al|Si|P |S |Cl|Ar |K |Ca|Sc|Ti|V |Cr|Mn|Fe|Co|Ni|Cu|Zn|Ga|Ge|As|Se|Br|Kr +|Rb|Sr|Y |Zr|Nb|Mo|Tc|Ru|Rh|Pd|Ag|Cd|In|Sn|Sb|Te|I |Xe with sexp let of_string x = @@ -50,6 +51,24 @@ let of_string x = | "Se" | "Selenium" -> Se | "Br" | "Bromine" -> Br | "Kr" | "Krypton" -> Kr +| "Rb" | "Rubidium" -> Rb +| "Sr" | "Strontium" -> Sr +| "Y" | "Yttrium" -> Y +| "Zr" | "Zirconium" -> Zr +| "Nb" | "Niobium" -> Nb +| "Mo" | "Molybdenum" -> Mo +| "Tc" | "Technetium" -> Tc +| "Ru" | "Ruthenium" -> Ru +| "Rh" | "Rhodium" -> Rh +| "Pd" | "Palladium" -> Pd +| "Ag" | "Silver" -> Ag +| "Cd" | "Cadmium" -> Cd +| "In" | "Indium" -> In +| "Sn" | "Tin" -> Sn +| "Sb" | "Antimony" -> Sb +| "Te" | "Tellurium" -> Te +| "I" | "Iodine" -> I +| "Xe" | "Xenon" -> Xe | x -> raise (ElementError ("Element "^x^" unknown")) @@ -91,6 +110,24 @@ let to_string = function | Se -> "Se" | Br -> "Br" | Kr -> "Kr" +| Rb -> "Rb" +| Sr -> "Sr" +| Y -> "Y" +| Zr -> "Zr" +| Nb -> "Nb" +| Mo -> "Mo" +| Tc -> "Tc" +| Ru -> "Ru" +| Rh -> "Rh" +| Pd -> "Pd" +| Ag -> "Ag" +| Cd -> "Cd" +| In -> "In" +| Sn -> "Sn" +| Sb -> "Sb" +| Te -> "Te" +| I -> "I" +| Xe -> "Xe" let to_long_string = function @@ -131,6 +168,24 @@ let to_long_string = function | Se -> "Selenium" | Br -> "Bromine" | Kr -> "Krypton" +| Rb -> "Rubidium" +| Sr -> "Strontium" +| Y -> "Yttrium" +| Zr -> "Zirconium" +| Nb -> "Niobium" +| Mo -> "Molybdenum" +| Tc -> "Technetium" +| Ru -> "Ruthenium" +| Rh -> "Rhodium" +| Pd -> "Palladium" +| Ag -> "Silver" +| Cd -> "Cadmium" +| In -> "Indium" +| Sn -> "Tin" +| Sb -> "Antimony" +| Te -> "Tellurium" +| I -> "Iodine" +| Xe -> "Xenon" let to_charge c = @@ -172,47 +227,83 @@ let to_charge c = | Se -> 34 | Br -> 35 | Kr -> 36 + | Rb -> 37 + | Sr -> 38 + | Y -> 39 + | Zr -> 40 + | Nb -> 41 + | Mo -> 42 + | Tc -> 43 + | Ru -> 44 + | Rh -> 45 + | Pd -> 46 + | Ag -> 47 + | Cd -> 48 + | In -> 49 + | Sn -> 50 + | Sb -> 51 + | Te -> 52 + | I -> 53 + | Xe -> 54 in Charge.of_int result let of_charge c = match (Charge.to_int c) with -| 0 -> X -| 1 -> H -| 2 -> He -| 3 -> Li -| 4 -> Be -| 5 -> B -| 6 -> C -| 7 -> N -| 8 -> O -| 9 -> F -| 10 -> Ne -| 11 -> Na -| 12 -> Mg -| 13 -> Al -| 14 -> Si -| 15 -> P -| 16 -> S -| 17 -> Cl -| 18 -> Ar -| 19 -> K -| 20 -> Ca -| 21 -> Sc -| 22 -> Ti -| 23 -> V -| 24 -> Cr -| 25 -> Mn -| 26 -> Fe -| 27 -> Co -| 28 -> Ni -| 29 -> Cu -| 30 -> Zn -| 31 -> Ga -| 32 -> Ge -| 33 -> As -| 34 -> Se -| 35 -> Br -| 36 -> Kr +| 0 -> X +| 1 -> H +| 2 -> He +| 3 -> Li +| 4 -> Be +| 5 -> B +| 6 -> C +| 7 -> N +| 8 -> O +| 9 -> F +| 10 -> Ne +| 11 -> Na +| 12 -> Mg +| 13 -> Al +| 14 -> Si +| 15 -> P +| 16 -> S +| 17 -> Cl +| 18 -> Ar +| 19 -> K +| 20 -> Ca +| 21 -> Sc +| 22 -> Ti +| 23 -> V +| 24 -> Cr +| 25 -> Mn +| 26 -> Fe +| 27 -> Co +| 28 -> Ni +| 29 -> Cu +| 30 -> Zn +| 31 -> Ga +| 32 -> Ge +| 33 -> As +| 34 -> Se +| 35 -> Br +| 36 -> Kr +| 37 -> Rb +| 38 -> Sr +| 39 -> Y +| 40 -> Zr +| 41 -> Nb +| 42 -> Mo +| 43 -> Tc +| 44 -> Ru +| 45 -> Rh +| 46 -> Pd +| 47 -> Ag +| 48 -> Cd +| 49 -> In +| 50 -> Sn +| 51 -> Sb +| 52 -> Te +| 53 -> I +| 54 -> Xe | x -> raise (ElementError ("Element of charge "^(string_of_int x)^" unknown")) @@ -255,6 +346,24 @@ let covalent_radius x = | Se -> 0.70 | Br -> 1.24 | Kr -> 1.91 + | Rb -> 2.20 + | Sr -> 1.95 + | Y -> 1.90 + | Zr -> 1.75 + | Nb -> 1.64 + | Mo -> 1.54 + | Tc -> 1.47 + | Ru -> 1.46 + | Rh -> 1.42 + | Pd -> 1.39 + | Ag -> 1.45 + | Cd -> 1.44 + | In -> 1.42 + | Sn -> 1.39 + | Sb -> 1.39 + | Te -> 1.38 + | I -> 1.39 + | Xe -> 1.40 in Units.angstrom_to_bohr *. (result x) |> Positive_float.of_float @@ -298,6 +407,24 @@ let vdw_radius x = | Se -> 1.70 | Br -> 2.10 | Kr -> 1.70 + | Rb -> 3.03 + | Sr -> 2.49 + | Y -> 0. + | Zr -> 0. + | Nb -> 0. + | Mo -> 0. + | Tc -> 0. + | Ru -> 0. + | Rh -> 0. + | Pd -> 1.63 + | Ag -> 1.72 + | Cd -> 1.58 + | In -> 1.93 + | Sn -> 2.17 + | Sb -> 2.06 + | Te -> 2.06 + | I -> 1.98 + | Xe -> 2.16 in Units.angstrom_to_bohr *. (result x) |> Positive_float.of_float @@ -341,6 +468,24 @@ let mass x = | Se -> 78.96 | Br -> 79.904 | Kr -> 83.80 + | Rb -> 85.4678 + | Sr -> 87.62 + | Y -> 88.90584 + | Zr -> 91.224 + | Nb -> 92.90637 + | Mo -> 95.95 + | Tc -> 98. + | Ru -> 101.07 + | Rh -> 102.90550 + | Pd -> 106.42 + | Ag -> 107.8682 + | Cd -> 112.414 + | In -> 114.818 + | Sn -> 118.710 + | Sb -> 121.760 + | Te -> 127.60 + | I -> 126.90447 + | Xe -> 131.293 in result x |> Positive_float.of_float diff --git a/ocaml/Element.mli b/ocaml/Element.mli index 8d9862c9..5edfdf31 100644 --- a/ocaml/Element.mli +++ b/ocaml/Element.mli @@ -6,6 +6,7 @@ type t = |Li|Be |B |C |N |O |F |Ne |Na|Mg |Al|Si|P |S |Cl|Ar |K |Ca|Sc|Ti|V |Cr|Mn|Fe|Co|Ni|Cu|Zn|Ga|Ge|As|Se|Br|Kr +|Rb|Sr|Y |Zr|Nb|Mo|Tc|Ru|Rh|Pd|Ag|Cd|In|Sn|Sb|Te|I |Xe with sexp (** String conversion functions *) diff --git a/plugins/Full_CI/var_pt2_ratio.irp.f b/plugins/Full_CI/var_pt2_ratio.irp.f index 3d942a30..1ea52dda 100644 --- a/plugins/Full_CI/var_pt2_ratio.irp.f +++ b/plugins/Full_CI/var_pt2_ratio.irp.f @@ -11,7 +11,7 @@ program var_pt2_ratio_run double precision, allocatable :: psi_det_save(:,:,:), psi_coef_save(:,:) - double precision :: E_fci, E_var, ratio, E_ref + double precision :: E_fci, E_var, ratio, E_ref, selection_criterion_save integer :: Nmin, Nmax pt2 = 1.d0 @@ -30,6 +30,7 @@ program var_pt2_ratio_run threshold_selectors = 1.d0 threshold_generators = 0.999d0 + selection_criterion_save = selection_criterion call diagonalize_CI call H_apply_FCI_PT2(pt2, norm_pert, H_pert_diag, N_st) E_ref = CI_energy(1) + pt2(1) @@ -46,6 +47,8 @@ program var_pt2_ratio_run Nmax = max(Nmax,Nmin+10) ! Select new determinants call H_apply_FCI(pt2, norm_pert, H_pert_diag, N_st) + selection_criterion = selection_criterion_save + SOFT_TOUCH selection_criterion selection_criterion_min selection_criterion_factor else Nmax = N_det N_det = Nmin + (Nmax-Nmin)/2 diff --git a/plugins/Hartree_Fock/Fock_matrix.irp.f b/plugins/Hartree_Fock/Fock_matrix.irp.f index 397f8f83..af9255c8 100644 --- a/plugins/Hartree_Fock/Fock_matrix.irp.f +++ b/plugins/Hartree_Fock/Fock_matrix.irp.f @@ -223,6 +223,7 @@ END_PROVIDER ao_bi_elec_integral_beta_tmp = 0.d0 !$OMP DO SCHEDULE(dynamic) + !DIR$ NOVECTOR do i8=0_8,ao_integrals_map%map_size n_elements = n_elements_max call get_cache_map(ao_integrals_map,i8,keys,values,n_elements) diff --git a/plugins/Hartree_Fock/damping_SCF.irp.f b/plugins/Hartree_Fock/damping_SCF.irp.f index d383eb74..aa6f02b0 100644 --- a/plugins/Hartree_Fock/damping_SCF.irp.f +++ b/plugins/Hartree_Fock/damping_SCF.irp.f @@ -96,7 +96,7 @@ subroutine damping_SCF a = (E_new + E - 2.d0*E_half)*2.d0 b = -E_new - 3.d0*E + 4.d0*E_half - lambda = -lambda*b/a + lambda = -lambda*b/(a+1.d-16) D_alpha = (1.d0-lambda) * D_alpha + lambda * D_new_alpha D_beta = (1.d0-lambda) * D_beta + lambda * D_new_beta delta_E = HF_energy - E diff --git a/plugins/QmcChem/dressed_dmc.irp.f b/plugins/QmcChem/dressed_dmc.irp.f new file mode 100644 index 00000000..0a48e871 --- /dev/null +++ b/plugins/QmcChem/dressed_dmc.irp.f @@ -0,0 +1,73 @@ +program dressed_dmc + implicit none + double precision :: E0, hij + double precision, allocatable :: H_jj(:), energies(:), delta_jj(:), cj(:), hj(:) + integer :: i + double precision, external :: diag_h_mat_elem + + if (.not.read_wf) then + stop 'read_wf should be true' + endif + + PROVIDE mo_bielec_integrals_in_map + allocate ( H_jj(N_det), delta_jj(N_det), hj(N_det), cj(N_det), energies(N_states) ) + + ! Read + ! -=-=-=-==-=-=-= + + character*(32) :: w, w2 + integer :: k + do while (.True.) + read(*,*) w + if ( trim(w) == 'Ci_h_psidet' ) then + exit + endif + enddo + do i=1,N_det + read(*,*) k, w, hj(i) + enddo + + do while (.True.) + read(*,*) w + if ( trim(w) == 'Ci_overlap_psidet' ) then + exit + endif + enddo + do i=1,N_det + read(*,*) k, w, cj(i) + enddo + + read(*,*) + read(*,*) w, w2, E0 + print *, 'E0=', E0 + print *, 'Ndet = ', N_det + + ! Compute delta_ii + ! -=-=-=-==-=-=-=- + + do i=1,N_det + call i_H_psi(psi_det(1,1,i),psi_det,cj,N_int,N_det,size(psi_coef,1),N_states,energies) + if (dabs(cj(i)) < 1.d-6) then + delta_jj(i) = 0.d0 + else + delta_jj(i) = (hj(i) - energies(1))/cj(i) + endif + H_jj(i) = diag_h_mat_elem(psi_det(1,1,i),N_int) + delta_jj(i) + print *, 'Delta_jj(',i,') = ', Delta_jj(i), H_jj(i) + enddo + + + call davidson_diag_hjj(psi_det,psi_coef,H_jj,energies,size(psi_coef,1),N_det,N_states,N_int,6) + + call save_wavefunction + call write_spindeterminants + + E0 = 0.d0 + do i=1,N_det + call i_H_psi(psi_det(1,1,i),psi_det,psi_coef(1,1),N_int,N_det,size(psi_coef,1),N_states,energies) + E0 += psi_coef(i,1) * energies(1) + enddo + print *, 'Trial energy: ', E0 + nuclear_repulsion + + deallocate (H_jj, delta_jj, energies, cj) +end diff --git a/src/Determinants/H_apply_zmq.template.f b/src/Determinants/H_apply_zmq.template.f index 2faceb77..fde09a8f 100644 --- a/src/Determinants/H_apply_zmq.template.f +++ b/src/Determinants/H_apply_zmq.template.f @@ -174,7 +174,7 @@ subroutine $subroutine_slave(thread, iproc) fock_diag_tmp, i_generator, iproc $params_post) endif - call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,1) + call task_done_to_taskserver(zmq_to_qp_run_socket, worker_id, task_id) call push_pt2(zmq_socket_push,pt2,norm_pert,H_pert_diag,i_generator,N_st,task_id) enddo diff --git a/src/Determinants/diagonalize_CI.irp.f b/src/Determinants/diagonalize_CI.irp.f index b533bed2..d4716b86 100644 --- a/src/Determinants/diagonalize_CI.irp.f +++ b/src/Determinants/diagonalize_CI.irp.f @@ -36,225 +36,223 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, CI_electronic_energy, (N_states_diag) ] &BEGIN_PROVIDER [ double precision, CI_eigenvectors, (N_det,N_states_diag) ] &BEGIN_PROVIDER [ double precision, CI_eigenvectors_s2, (N_states_diag) ] - BEGIN_DOC - ! Eigenvectors/values of the CI matrix - END_DOC - implicit none - double precision :: ovrlp,u_dot_v - integer :: i_good_state - integer, allocatable :: index_good_state_array(:) - logical, allocatable :: good_state_array(:) - double precision, allocatable :: s2_values_tmp(:) - integer :: i_other_state - double precision, allocatable :: eigenvectors(:,:), eigenvalues(:) - integer :: i_state - double precision :: s2,e_0 - integer :: i,j,k - double precision, allocatable :: s2_eigvalues(:) - double precision, allocatable :: e_array(:) - integer, allocatable :: iorder(:) - - ! Guess values for the "N_states_diag" states of the CI_eigenvectors - do j=1,min(N_states_diag,N_det) - do i=1,N_det - CI_eigenvectors(i,j) = psi_coef(i,j) - enddo - enddo - - do j=N_det+1,N_states_diag - do i=1,N_det - CI_eigenvectors(i,j) = 0.d0 - enddo - enddo - - if (diag_algorithm == "Davidson") then - - call davidson_diag(psi_det,CI_eigenvectors,CI_electronic_energy, & - size(CI_eigenvectors,1),N_det,N_states_diag,N_int,output_determinants) - do j=1,N_states_diag - call get_s2_u0(psi_det,CI_eigenvectors(1,j),N_det,size(CI_eigenvectors,1),CI_eigenvectors_s2(j)) - enddo - - - else if (diag_algorithm == "Lapack") then - - allocate (eigenvectors(size(H_matrix_all_dets,1),N_det)) - allocate (eigenvalues(N_det)) - call lapack_diag(eigenvalues,eigenvectors, & - H_matrix_all_dets,size(H_matrix_all_dets,1),N_det) - CI_electronic_energy(:) = 0.d0 - if (s2_eig) then - i_state = 0 - allocate (s2_eigvalues(N_det)) - allocate(index_good_state_array(N_det),good_state_array(N_det)) - good_state_array = .False. - do j=1,N_det - call get_s2_u0(psi_det,eigenvectors(1,j),N_det,size(eigenvectors,1),s2) - s2_eigvalues(j) = s2 - ! Select at least n_states states with S^2 values closed to "expected_s2" - if(dabs(s2-expected_s2).le.0.3d0)then - i_state +=1 - index_good_state_array(i_state) = j - good_state_array(j) = .True. - endif - if(i_state.eq.N_states) then - exit - endif - enddo - if(i_state .ne.0)then - ! Fill the first "i_state" states that have a correct S^2 value - do j = 1, i_state - do i=1,N_det - CI_eigenvectors(i,j) = eigenvectors(i,index_good_state_array(j)) - enddo - CI_electronic_energy(j) = eigenvalues(index_good_state_array(j)) - CI_eigenvectors_s2(j) = s2_eigvalues(index_good_state_array(j)) + BEGIN_DOC + ! Eigenvectors/values of the CI matrix + END_DOC + implicit none + double precision :: ovrlp,u_dot_v + integer :: i_good_state + integer, allocatable :: index_good_state_array(:) + logical, allocatable :: good_state_array(:) + double precision, allocatable :: s2_values_tmp(:) + integer :: i_other_state + double precision, allocatable :: eigenvectors(:,:), eigenvalues(:) + integer :: i_state + double precision :: s2,e_0 + integer :: i,j,k + double precision, allocatable :: s2_eigvalues(:) + double precision, allocatable :: e_array(:) + integer, allocatable :: iorder(:) + + ! Guess values for the "N_states_diag" states of the CI_eigenvectors + do j=1,min(N_states_diag,N_det) + do i=1,N_det + CI_eigenvectors(i,j) = psi_coef(i,j) + enddo + enddo + + do j=N_det+1,N_states_diag + do i=1,N_det + CI_eigenvectors(i,j) = 0.d0 + enddo + enddo + + if (diag_algorithm == "Davidson") then + + call davidson_diag(psi_det,CI_eigenvectors,CI_electronic_energy,& + size(CI_eigenvectors,1),N_det,N_states_diag,N_int,output_determinants) + do j=1,N_states_diag + call get_s2_u0(psi_det,CI_eigenvectors(1,j),N_det,size(CI_eigenvectors,1),CI_eigenvectors_s2(j)) + enddo + + + else if (diag_algorithm == "Lapack") then + + allocate (eigenvectors(size(H_matrix_all_dets,1),N_det)) + allocate (eigenvalues(N_det)) + call lapack_diag(eigenvalues,eigenvectors, & + H_matrix_all_dets,size(H_matrix_all_dets,1),N_det) + CI_electronic_energy(:) = 0.d0 + if (s2_eig) then + i_state = 0 + allocate (s2_eigvalues(N_det)) + allocate(index_good_state_array(N_det),good_state_array(N_det)) + good_state_array = .False. + do j=1,N_det + call get_s2_u0(psi_det,eigenvectors(1,j),N_det,size(eigenvectors,1),s2) + s2_eigvalues(j) = s2 + ! Select at least n_states states with S^2 values closed to "expected_s2" + if(dabs(s2-expected_s2).le.0.3d0)then + i_state +=1 + index_good_state_array(i_state) = j + good_state_array(j) = .True. + endif + if(i_state.eq.N_states) then + exit + endif enddo - i_other_state = 0 - do j = 1, N_det - if(good_state_array(j))cycle - i_other_state +=1 - if(i_state+i_other_state.gt.n_states_diag)then - exit - endif - call get_s2_u0(psi_det,eigenvectors(1,j),N_det,size(eigenvectors,1),s2) - do i=1,N_det - CI_eigenvectors(i,i_state+i_other_state) = eigenvectors(i,j) - enddo - CI_electronic_energy(i_state+i_other_state) = eigenvalues(j) - CI_eigenvectors_s2(i_state+i_other_state) = s2 - enddo - + if(i_state .ne.0)then + ! Fill the first "i_state" states that have a correct S^2 value + do j = 1, i_state + do i=1,N_det + CI_eigenvectors(i,j) = eigenvectors(i,index_good_state_array(j)) + enddo + CI_electronic_energy(j) = eigenvalues(index_good_state_array(j)) + CI_eigenvectors_s2(j) = s2_eigvalues(index_good_state_array(j)) + enddo + i_other_state = 0 + do j = 1, N_det + if(good_state_array(j))cycle + i_other_state +=1 + if(i_state+i_other_state.gt.n_states_diag)then + exit + endif + call get_s2_u0(psi_det,eigenvectors(1,j),N_det,size(eigenvectors,1),s2) + do i=1,N_det + CI_eigenvectors(i,i_state+i_other_state) = eigenvectors(i,j) + enddo + CI_electronic_energy(i_state+i_other_state) = eigenvalues(j) + CI_eigenvectors_s2(i_state+i_other_state) = s2 + enddo + + else + print*,'' + print*,'!!!!!!!! WARNING !!!!!!!!!' + print*,' Within the ',N_det,'determinants selected' + print*,' and the ',N_states_diag,'states requested' + print*,' We did not find any state with S^2 values close to ',expected_s2 + print*,' We will then set the first N_states eigenvectors of the H matrix' + print*,' as the CI_eigenvectors' + print*,' You should consider more states and maybe ask for diagonalize_s2 to be .True. or just enlarge the CI space' + print*,'' + do j=1,min(N_states_diag,N_det) + do i=1,N_det + CI_eigenvectors(i,j) = eigenvectors(i,j) + enddo + CI_electronic_energy(j) = eigenvalues(j) + CI_eigenvectors_s2(j) = s2_eigvalues(j) + enddo + endif deallocate(index_good_state_array,good_state_array) - - else - print*,'' - print*,'!!!!!!!! WARNING !!!!!!!!!' - print*,' Within the ',N_det,'determinants selected' - print*,' and the ',N_states_diag,'states requested' - print*,' We did not find any state with S^2 values close to ',expected_s2 - print*,' We will then set the first N_states eigenvectors of the H matrix' - print*,' as the CI_eigenvectors' - print*,' You should consider more states and maybe ask for diagonalize_s2 to be .True. or just enlarge the CI space' - print*,'' - do j=1,min(N_states_diag,N_det) + deallocate(s2_eigvalues) + else + ! Select the "N_states_diag" states of lowest energy + do j=1,min(N_det,N_states_diag) + call get_s2_u0(psi_det,eigenvectors(1,j),N_det,N_det,s2) do i=1,N_det CI_eigenvectors(i,j) = eigenvectors(i,j) enddo - CI_electronic_energy(j) = eigenvalues(j) - CI_eigenvectors_s2(j) = s2_eigvalues(j) + CI_electronic_energy(j) = eigenvalues(j) + CI_eigenvectors_s2(j) = s2 enddo - endif - deallocate(s2_eigvalues) - else - ! Select the "N_states_diag" states of lowest energy - do j=1,min(N_det,N_states_diag) - call get_s2_u0(psi_det,eigenvectors(1,j),N_det,N_det,s2) - do i=1,N_det - CI_eigenvectors(i,j) = eigenvectors(i,j) - enddo - CI_electronic_energy(j) = eigenvalues(j) - CI_eigenvectors_s2(j) = s2 - enddo - endif - deallocate(eigenvectors,eigenvalues) - endif - - if(diagonalize_s2.and.n_states_diag > 1.and. n_det >= n_states_diag)then - ! Diagonalizing S^2 within the "n_states_diag" states found - allocate(s2_eigvalues(N_states_diag)) - call diagonalize_s2_betweenstates(psi_det,CI_eigenvectors,n_det,size(psi_det,3),size(CI_eigenvectors,1),min(n_states_diag,n_det),s2_eigvalues) - - do j = 1, N_states_diag - do i = 1, N_det - psi_coef(i,j) = CI_eigenvectors(i,j) - enddo - enddo - - if(s2_eig)then - - ! Browsing the "n_states_diag" states and getting the lowest in energy "n_states" ones that have the S^2 value - ! closer to the "expected_s2" set as input - - allocate(index_good_state_array(N_det),good_state_array(N_det)) - good_state_array = .False. - i_state = 0 - do j = 1, N_states_diag - if(dabs(s2_eigvalues(j)-expected_s2).le.0.3d0)then - good_state_array(j) = .True. - i_state +=1 - index_good_state_array(i_state) = j endif - enddo - ! Sorting the i_state good states by energy - allocate(e_array(i_state),iorder(i_state)) - do j = 1, i_state - do i = 1, N_det - CI_eigenvectors(i,j) = psi_coef(i,index_good_state_array(j)) - enddo - CI_eigenvectors_s2(j) = s2_eigvalues(index_good_state_array(j)) - call u0_H_u_0(e_0,CI_eigenvectors(1,j),n_det,psi_det,N_int) - CI_electronic_energy(j) = e_0 - e_array(j) = e_0 - iorder(j) = j - enddo - call dsort(e_array,iorder,i_state) - do j = 1, i_state - CI_electronic_energy(j) = e_array(j) - CI_eigenvectors_s2(j) = s2_eigvalues(index_good_state_array(iorder(j))) - do i = 1, N_det - CI_eigenvectors(i,j) = psi_coef(i,index_good_state_array(iorder(j))) - enddo -! call u0_H_u_0(e_0,CI_eigenvectors(1,j),n_det,psi_det,N_int) -! print*,'e = ',CI_electronic_energy(j) -! print*,' = ',e_0 -! call get_s2_u0(psi_det,CI_eigenvectors(1,j),N_det,size(CI_eigenvectors,1),s2) -! print*,'s^2 = ',CI_eigenvectors_s2(j) -! print*,'= ',s2 - enddo - deallocate(e_array,iorder) - - ! Then setting the other states without any specific energy order - i_other_state = 0 - do j = 1, N_states_diag - if(good_state_array(j))cycle - i_other_state +=1 - do i = 1, N_det - CI_eigenvectors(i,i_state + i_other_state) = psi_coef(i,j) - enddo - CI_eigenvectors_s2(i_state + i_other_state) = s2_eigvalues(j) - call u0_H_u_0(e_0,CI_eigenvectors(1,i_state + i_other_state),n_det,psi_det,N_int) - CI_electronic_energy(i_state + i_other_state) = e_0 - enddo - deallocate(index_good_state_array,good_state_array) - - - else - - ! Sorting the N_states_diag by energy, whatever the S^2 value is - - allocate(e_array(n_states_diag),iorder(n_states_diag)) - do j = 1, N_states_diag - call u0_H_u_0(e_0,CI_eigenvectors(1,j),n_det,psi_det,N_int) - e_array(j) = e_0 - iorder(j) = j - enddo - call dsort(e_array,iorder,n_states_diag) - do j = 1, N_states_diag - CI_electronic_energy(j) = e_array(j) - do i = 1, N_det - CI_eigenvectors(i,j) = psi_coef(i,iorder(j)) - enddo - CI_eigenvectors_s2(j) = s2_eigvalues(iorder(j)) - enddo - deallocate(e_array,iorder) + deallocate(eigenvectors,eigenvalues) endif - deallocate(s2_eigvalues) - endif - - + + if(diagonalize_s2.and.n_states_diag > 1.and. n_det >= n_states_diag)then + ! Diagonalizing S^2 within the "n_states_diag" states found + allocate(s2_eigvalues(N_states_diag)) + call diagonalize_s2_betweenstates(psi_det,CI_eigenvectors,n_det,size(psi_det,3),size(CI_eigenvectors,1),min(n_states_diag,n_det),s2_eigvalues) + + do j = 1, N_states_diag + do i = 1, N_det + psi_coef(i,j) = CI_eigenvectors(i,j) + enddo + enddo + + if(s2_eig)then + + ! Browsing the "n_states_diag" states and getting the lowest in energy "n_states" ones that have the S^2 value + ! closer to the "expected_s2" set as input + + allocate(index_good_state_array(N_det),good_state_array(N_det)) + good_state_array = .False. + i_state = 0 + do j = 1, N_states_diag + if(dabs(s2_eigvalues(j)-expected_s2).le.0.3d0)then + good_state_array(j) = .True. + i_state +=1 + index_good_state_array(i_state) = j + endif + enddo + ! Sorting the i_state good states by energy + allocate(e_array(i_state),iorder(i_state)) + do j = 1, i_state + do i = 1, N_det + CI_eigenvectors(i,j) = psi_coef(i,index_good_state_array(j)) + enddo + CI_eigenvectors_s2(j) = s2_eigvalues(index_good_state_array(j)) + call u0_H_u_0(e_0,CI_eigenvectors(1,j),n_det,psi_det,N_int) + CI_electronic_energy(j) = e_0 + e_array(j) = e_0 + iorder(j) = j + enddo + call dsort(e_array,iorder,i_state) + do j = 1, i_state + CI_electronic_energy(j) = e_array(j) + CI_eigenvectors_s2(j) = s2_eigvalues(index_good_state_array(iorder(j))) + do i = 1, N_det + CI_eigenvectors(i,j) = psi_coef(i,index_good_state_array(iorder(j))) + enddo + ! call u0_H_u_0(e_0,CI_eigenvectors(1,j),n_det,psi_det,N_int) + ! print*,'e = ',CI_electronic_energy(j) + ! print*,' = ',e_0 + ! call get_s2_u0(psi_det,CI_eigenvectors(1,j),N_det,size(CI_eigenvectors,1),s2) + ! print*,'s^2 = ',CI_eigenvectors_s2(j) + ! print*,'= ',s2 + enddo + deallocate(e_array,iorder) + + ! Then setting the other states without any specific energy order + i_other_state = 0 + do j = 1, N_states_diag + if(good_state_array(j))cycle + i_other_state +=1 + do i = 1, N_det + CI_eigenvectors(i,i_state + i_other_state) = psi_coef(i,j) + enddo + CI_eigenvectors_s2(i_state + i_other_state) = s2_eigvalues(j) + call u0_H_u_0(e_0,CI_eigenvectors(1,i_state + i_other_state),n_det,psi_det,N_int) + CI_electronic_energy(i_state + i_other_state) = e_0 + enddo + deallocate(index_good_state_array,good_state_array) + + else + + ! Sorting the N_states_diag by energy, whatever the S^2 value is + + allocate(e_array(n_states_diag),iorder(n_states_diag)) + do j = 1, N_states_diag + call u0_H_u_0(e_0,CI_eigenvectors(1,j),n_det,psi_det,N_int) + e_array(j) = e_0 + iorder(j) = j + enddo + call dsort(e_array,iorder,n_states_diag) + do j = 1, N_states_diag + CI_electronic_energy(j) = e_array(j) + do i = 1, N_det + CI_eigenvectors(i,j) = psi_coef(i,iorder(j)) + enddo + CI_eigenvectors_s2(j) = s2_eigvalues(iorder(j)) + enddo + deallocate(e_array,iorder) + endif + deallocate(s2_eigvalues) + + endif + END_PROVIDER - + subroutine diagonalize_CI implicit none BEGIN_DOC diff --git a/src/Determinants/filter_connected.irp.f b/src/Determinants/filter_connected.irp.f index 34d6feb9..da333b1e 100644 --- a/src/Determinants/filter_connected.irp.f +++ b/src/Determinants/filter_connected.irp.f @@ -207,6 +207,7 @@ subroutine create_microlist(minilist, N_minilist, key_mask, microlist, idx_micro do j=1,n_element(1) nt = list(j,1) idx_microlist(cur_microlist(nt)) = i + ! TODO : Page faults do k=1,Nint microlist(k,1,cur_microlist(nt)) = minilist(k,1,i) microlist(k,2,cur_microlist(nt)) = minilist(k,2,i) diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index ec7eb76d..6a8ad1cc 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -1060,6 +1060,7 @@ subroutine i_H_psi_minilist(key,keys,idx_key,N_minilist,coef,Nint,Ndet,Ndet_max, i_in_coef = idx_key(idx(ii)) !DIR$ FORCEINLINE call i_H_j(keys(1,1,i_in_key),key,Nint,hij) + ! TODO : Cache misses i_H_psi_array(1) = i_H_psi_array(1) + coef(i_in_coef,1)*hij enddo diff --git a/src/Integrals_Bielec/ao_bi_integrals.irp.f b/src/Integrals_Bielec/ao_bi_integrals.irp.f index b7c75fb8..2c46d42d 100644 --- a/src/Integrals_Bielec/ao_bi_integrals.irp.f +++ b/src/Integrals_Bielec/ao_bi_integrals.irp.f @@ -351,13 +351,11 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ] real :: map_mb if (read_ao_integrals) then - integer :: load_ao_integrals print*,'Reading the AO integrals' - if (load_ao_integrals(trim(ezfio_filename)//'/work/ao_integrals.bin') == 0) then + call map_load_from_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) print*, 'AO integrals provided' ao_bielec_integrals_in_map = .True. return - endif endif print*, 'Providing the AO integrals' @@ -371,7 +369,7 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ] call new_parallel_job(zmq_to_qp_run_socket,'ao_integrals') do l=1,ao_num - write(task,*) l + write(task,*) "triangle ", l call add_task_to_taskserver(zmq_to_qp_run_socket,task) enddo @@ -402,8 +400,10 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ] print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' ao_bielec_integrals_in_map = .True. + if (write_ao_integrals) then - call dump_ao_integrals(trim(ezfio_filename)//'/work/ao_integrals.bin') + call ezfio_set_work_empty(.False.) + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) call ezfio_set_integrals_bielec_disk_access_ao_integrals("Read") endif diff --git a/src/Integrals_Bielec/ao_bielec_integrals_in_map_slave.irp.f b/src/Integrals_Bielec/ao_bielec_integrals_in_map_slave.irp.f index f15376b5..ae8248a6 100644 --- a/src/Integrals_Bielec/ao_bielec_integrals_in_map_slave.irp.f +++ b/src/Integrals_Bielec/ao_bielec_integrals_in_map_slave.irp.f @@ -103,12 +103,8 @@ subroutine ao_bielec_integrals_in_map_slave(thread,iproc) do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) if (task_id == 0) exit - read(task,*) l - do j=1,l-1 - call compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) - call push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, 0) - enddo - call compute_ao_integrals_jl(l,l,n_integrals,buffer_i,buffer_value) + read(task,*) j, l + call compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, task_id) enddo @@ -227,9 +223,11 @@ subroutine ao_bielec_integrals_in_map_collector control = get_ao_map_size(ao_integrals_map) if (control /= accu) then - print *, irp_here, 'Control : ', control - print *, 'Accu : ', accu - print *, 'Some integrals were lost during the parallel computation. (2)' + print *, '' + print *, irp_here + print *, 'Control : ', control + print *, 'Accu : ', accu + print *, 'Some integrals were lost during the parallel computation.' print *, 'Try to reduce the number of threads.' stop endif diff --git a/src/Integrals_Bielec/map_integrals.irp.f b/src/Integrals_Bielec/map_integrals.irp.f index e9775eec..fdcf4038 100644 --- a/src/Integrals_Bielec/map_integrals.irp.f +++ b/src/Integrals_Bielec/map_integrals.irp.f @@ -13,7 +13,7 @@ BEGIN_PROVIDER [ type(map_type), ao_integrals_map ] call bielec_integrals_index(ao_num,ao_num,ao_num,ao_num,key_max) sze = key_max call map_init(ao_integrals_map,sze) - print*, 'AO map initialized' + print*, 'AO map initialized : ', sze END_PROVIDER subroutine bielec_integrals_index(i,j,k,l,i1) diff --git a/src/Integrals_Bielec/mo_bi_integrals.irp.f b/src/Integrals_Bielec/mo_bi_integrals.irp.f index 69ca0733..0a468c24 100644 --- a/src/Integrals_Bielec/mo_bi_integrals.irp.f +++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f @@ -28,12 +28,10 @@ BEGIN_PROVIDER [ logical, mo_bielec_integrals_in_map ] mo_bielec_integrals_in_map = .True. if (read_mo_integrals) then - integer :: load_mo_integrals print*,'Reading the MO integrals' - if (load_mo_integrals(trim(ezfio_filename)//'/work/mo_integrals.bin') == 0) then - print*, 'MO integrals provided' - return - endif + call map_load_from_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) + print*, 'MO integrals provided' + return endif call add_integrals_to_map(full_ijkl_bitmask_4) @@ -299,7 +297,8 @@ subroutine add_integrals_to_map(mask_ijkl) print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' if (write_mo_integrals) then - call dump_mo_integrals(trim(ezfio_filename)//'/work/mo_integrals.bin') + call ezfio_set_work_empty(.False.) + call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) call ezfio_set_integrals_bielec_disk_access_mo_integrals("Read") endif diff --git a/src/Integrals_Monoelec/pot_mo_ints.irp.f b/src/Integrals_Monoelec/pot_mo_ints.irp.f index 50019abb..69bb654d 100644 --- a/src/Integrals_Monoelec/pot_mo_ints.irp.f +++ b/src/Integrals_Monoelec/pot_mo_ints.irp.f @@ -6,24 +6,23 @@ BEGIN_PROVIDER [double precision, mo_nucl_elec_integral, (mo_tot_num_align,mo_to ! interaction nuclear electron on the MO basis END_DOC - mo_nucl_elec_integral = 0.d0 - !$OMP PARALLEL DO DEFAULT(none) & - !$OMP PRIVATE(i,j,i1,j1,c_j1,c_i1) & - !$OMP SHARED(mo_tot_num,ao_num,mo_coef, & - !$OMP mo_nucl_elec_integral, ao_nucl_elec_integral) - do i = 1, mo_tot_num - do j = 1, mo_tot_num - do i1 = 1,ao_num - c_i1 = mo_coef(i1,i) - do j1 = 1,ao_num - c_j1 = c_i1*mo_coef(j1,j) - mo_nucl_elec_integral(j,i) = mo_nucl_elec_integral(j,i) + & - c_j1 * ao_nucl_elec_integral(j1,i1) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO + double precision, allocatable :: X(:,:) + allocate(X(ao_num_align,mo_tot_num)) + + call dgemm('N','N',ao_num,mo_tot_num,ao_num, & + 1.d0, & + ao_nucl_elec_integral, size(ao_nucl_elec_integral,1), & + mo_coef,size(mo_coef,1), & + 0.d0, X, size(X,1)) + + call dgemm('T','N',mo_tot_num,mo_tot_num,ao_num, & + 1.d0, & + mo_coef,size(mo_coef,1), & + X, size(X,1), & + 0.d0, mo_nucl_elec_integral, size(mo_nucl_elec_integral,1)) + + deallocate(X) + END_PROVIDER @@ -36,25 +35,25 @@ BEGIN_PROVIDER [double precision, mo_nucl_elec_integral_per_atom, (mo_tot_num_al ! where Rk is the geometry of the kth atom END_DOC - mo_nucl_elec_integral_per_atom = 0.d0 - do k = 1, nucl_num - !$OMP PARALLEL DO DEFAULT(none) & - !$OMP PRIVATE(i,j,i1,j1,c_j1,c_i1) & - !$OMP SHARED(mo_tot_num,ao_num,mo_coef, & - !$OMP mo_nucl_elec_integral_per_atom, ao_nucl_elec_integral_per_atom,k) - do i = 1, mo_tot_num - do j = 1, mo_tot_num - do i1 = 1,ao_num - c_i1 = mo_coef(i1,i) - do j1 = 1,ao_num - c_j1 = c_i1*mo_coef(j1,j) - mo_nucl_elec_integral_per_atom(j,i,k) = mo_nucl_elec_integral_per_atom(j,i,k) + & - c_j1 * ao_nucl_elec_integral_per_atom(j1,i1,k) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO + allocate(X(ao_num_align,mo_tot_num)) + double precision, allocatable :: X(:,:) + + do k = 1, nucl_num + + call dgemm('N','N',ao_num,mo_tot_num,ao_num, & + 1.d0, & + ao_nucl_elec_integral_per_atom, size(ao_nucl_elec_integral_per_atom,1),& + mo_coef,size(mo_coef,1), & + 0.d0, X, size(X,1)) + + call dgemm('T','N',mo_tot_num,mo_tot_num,ao_num, & + 1.d0, & + mo_coef,size(mo_coef,1), & + X, size(X,1), & + 0.d0, mo_nucl_elec_integral_per_atom(1,1,k), size(mo_nucl_elec_integral_per_atom,1)) + enddo + + deallocate(X) END_PROVIDER diff --git a/src/Utils/fortran_mmap.c b/src/Utils/fortran_mmap.c new file mode 100644 index 00000000..eee8337e --- /dev/null +++ b/src/Utils/fortran_mmap.c @@ -0,0 +1,72 @@ +#include +#include +#include +#include +#include +#include +#include + + +void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) +{ + int i; + int fd; + int result; + void* map; + + if (read_only == 1) + { + fd = open(filename, O_RDONLY, (mode_t)0600); + if (fd == -1) { + printf("%s:\n", filename); + perror("Error opening mmap file for reading"); + exit(EXIT_FAILURE); + } + map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0); + } + else + { + fd = open(filename, O_RDWR | O_CREAT, (mode_t)0600); + if (fd == -1) { + printf("%s:\n", filename); + perror("Error opening mmap file for writing"); + exit(EXIT_FAILURE); + } + + result = lseek(fd, bytes, SEEK_SET); + if (result == -1) { + close(fd); + printf("%s:\n", filename); + perror("Error calling lseek() to stretch the file"); + exit(EXIT_FAILURE); + } + + result = write(fd, "", 1); + if (result != 1) { + close(fd); + printf("%s:\n", filename); + perror("Error writing last byte of the file"); + exit(EXIT_FAILURE); + } + + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + } + + if (map == MAP_FAILED) { + close(fd); + printf("%s:\n", filename); + perror("Error mmapping the file"); + exit(EXIT_FAILURE); + } + + *file_descr = fd; + return map; +} + +void munmap_fortran(size_t bytes, int fd, void* map) +{ + if (munmap(map, bytes) == -1) { + perror("Error un-mmapping the file"); + } + close(fd); +} diff --git a/src/Utils/map_functions.irp.f b/src/Utils/map_functions.irp.f new file mode 100644 index 00000000..68ba342c --- /dev/null +++ b/src/Utils/map_functions.irp.f @@ -0,0 +1,115 @@ +subroutine map_save_to_disk(filename,map) + use map_module + use mmap_module + implicit none + character*(*), intent(in) :: filename + type(map_type), intent(inout) :: map + type(c_ptr) :: c_pointer(3) + integer :: fd(3) + integer*8 :: i,k + integer :: j + + + if (map % consolidated) then + stop 'map already consolidated' + endif + + call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .False., c_pointer(1)) + call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size +2_8/)) + + call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .False., c_pointer(2)) + call c_f_pointer(c_pointer(2),map % consolidated_key, (/ map % n_elements /)) + + call mmap(trim(filename)//'_consolidated_value', (/ map % n_elements /), integral_kind, fd(3), .False., c_pointer(3)) + call c_f_pointer(c_pointer(3),map % consolidated_value, (/ map % n_elements /)) + + if (.not.associated(map%consolidated_key)) then + stop 'cannot consolidate map : consolidated_key not associated' + endif + + if (.not.associated(map%consolidated_value)) then + stop 'cannot consolidate map : consolidated_value not associated' + endif + + if (.not.associated(map%consolidated_idx)) then + stop 'cannot consolidate map : consolidated_idx not associated' + endif + + call map_sort(map) + k = 1_8 + do i=0_8, map % map_size + map % consolidated_idx (i+1) = k + do j=1, map % map(i) % n_elements + map % consolidated_value(k) = map % map(i) % value(j) + map % consolidated_key (k) = map % map(i) % key(j) + k = k+1_8 + enddo + deallocate(map % map(i) % value) + deallocate(map % map(i) % key) + map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1) :) + map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1) :) + enddo + map % consolidated_idx (map % map_size + 2_8) = k + map % consolidated = .True. + + +! call munmap( (/ map % map_size + 2_8 /), 8, fd(1), c_pointer(1)) +! call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., c_pointer(1)) +! call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size +2_8/)) +! +! call munmap( (/ map % n_elements /), cache_key_kind, fd(2), c_pointer(2)) +! call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., c_pointer(2)) +! call c_f_pointer(c_pointer(2),map % consolidated_key, (/ map % n_elements /)) +! +! call munmap( (/ map % n_elements /), integral_kind, fd(3), c_pointer(3)) +! call mmap(trim(filename)//'_consolidated_value', (/ map % n_elements /), integral_kind, fd(3), .True., c_pointer(3)) +! call c_f_pointer(c_pointer(3),map % consolidated_value, (/ map % n_elements /)) + +end + +subroutine map_load_from_disk(filename,map) + use map_module + use mmap_module + implicit none + character*(*), intent(in) :: filename + type(map_type), intent(inout) :: map + type(c_ptr) :: c_pointer(3) + integer :: fd(3) + integer*8 :: i,k + integer :: n_elements + + + + if (map % consolidated) then + stop 'map already consolidated' + endif + + call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., c_pointer(1)) + call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size + 2_8/)) + + map% n_elements = map % consolidated_idx (map % map_size+2_8)-1 + + call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., c_pointer(2)) + call c_f_pointer(c_pointer(2),map % consolidated_key, (/ map % n_elements /)) + + call mmap(trim(filename)//'_consolidated_value', (/ map % n_elements /), integral_kind, fd(3), .True., c_pointer(3)) + call c_f_pointer(c_pointer(3),map % consolidated_value, (/ map % n_elements /)) + + k = 1_8 + do i=0_8, map % map_size + deallocate(map % map(i) % value) + deallocate(map % map(i) % key) + map % map(i) % value => map % consolidated_value ( map % consolidated_idx (i+1) :) + map % map(i) % key => map % consolidated_key ( map % consolidated_idx (i+1) :) + map % map(i) % sorted = .True. + n_elements = map % consolidated_idx (i+2) - k + k = map % consolidated_idx (i+2) + map % map(i) % map_size = n_elements + map % map(i) % n_elements = n_elements + enddo + map % n_elements = k-1 + map % sorted = .True. + map % consolidated = .True. + +end + diff --git a/src/Utils/map_module.f90 b/src/Utils/map_module.f90 index 47adc83e..c2a5cbf1 100644 --- a/src/Utils/map_module.f90 +++ b/src/Utils/map_module.f90 @@ -30,8 +30,8 @@ module map_module integer*8, parameter :: map_mask = ibset(0_8,15)-1_8 type cache_map_type - integer(cache_key_kind), pointer :: key(:) real(integral_kind), pointer :: value(:) + integer(cache_key_kind), pointer :: key(:) logical :: sorted integer(cache_map_size_kind) :: map_size integer(cache_map_size_kind) :: n_elements @@ -40,9 +40,13 @@ module map_module type map_type type(cache_map_type), allocatable :: map(:) + real(integral_kind), pointer :: consolidated_value(:) + integer(cache_key_kind), pointer :: consolidated_key(:) + integer*8, pointer :: consolidated_idx(:) + logical :: sorted + logical :: consolidated integer(map_size_kind) :: map_size integer(map_size_kind) :: n_elements - logical :: sorted integer(omp_lock_kind) :: lock end type map_type @@ -92,6 +96,7 @@ subroutine map_init(map,keymax) map%n_elements = 0_8 map%map_size = ishft(keymax,map_shift) + map%consolidated = .False. allocate(map%map(0_8:map%map_size),stat=err) if (err /= 0) then @@ -618,6 +623,7 @@ subroutine search_key_big_interval(key,X,sze,idx,ibegin_in,iend_in) idx = ibegin + istep do while (istep > 16) idx = ibegin + istep + ! TODO : Cache misses if (cache_key < X(idx)) then iend = idx istep = ishft(idx-ibegin,-1) @@ -655,12 +661,10 @@ subroutine search_key_big_interval(key,X,sze,idx,ibegin_in,iend_in) idx = ibegin if (min(iend_in,sze) > ibegin+16) then iend = ibegin+16 - !DIR$ VECTOR ALIGNED do while (cache_key > X(idx)) idx = idx+1 end do else - !DIR$ VECTOR ALIGNED do while (cache_key > X(idx)) idx = idx+1 if (idx /= iend) then @@ -768,13 +772,11 @@ subroutine search_key_value_big_interval(key,value,X,Y,sze,idx,ibegin_in,iend_in value = Y(idx) if (min(iend_in,sze) > ibegin+16) then iend = ibegin+16 - !DIR$ VECTOR ALIGNED do while (cache_key > X(idx)) idx = idx+1 value = Y(idx) end do else - !DIR$ VECTOR ALIGNED do while (cache_key > X(idx)) idx = idx+1 value = Y(idx) @@ -848,8 +850,9 @@ subroutine get_cache_map(map,map_idx,keys,values,n_elements) n_elements = map%map(map_idx)%n_elements do i=1,n_elements - keys(i) = map%map(map_idx)%key(i) + shift + keys(i) = map%map(map_idx)%key(i) + shift values(i) = map%map(map_idx)%value(i) enddo end + diff --git a/src/Utils/mmap.f90 b/src/Utils/mmap.f90 new file mode 100644 index 00000000..75b996de --- /dev/null +++ b/src/Utils/mmap.f90 @@ -0,0 +1,69 @@ +module mmap_module + + use iso_c_binding + + interface + + ! File descriptors + ! ---------------- + + type(c_ptr) function c_mmap_fortran(filename, length, fd, read_only) bind(c,name='mmap_fortran') + use iso_c_binding + character(c_char), intent(in) :: filename(*) + integer(c_size_t), intent(in), value :: length + integer(c_int), intent(out) :: fd + integer(c_int), intent(in), value :: read_only + end function + + subroutine c_munmap(length, fd, map) bind(c,name='munmap_fortran') + use iso_c_binding + integer(c_size_t), intent(in), value :: length + integer(c_int), intent(in), value :: fd + type(c_ptr), intent(in), value :: map + end subroutine + + end interface + + contains + + subroutine mmap(filename, shape, bytes, fd, read_only, map) + use iso_c_binding + implicit none + character*(*), intent(in) :: filename ! Name of the mapped file + integer*8, intent(in) :: shape(:) ! Shape of the array to map + integer, intent(in) :: bytes ! Number of bytes per element + logical, intent(in) :: read_only ! If true, mmap is read-only + integer, intent(out) :: fd ! File descriptor + type(c_ptr), intent(out) :: map ! C Pointer + + integer(c_long) :: length + integer(c_int) :: fd_ + + length = PRODUCT( shape(:) ) * bytes + if (read_only) then + map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1) + else + map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 0) + endif + fd = fd_ + end subroutine + + subroutine munmap(shape, bytes, fd, map) + use iso_c_binding + implicit none + integer*8, intent(in) :: shape(:) ! Shape of the array to map + integer, intent(in) :: bytes ! Number of bytes per element + integer, intent(in) :: fd ! File descriptor + type(c_ptr), intent(in) :: map ! C pointer + + integer(c_long) :: length + integer(c_int) :: fd_ + + length = PRODUCT( shape(:) ) * bytes + fd_ = fd + call c_munmap( length, fd_, map) + end subroutine + +end module mmap_module + + diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index ae1de6e7..13e91d11 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -605,7 +605,7 @@ subroutine add_task_to_taskserver(zmq_to_qp_run_socket,task) end -subroutine task_done_to_taskserver(zmq_to_qp_run_socket,worker_id, task_id) +subroutine task_done_to_taskserver(zmq_to_qp_run_socket, worker_id, task_id) use f77_zmq implicit none BEGIN_DOC