diff --git a/docs/source/users_guide/qpsh.rst b/docs/source/users_guide/qpsh.rst index 7a1a874c..049ab8e3 100644 --- a/docs/source/users_guide/qpsh.rst +++ b/docs/source/users_guide/qpsh.rst @@ -8,7 +8,7 @@ qpsh :command:`qpsh` is the |qp| shell. It is a Bash shell with all the -required evironment variables loaded, a modified prompt, and the +required environment variables loaded, a modified prompt, and the :ref:`qp` command. diff --git a/ocaml/qp_run.ml b/ocaml/qp_run.ml index 0cb862ae..bb886143 100644 --- a/ocaml/qp_run.ml +++ b/ocaml/qp_run.ml @@ -132,6 +132,7 @@ let run slave ?prefix exe ezfio_file = (** Run executable *) let prefix = match prefix with + | Some "gdb" -> "gdb --args " | Some x -> x^" " | None -> "" and exe = diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 319e3c6e..d409a6d5 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -5,7 +5,7 @@ double precision function get_ao_integ_chol(i,j,k,l) ! i(r1) j(r1) 1/r12 k(r2) l(r2) END_DOC integer, intent(in) :: i,j,k,l - double precision, external :: ddot + double precision, external :: ddot get_ao_integ_chol = ddot(cholesky_ao_num, cholesky_ao_transp(1,i,j), 1, cholesky_ao_transp(1,k,l), 1) end @@ -76,8 +76,7 @@ END_PROVIDER ndim8 = ao_num*ao_num*1_8+1 double precision :: wall0,wall1 - type(c_ptr) :: c_pointer(2) - integer :: fd(2) + type(mmap_type) :: map PROVIDE nproc ao_cholesky_threshold do_direct_integrals qp_max_mem PROVIDE nucl_coord ao_two_e_integral_schwartz @@ -156,7 +155,7 @@ END_PROVIDER enddo !$OMP END PARALLEL DO endif - ! Just to guarentee termination + ! Just to guarentee termination D(ndim8) = 0.d0 D_sorted(:) = -D(:) @@ -181,14 +180,9 @@ END_PROVIDER if (elec_num > 10) then rank_max = min(np,20*elec_num*elec_num) endif - call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) - call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) - - ! Deleting the file while it is open makes the file invisible on the filesystem, - ! and automatically deleted, even if the program crashes - iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao_tmp', 'R') - close(iunit,status='delete') + call mmap_create_d('', (/ ndim8, rank_max /), .False., .True., map) + L => map%d2 ! 3. N = 0 @@ -205,7 +199,7 @@ END_PROVIDER do while ( (Dmax > tau).and.(np > 0) ) ! a. i = i+1 - + block_size = max(N,24) @@ -317,7 +311,7 @@ END_PROVIDER ! g. iblock = 0 - + do j=1,nq if ( (Qmax < Dmin).or.(N+j*1_8 > ndim8) ) exit @@ -480,7 +474,7 @@ END_PROVIDER enddo !$OMP END PARALLEL DO - call munmap( (/ ndim8, rank_max /), 8, fd(1), c_pointer(1) ) + call mmap_destroy(map) cholesky_ao_num = rank diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index d8131a9c..e4907f22 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -154,14 +154,14 @@ subroutine run_ccsd_space_orb allocate(all_err(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth), all_t(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth)) !$OMP PARALLEL PRIVATE(i,j) DEFAULT(SHARED) + !$OMP DO COLLAPSE(2) do j=1,cc_diis_depth - !$OMP DO do i=1, size(all_err,1) all_err(i,j) = 0d0 all_t(i,j) = 0d0 enddo - !$OMP END DO NOWAIT enddo + !$OMP END DO NOWAIT !$OMP END PARALLEL endif @@ -237,6 +237,7 @@ subroutine run_ccsd_space_orb call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2%f,t2%f) else print*,'Unkown cc_method_method: '//cc_update_method + call abort endif call update_tau_space(nO,nV,t1%f,t1,t2,tau) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 191e0021..d299f982 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -223,12 +223,11 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ exit endif - if (itermax > 4) then - itermax = itermax - 1 - else if (m==1.and.disk_based_davidson) then + if (disk_based_davidson) then m=0 disk_based = .True. - itermax = 6 + else if (itermax > 4) then + itermax = itermax - 1 else nproc_target = nproc_target - 1 endif @@ -267,14 +266,12 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ if (disk_based) then ! Create memory-mapped files for W and S - type(c_ptr) :: ptr_w, ptr_s - integer :: fd_s, fd_w - call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., .True., ptr_w) - call mmap(trim(ezfio_work_dir)//'davidson_s', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 4, fd_s, .False., .True., ptr_s) - call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) - call c_f_pointer(ptr_s, s, (/sze,N_st_diag*itermax/)) + type(mmap_type) :: map_s, map_w + + call mmap_create_d('', (/ 1_8*sze, 1_8*N_st_diag*itermax /), .False., .True., map_w) + call mmap_create_s('', (/ 1_8*sze, 1_8*N_st_diag*itermax /), .False., .True., map_s) + w => map_w%d2 + s => map_s%s2 else allocate(W(sze,N_st_diag*itermax), S(sze,N_st_diag*itermax)) endif @@ -755,13 +752,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ if (disk_based)then ! Remove temp files - integer, external :: getUnitAndOpen - call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 8, fd_w, ptr_w ) - fd_w = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_w','r') - close(fd_w,status='delete') - call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 4, fd_s, ptr_s ) - fd_s = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_s','r') - close(fd_s,status='delete') + call mmap_destroy(map_w) + call mmap_destroy(map_s) else deallocate(W,S) endif @@ -774,6 +766,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ lambda & ) FREE nthreads_davidson + end diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f index 6b852905..764309ed 100644 --- a/src/davidson/diagonalize_ci.irp.f +++ b/src/davidson/diagonalize_ci.irp.f @@ -330,6 +330,10 @@ END_PROVIDER deallocate(eigenvectors,eigenvalues) endif +! ! Dominant determinants for each states +! call print_dominant_det(psi_det,CI_eigenvectors,N_det,N_states,N_int) +! call wf_overlap(psi_det,psi_coef,N_states,N_det,psi_det,CI_eigenvectors,N_states,N_det) + END_PROVIDER subroutine diagonalize_CI diff --git a/src/davidson/u0_h_u0.irp.f b/src/davidson/u0_h_u0.irp.f index 7ef154a3..808bbb5d 100644 --- a/src/davidson/u0_h_u0.irp.f +++ b/src/davidson/u0_h_u0.irp.f @@ -179,10 +179,12 @@ subroutine H_u_0_nstates_openmp_work_$N_int(v_t,u_t,N_st,sze,istart,iend,ishift, ! ! compute_singles = (mem+rss > qp_max_mem) ! -! if (.not.compute_singles) then -! provide singles_beta_csc -! endif -compute_singles=.True. + compute_singles=.True. + + if (.not.compute_singles) then + provide singles_alpha_csc singles_beta_csc + endif + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 @@ -287,8 +289,7 @@ compute_singles=.True. tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol) -!--- -! if (compute_singles) then + if (compute_singles) then l_a = psi_bilinear_matrix_columns_loc(lcol) ASSERT (l_a <= N_det) @@ -311,69 +312,67 @@ compute_singles=.True. buffer, idx, tmp_det(1,1), j, & singles_a, n_singles_a ) -!----- -! else -! -! ! Search for singles -! -!call cpu_time(time0) -! ! Right boundary -! l_a = psi_bilinear_matrix_columns_loc(lcol+1)-1 -! ASSERT (l_a <= N_det) -! do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) -! lrow = psi_bilinear_matrix_rows(l_a) -! ASSERT (lrow <= N_det_alpha_unique) -! -! left = singles_alpha_csc_idx(krow) -! right_max = -1_8 -! right = singles_alpha_csc_idx(krow+1) -! do while (right-left>0_8) -! k8 = shiftr(right+left,1) -! if (singles_alpha_csc(k8) > lrow) then -! right = k8 -! else if (singles_alpha_csc(k8) < lrow) then -! left = k8 + 1_8 -! else -! right_max = k8+1_8 -! exit -! endif -! enddo -! if (right_max > 0_8) exit -! l_a = l_a-1 -! enddo -! if (right_max < 0_8) right_max = singles_alpha_csc_idx(krow) -! -! ! Search -! n_singles_a = 0 -! l_a = psi_bilinear_matrix_columns_loc(lcol) -! ASSERT (l_a <= N_det) -! -! last_found = singles_alpha_csc_idx(krow) -! do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) -! lrow = psi_bilinear_matrix_rows(l_a) -! ASSERT (lrow <= N_det_alpha_unique) -! -! left = last_found -! right = right_max -! do while (right-left>0_8) -! k8 = shiftr(right+left,1) -! if (singles_alpha_csc(k8) > lrow) then -! right = k8 -! else if (singles_alpha_csc(k8) < lrow) then -! left = k8 + 1_8 -! else -! n_singles_a += 1 -! singles_a(n_singles_a) = l_a -! last_found = k8+1_8 -! exit -! endif -! enddo -! l_a = l_a+1 -! enddo -! j = j-1 -! -! endif -!----- + else + + ! Search for singles + + ! Right boundary + l_a = psi_bilinear_matrix_columns_loc(lcol+1)-1 + ASSERT (l_a <= N_det) + do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + left = singles_alpha_csc_idx(krow) + right_max = -1_8 + right = singles_alpha_csc_idx(krow+1) + do while (right-left>0_8) + k8 = shiftr(right+left,1) + if (singles_alpha_csc(k8) > lrow) then + right = k8 + else if (singles_alpha_csc(k8) < lrow) then + left = k8 + 1_8 + else + right_max = k8+1_8 + exit + endif + enddo + if (right_max > 0_8) exit + l_a = l_a-1 + enddo + if (right_max < 0_8) right_max = singles_alpha_csc_idx(krow) + + ! Search + n_singles_a = 0 + l_a = psi_bilinear_matrix_columns_loc(lcol) + ASSERT (l_a <= N_det) + + last_found = singles_alpha_csc_idx(krow) + do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + left = last_found + right = right_max + do while (right-left>0_8) + k8 = shiftr(right+left,1) + if (singles_alpha_csc(k8) > lrow) then + right = k8 + else if (singles_alpha_csc(k8) < lrow) then + left = k8 + 1_8 + else + n_singles_a += 1 + singles_a(n_singles_a) = l_a + last_found = k8+1_8 + exit + endif + enddo + l_a = l_a+1 + enddo + j = j-1 + + endif + ! Loop over alpha singles ! ----------------------- diff --git a/src/davidson/u0_hs2_u0.irp.f b/src/davidson/u0_hs2_u0.irp.f index 38fb56bd..3afe4ec6 100644 --- a/src/davidson/u0_hs2_u0.irp.f +++ b/src/davidson/u0_hs2_u0.irp.f @@ -218,10 +218,13 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend, ! ! compute_singles = (mem+rss > qp_max_mem) ! -! if (.not.compute_singles) then -! provide singles_beta_csc -! endif -compute_singles=.True. + compute_singles=.True. + + if (.not.compute_singles) then + provide singles_alpha_csc + provide singles_beta_csc + endif + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 @@ -314,6 +317,7 @@ compute_singles=.True. singles_b(n_singles_b) = singles_beta_csc(k8) enddo endif + endif kcol_prev = kcol @@ -326,8 +330,7 @@ compute_singles=.True. tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol) -!--- -! if (compute_singles) then + if (compute_singles) then l_a = psi_bilinear_matrix_columns_loc(lcol) ASSERT (l_a <= N_det) @@ -352,69 +355,66 @@ compute_singles=.True. buffer, idx, tmp_det(1,1), j, & singles_a, n_singles_a ) -!----- -! else -! -! ! Search for singles -! -!call cpu_time(time0) -! ! Right boundary -! l_a = psi_bilinear_matrix_columns_loc(lcol+1)-1 -! ASSERT (l_a <= N_det) -! do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) -! lrow = psi_bilinear_matrix_rows(l_a) -! ASSERT (lrow <= N_det_alpha_unique) -! -! left = singles_alpha_csc_idx(krow) -! right_max = -1_8 -! right = singles_alpha_csc_idx(krow+1) -! do while (right-left>0_8) -! k8 = shiftr(right+left,1) -! if (singles_alpha_csc(k8) > lrow) then -! right = k8 -! else if (singles_alpha_csc(k8) < lrow) then -! left = k8 + 1_8 -! else -! right_max = k8+1_8 -! exit -! endif -! enddo -! if (right_max > 0_8) exit -! l_a = l_a-1 -! enddo -! if (right_max < 0_8) right_max = singles_alpha_csc_idx(krow) -! -! ! Search -! n_singles_a = 0 -! l_a = psi_bilinear_matrix_columns_loc(lcol) -! ASSERT (l_a <= N_det) -! -! last_found = singles_alpha_csc_idx(krow) -! do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) -! lrow = psi_bilinear_matrix_rows(l_a) -! ASSERT (lrow <= N_det_alpha_unique) -! -! left = last_found -! right = right_max -! do while (right-left>0_8) -! k8 = shiftr(right+left,1) -! if (singles_alpha_csc(k8) > lrow) then -! right = k8 -! else if (singles_alpha_csc(k8) < lrow) then -! left = k8 + 1_8 -! else -! n_singles_a += 1 -! singles_a(n_singles_a) = l_a -! last_found = k8+1_8 -! exit -! endif -! enddo -! l_a = l_a+1 -! enddo -! j = j-1 -! -! endif -!----- + else + + ! Search for singles + + ! Right boundary + l_a = psi_bilinear_matrix_columns_loc(lcol+1)-1 + ASSERT (l_a <= N_det) + do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + left = singles_alpha_csc_idx(krow) + right_max = -1_8 + right = singles_alpha_csc_idx(krow+1) + do while (right-left>0_8) + k8 = shiftr(right+left,1) + if (singles_alpha_csc(k8) > lrow) then + right = k8 + else if (singles_alpha_csc(k8) < lrow) then + left = k8 + 1_8 + else + right_max = k8+1_8 + exit + endif + enddo + if (right_max > 0_8) exit + l_a = l_a-1 + enddo + if (right_max < 0_8) right_max = singles_alpha_csc_idx(krow) + + ! Search + n_singles_a = 0 + l_a = psi_bilinear_matrix_columns_loc(lcol) + ASSERT (l_a <= N_det) + + last_found = singles_alpha_csc_idx(krow) + do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + left = last_found + right = right_max + do while (right-left>0_8) + k8 = shiftr(right+left,1) + if (singles_alpha_csc(k8) > lrow) then + right = k8 + else if (singles_alpha_csc(k8) < lrow) then + left = k8 + 1_8 + else + n_singles_a += 1 + singles_a(n_singles_a) = l_a + last_found = k8+1_8 + exit + endif + enddo + l_a = l_a+1 + enddo + j = j-1 + + endif ! Loop over alpha singles ! ----------------------- diff --git a/src/davidson_keywords/EZFIO.cfg b/src/davidson_keywords/EZFIO.cfg index 6337b96f..81b7f949 100644 --- a/src/davidson_keywords/EZFIO.cfg +++ b/src/davidson_keywords/EZFIO.cfg @@ -48,7 +48,7 @@ default: false [distributed_davidson] type: logical -doc: If |true|, use the distributed algorithm -default: True +doc: If |true|, use the distributed algorithm. If you plan to run multi-node calculations, set this to true before running. +default: False interface: ezfio,provider,ocaml diff --git a/src/determinants/ref_bitmask.irp.f b/src/determinants/ref_bitmask.irp.f index 4e029ceb..18fa2396 100644 --- a/src/determinants/ref_bitmask.irp.f +++ b/src/determinants/ref_bitmask.irp.f @@ -30,31 +30,30 @@ ref_bitmask_energy += mo_one_e_integrals(occ(i,1),occ(i,1)) + mo_one_e_integrals(occ(i,2),occ(i,2)) ref_bitmask_kinetic_energy += mo_kinetic_integrals(occ(i,1),occ(i,1)) + mo_kinetic_integrals(occ(i,2),occ(i,2)) ref_bitmask_n_e_energy += mo_integrals_n_e(occ(i,1),occ(i,1)) + mo_integrals_n_e(occ(i,2),occ(i,2)) + do j = i+1, elec_alpha_num + ref_bitmask_two_e_energy += mo_two_e_integrals_jj_anti(occ(j,1),occ(i,1)) + ref_bitmask_energy += mo_two_e_integrals_jj_anti(occ(j,1),occ(i,1)) + enddo + do j= 1, elec_alpha_num + ref_bitmask_two_e_energy += mo_two_e_integrals_jj(occ(j,1),occ(i,2)) + ref_bitmask_energy += mo_two_e_integrals_jj(occ(j,1),occ(i,2)) + enddo + do j = i+1, elec_beta_num + ref_bitmask_two_e_energy += mo_two_e_integrals_jj_anti(occ(j,2),occ(i,2)) + ref_bitmask_energy += mo_two_e_integrals_jj_anti(occ(j,2),occ(i,2)) + enddo enddo do i = elec_beta_num+1,elec_alpha_num ref_bitmask_energy += mo_one_e_integrals(occ(i,1),occ(i,1)) ref_bitmask_kinetic_energy += mo_kinetic_integrals(occ(i,1),occ(i,1)) ref_bitmask_n_e_energy += mo_integrals_n_e(occ(i,1),occ(i,1)) - enddo - - do j= 1, elec_alpha_num - do i = j+1, elec_alpha_num - ref_bitmask_two_e_energy += mo_two_e_integrals_jj_anti(occ(i,1),occ(j,1)) - ref_bitmask_energy += mo_two_e_integrals_jj_anti(occ(i,1),occ(j,1)) + do j = i+1, elec_alpha_num + ref_bitmask_two_e_energy += mo_two_e_integrals_jj_anti(occ(j,1),occ(i,1)) + ref_bitmask_energy += mo_two_e_integrals_jj_anti(occ(j,1),occ(i,1)) enddo enddo - do j= 1, elec_beta_num - do i = j+1, elec_beta_num - ref_bitmask_two_e_energy += mo_two_e_integrals_jj_anti(occ(i,2),occ(j,2)) - ref_bitmask_energy += mo_two_e_integrals_jj_anti(occ(i,2),occ(j,2)) - enddo - do i= 1, elec_alpha_num - ref_bitmask_two_e_energy += mo_two_e_integrals_jj(occ(i,1),occ(j,2)) - ref_bitmask_energy += mo_two_e_integrals_jj(occ(i,1),occ(j,2)) - enddo - enddo ref_bitmask_one_e_energy = ref_bitmask_kinetic_energy + ref_bitmask_n_e_energy ref_bitmask_energy_ab = 0.d0 diff --git a/src/determinants/spindeterminants.irp.f b/src/determinants/spindeterminants.irp.f index dd55e112..87c5d360 100644 --- a/src/determinants/spindeterminants.irp.f +++ b/src/determinants/spindeterminants.irp.f @@ -910,6 +910,8 @@ subroutine copy_psi_bilinear_to_psi(psi, isize) end +use mmap_module + BEGIN_PROVIDER [ integer*8, singles_alpha_csc_idx, (N_det_alpha_unique+1) ] &BEGIN_PROVIDER [ integer*8, singles_alpha_csc_size ] implicit none @@ -925,12 +927,11 @@ end idx0(i) = i enddo - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP SHARED(N_det_alpha_unique, psi_det_alpha_unique, & - !$OMP idx0, N_int, singles_alpha_csc, & - !$OMP elec_alpha_num, mo_num, singles_alpha_csc_idx) & + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(N_det_alpha_unique, psi_det_alpha_unique, & + !$OMP idx0, N_int, singles_alpha_csc_idx) & !$OMP PRIVATE(i,s,j) - allocate (s(elec_alpha_num * (mo_num-elec_alpha_num) )) + allocate (s(N_det_alpha_unique)) !$OMP DO SCHEDULE(static,64) do i=1, N_det_alpha_unique call get_all_spin_singles( & @@ -966,7 +967,7 @@ BEGIN_PROVIDER [ integer, singles_alpha_csc, (singles_alpha_csc_size) ] !$OMP PARALLEL DO DEFAULT(NONE) & !$OMP SHARED(N_det_alpha_unique, psi_det_alpha_unique, & !$OMP idx0, N_int, singles_alpha_csc, singles_alpha_csc_idx)& - !$OMP PRIVATE(i,k) SCHEDULE(static,1) + !$OMP PRIVATE(i,k) SCHEDULE(static) do i=1, N_det_alpha_unique call get_all_spin_singles( & psi_det_alpha_unique, idx0, psi_det_alpha_unique(1,i), N_int,& @@ -978,7 +979,36 @@ BEGIN_PROVIDER [ integer, singles_alpha_csc, (singles_alpha_csc_size) ] END_PROVIDER +BEGIN_PROVIDER [ type(mmap_type), singles_alpha_csc_map ] + implicit none + BEGIN_DOC + ! Indices of all single excitations + END_DOC + integer :: i, k + integer, allocatable :: idx0(:) + call mmap_create_i('', (/ 1_8*singles_alpha_csc_size /), & + .False., .False., singles_alpha_csc_map) + + allocate (idx0(N_det_alpha_unique)) + do i=1, N_det_alpha_unique + idx0(i) = i + enddo + + !$OMP PARALLEL DO DEFAULT(NONE) & + !$OMP SHARED(N_det_alpha_unique, psi_det_alpha_unique, & + !$OMP idx0, N_int, singles_alpha_csc_map, singles_alpha_csc_idx)& + !$OMP PRIVATE(i,k) SCHEDULE(static) + do i=1, N_det_alpha_unique + call get_all_spin_singles( & + psi_det_alpha_unique, idx0, psi_det_alpha_unique(1,i), N_int, N_det_alpha_unique, & + singles_alpha_csc_map%i1(singles_alpha_csc_idx(i):singles_alpha_csc_idx(i)+N_det_alpha_unique-1),& + k) + enddo + !$OMP END PARALLEL DO + deallocate(idx0) + +END_PROVIDER BEGIN_PROVIDER [ integer*8, singles_beta_csc_idx, (N_det_beta_unique+1) ] @@ -996,13 +1026,12 @@ END_PROVIDER idx0(i) = i enddo - !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(N_det_beta_unique, psi_det_beta_unique, & - !$OMP idx0, N_int, singles_beta_csc, & - !$OMP elec_beta_num, mo_num, singles_beta_csc_idx) & + !$OMP idx0, N_int, singles_beta_csc_idx) & !$OMP PRIVATE(i,s,j) - allocate (s(elec_beta_num*(mo_num-elec_beta_num))) - !$OMP DO SCHEDULE(static,1) + allocate (s(N_det_beta_unique)) + !$OMP DO SCHEDULE(static) do i=1, N_det_beta_unique call get_all_spin_singles( & psi_det_beta_unique, idx0, psi_det_beta_unique(1,i), N_int,& @@ -1037,7 +1066,7 @@ BEGIN_PROVIDER [ integer, singles_beta_csc, (singles_beta_csc_size) ] !$OMP PARALLEL DO DEFAULT(NONE) & !$OMP SHARED(N_det_beta_unique, psi_det_beta_unique, & !$OMP idx0, N_int, singles_beta_csc, singles_beta_csc_idx)& - !$OMP PRIVATE(i,k) SCHEDULE(static,64) + !$OMP PRIVATE(i,k) SCHEDULE(static) do i=1, N_det_beta_unique call get_all_spin_singles( & psi_det_beta_unique, idx0, psi_det_beta_unique(1,i), N_int,& @@ -1049,6 +1078,37 @@ BEGIN_PROVIDER [ integer, singles_beta_csc, (singles_beta_csc_size) ] END_PROVIDER +BEGIN_PROVIDER [ type(mmap_type), singles_beta_csc_map ] + implicit none + BEGIN_DOC + ! Indices of all single excitations + END_DOC + integer :: i, k + integer, allocatable :: idx0(:) + + call mmap_create_i('', (/ 1_8*singles_beta_csc_size /), & + .False., .False., singles_beta_csc_map) + + allocate (idx0(N_det_beta_unique)) + do i=1, N_det_beta_unique + idx0(i) = i + enddo + + !$OMP PARALLEL DO DEFAULT(NONE) & + !$OMP SHARED(N_det_beta_unique, psi_det_beta_unique, & + !$OMP idx0, N_int, singles_beta_csc_map, singles_beta_csc_idx)& + !$OMP PRIVATE(i,k) SCHEDULE(static) + do i=1, N_det_beta_unique + call get_all_spin_singles( & + psi_det_beta_unique, idx0, psi_det_beta_unique(1,i), N_int, N_det_beta_unique, & + singles_beta_csc_map%i1(singles_beta_csc_idx(i):singles_beta_csc_idx(i)+N_det_beta_unique-1),& + k) + enddo + !$OMP END PARALLEL DO + deallocate(idx0) + +END_PROVIDER + @@ -1111,16 +1171,16 @@ subroutine get_all_spin_singles_1(buffer, idx, spindet, size_buffer, singles, n_ integer :: i integer(bit_kind) :: v integer :: degree - integer :: add_single(0:64) = (/ 0, 0, 1, 0, 0, (0, i=1,60) /) include 'utils/constants.include.F' - n_singles = 1 + n_singles = 0 do i=1,size_buffer degree = popcnt(xor( spindet, buffer(i) )) - singles(n_singles) = idx(i) - n_singles = n_singles+add_single(degree) + if (degree == 2) then + n_singles = n_singles+1 + singles(n_singles) = idx(i) + endif enddo - n_singles = n_singles-1 end @@ -1142,15 +1202,15 @@ subroutine get_all_spin_doubles_1(buffer, idx, spindet, size_buffer, doubles, n_ integer :: i include 'utils/constants.include.F' integer :: degree - integer :: add_double(0:64) = (/ 0, 0, 0, 0, 1, (0, i=1,60) /) - n_doubles = 1 + n_doubles = 0 do i=1,size_buffer degree = popcnt(xor( spindet, buffer(i) )) - doubles(n_doubles) = idx(i) - n_doubles = n_doubles+add_double(degree) + if (degree == 4) then + n_doubles = n_doubles+1 + doubles(n_doubles) = idx(i) + endif enddo - n_doubles = n_doubles-1 end @@ -1181,8 +1241,8 @@ subroutine get_all_spin_singles_and_doubles_$N_int(buffer, idx, spindet, size_bu integer(bit_kind) :: xorvec($N_int) integer :: degree - n_singles = 1 - n_doubles = 1 + n_singles = 0 + n_doubles = 0 do i=1,size_buffer do k=1,$N_int @@ -1196,16 +1256,14 @@ subroutine get_all_spin_singles_and_doubles_$N_int(buffer, idx, spindet, size_bu enddo if ( degree == 4 ) then - doubles(n_doubles) = idx(i) n_doubles = n_doubles+1 + doubles(n_doubles) = idx(i) else if ( degree == 2 ) then - singles(n_singles) = idx(i) n_singles = n_singles+1 + singles(n_singles) = idx(i) endif enddo - n_singles = n_singles-1 - n_doubles = n_doubles-1 end @@ -1230,7 +1288,7 @@ subroutine get_all_spin_singles_$N_int(buffer, idx, spindet, size_buffer, single integer(bit_kind) :: xorvec($N_int) integer :: degree - n_singles = 1 + n_singles = 0 do i=1,size_buffer do k=1,$N_int @@ -1247,11 +1305,10 @@ subroutine get_all_spin_singles_$N_int(buffer, idx, spindet, size_buffer, single cycle endif - singles(n_singles) = idx(i) n_singles = n_singles+1 + singles(n_singles) = idx(i) enddo - n_singles = n_singles-1 end @@ -1275,7 +1332,7 @@ subroutine get_all_spin_doubles_$N_int(buffer, idx, spindet, size_buffer, double include 'utils/constants.include.F' integer(bit_kind) :: xorvec($N_int) - n_doubles = 1 + n_doubles = 0 do i=1,size_buffer do k=1,$N_int @@ -1292,13 +1349,11 @@ subroutine get_all_spin_doubles_$N_int(buffer, idx, spindet, size_buffer, double cycle endif - doubles(n_doubles) = idx(i) n_doubles = n_doubles+1 + doubles(n_doubles) = idx(i) enddo - n_doubles = n_doubles-1 - end SUBST [ N_int ] diff --git a/src/ezfio_files/ezfio.irp.f b/src/ezfio_files/ezfio.irp.f index 02f45571..ef19d551 100644 --- a/src/ezfio_files/ezfio.irp.f +++ b/src/ezfio_files/ezfio.irp.f @@ -60,3 +60,16 @@ BEGIN_PROVIDER [ character*(1024), ezfio_work_dir ] ezfio_work_dir = trim(ezfio_filename)//'/work/' END_PROVIDER +BEGIN_PROVIDER [ character*(1024), ezfio_work_dir_pid ] + use c_functions + implicit none + BEGIN_DOC + ! EZFIO/work/pid_ + END_DOC + character*(32) :: pid_str + integer :: getpid + + write(pid_str,*) getpid() + ezfio_work_dir_pid = trim(ezfio_work_dir)//'/'//trim(pid_str)//'_' +END_PROVIDER + diff --git a/src/gpu/gpu_module.F90 b/src/gpu/gpu_module.F90 index 797a8839..90b82667 100644 --- a/src/gpu/gpu_module.F90 +++ b/src/gpu/gpu_module.F90 @@ -268,8 +268,12 @@ module gpu implicit none type(gpu_double1), intent(inout) :: ptr integer, intent(in) :: s + integer*8 :: s_8, n - call gpu_allocate_c(ptr%c, s*8_8) + s_8 = s + n = s_8 * 8_8 + + call gpu_allocate_c(ptr%c, n) call c_f_pointer(ptr%c, ptr%f, (/ s /)) end subroutine @@ -277,8 +281,13 @@ module gpu implicit none type(gpu_double2), intent(inout) :: ptr integer, intent(in) :: s1, s2 + integer*8 :: s1_8, s2_8, n - call gpu_allocate_c(ptr%c, s1*s2*8_8) + s1_8 = s1 + s2_8 = s2 + n = s1_8 * s2_8 * 8_8 + + call gpu_allocate_c(ptr%c, n) call c_f_pointer(ptr%c, ptr%f, (/ s1, s2 /)) end subroutine @@ -286,8 +295,14 @@ module gpu implicit none type(gpu_double3), intent(inout) :: ptr integer, intent(in) :: s1, s2, s3 + integer*8 :: s1_8, s2_8, s3_8, n - call gpu_allocate_c(ptr%c, s1*s2*s3*8_8) + s1_8 = s1 + s2_8 = s2 + s3_8 = s3 + n = s1_8 * s2_8 * s3_8 * 8_8 + + call gpu_allocate_c(ptr%c, n) call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3 /)) end subroutine @@ -295,8 +310,15 @@ module gpu implicit none type(gpu_double4), intent(inout) :: ptr integer, intent(in) :: s1, s2, s3, s4 + integer*8 :: s1_8, s2_8, s3_8, s4_8, n - call gpu_allocate_c(ptr%c, s1*s2*s3*s4*8_8) + s1_8 = s1 + s2_8 = s2 + s3_8 = s3 + s4_8 = s4 + n = s1_8 * s2_8 * s3_8 * s4_8 * 8_8 + + call gpu_allocate_c(ptr%c, n) call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4 /)) end subroutine @@ -304,8 +326,16 @@ module gpu implicit none type(gpu_double5), intent(inout) :: ptr integer, intent(in) :: s1, s2, s3, s4, s5 + integer*8 :: s1_8, s2_8, s3_8, s4_8, s5_8, n - call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*8_8) + s1_8 = s1 + s2_8 = s2 + s3_8 = s3 + s4_8 = s4 + s5_8 = s5 + n = s1_8 * s2_8 * s3_8 * s4_8 * s5_8 * 8_8 + + call gpu_allocate_c(ptr%c, n) call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5 /)) end subroutine @@ -313,8 +343,17 @@ module gpu implicit none type(gpu_double6), intent(inout) :: ptr integer, intent(in) :: s1, s2, s3, s4, s5, s6 + integer*8 :: s1_8, s2_8, s3_8, s4_8, s5_8, s6_8, n - call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*s6*8_8) + s1_8 = s1 + s2_8 = s2 + s3_8 = s3 + s4_8 = s4 + s5_8 = s5 + s6_8 = s6 + n = s1_8 * s2_8 * s3_8 * s4_8 * s5_8 * s6_8 * 8_8 + + call gpu_allocate_c(ptr%c, n) call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5, s6 /)) end subroutine diff --git a/src/hartree_fock/hf_energy.irp.f b/src/hartree_fock/hf_energy.irp.f index a0f9f897..862d64b0 100644 --- a/src/hartree_fock/hf_energy.irp.f +++ b/src/hartree_fock/hf_energy.irp.f @@ -19,16 +19,41 @@ END_PROVIDER ! Hartree-Fock energy containing the nuclear repulsion, and its one- and two-body components. END_DOC integer :: i,j - HF_energy = nuclear_repulsion + double precision :: tmp1, tmp2 + HF_energy = 0.d0 HF_two_electron_energy = 0.d0 HF_one_electron_energy = 0.d0 do j=1,ao_num do i=1,ao_num - HF_two_electron_energy += 0.5d0 * ( ao_two_e_integral_alpha(i,j) * SCF_density_matrix_ao_alpha(i,j) & - +ao_two_e_integral_beta(i,j) * SCF_density_matrix_ao_beta(i,j) ) - HF_one_electron_energy += ao_one_e_integrals(i,j) * (SCF_density_matrix_ao_alpha(i,j) + SCF_density_matrix_ao_beta (i,j) ) + tmp1 = 0.5d0 * ( ao_two_e_integral_alpha(i,j) * SCF_density_matrix_ao_alpha(i,j) & + +ao_two_e_integral_beta (i,j) * SCF_density_matrix_ao_beta (i,j) ) + tmp2 = ao_one_e_integrals(i,j) * (SCF_density_matrix_ao_alpha(i,j) + SCF_density_matrix_ao_beta (i,j) ) + HF_two_electron_energy += tmp1 + HF_one_electron_energy += tmp2 + HF_energy += tmp1 + tmp2 + enddo + enddo + HF_energy += nuclear_repulsion +END_PROVIDER + + + BEGIN_PROVIDER [ double precision, HF_kinetic_energy] +&BEGIN_PROVIDER [ double precision, HF_n_e_energy] + implicit none + BEGIN_DOC + ! Hartree-Fock energy containing the nuclear repulsion, and its one- and two-body components. + END_DOC + integer :: i,j + double precision :: tmp1, tmp2 + HF_n_e_energy = 0.d0 + HF_kinetic_energy = 0.d0 + do j=1,ao_num + do i=1,ao_num + tmp1 = ao_integrals_n_e(i,j) * (SCF_density_matrix_ao_alpha(i,j) + SCF_density_matrix_ao_beta (i,j) ) + tmp2 = ao_kinetic_integrals(i,j) * (SCF_density_matrix_ao_alpha(i,j) + SCF_density_matrix_ao_beta (i,j) ) + HF_n_e_energy += tmp1 + HF_kinetic_energy += tmp2 enddo enddo - HF_energy += HF_two_electron_energy + HF_one_electron_energy END_PROVIDER diff --git a/src/mo_basis/mos.irp.f b/src/mo_basis/mos.irp.f index 57ebb533..1ed859ee 100644 --- a/src/mo_basis/mos.irp.f +++ b/src/mo_basis/mos.irp.f @@ -277,7 +277,7 @@ subroutine ao_to_mo(A_ao,LDA_ao,A_mo,LDA_mo) T, ao_num, & 0.d0, A_mo, size(A_mo,1)) - call restore_symmetry(mo_num,mo_num,A_mo,size(A_mo,1),1.d-12) + call restore_symmetry(mo_num,mo_num,A_mo,size(A_mo,1),1.d-15) deallocate(T) end diff --git a/src/mo_one_e_ints/mo_one_e_ints.irp.f b/src/mo_one_e_ints/mo_one_e_ints.irp.f index 926ac7bd..fba55beb 100644 --- a/src/mo_one_e_ints/mo_one_e_ints.irp.f +++ b/src/mo_one_e_ints/mo_one_e_ints.irp.f @@ -18,6 +18,6 @@ BEGIN_PROVIDER [ double precision, mo_one_e_integrals,(mo_num,mo_num)] call ezfio_set_mo_one_e_ints_mo_one_e_integrals(mo_one_e_integrals) print *, 'MO one-e integrals written to disk' ENDIF - call nullify_small_elements(mo_num,mo_num,mo_one_e_integrals,size(mo_one_e_integrals,1),1.d-10) + call nullify_small_elements(mo_num,mo_num,mo_one_e_integrals,size(mo_one_e_integrals,1),1.d-15) END_PROVIDER diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index 549bbed2..6ee23f4a 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -70,6 +70,10 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] else call add_integrals_to_map(full_ijkl_bitmask_4) endif + double precision, external :: map_mb + print*,'Molecular integrals provided:' + print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' + print*,' Number of MO integrals: ', mo_map_size endif call wall_time(wall_2) @@ -78,10 +82,6 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] integer*8 :: get_mo_map_size, mo_map_size mo_map_size = get_mo_map_size() - double precision, external :: map_mb - print*,'Molecular integrals provided:' - print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' - print*,' Number of MO integrals: ', mo_map_size print*,' cpu time :',cpu_2 - cpu_1, 's' print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' diff --git a/src/tools/print_energy.irp.f b/src/tools/print_energy.irp.f index 0e67828e..1d3ea648 100644 --- a/src/tools/print_energy.irp.f +++ b/src/tools/print_energy.irp.f @@ -15,5 +15,21 @@ end subroutine run implicit none call print_mol_properties - print *, psi_energy + nuclear_repulsion + print *, psi_energy + nuclear_repulsion +! call print_energy_components +! print *, 'E(HF) = ', HF_energy +! print *, 'E(CI) = ', psi_energy + nuclear_repulsion +! print *, '' +! print *, 'E_kin(CI) = ', ref_bitmask_kinetic_energy +! print *, 'E_kin(HF) = ', HF_kinetic_energy +! print *, '' +! print *, 'E_ne (CI) = ', ref_bitmask_n_e_energy +! print *, 'E_ne (HF) = ', HF_n_e_energy +! print *, '' +! print *, 'E_1e (CI) = ', ref_bitmask_one_e_energy +! print *, 'E_1e (HF) = ', HF_one_electron_energy +! print *, '' +! print *, 'E_2e (CI) = ', ref_bitmask_two_e_energy +! print *, 'E_2e (HF) = ', HF_two_electron_energy + end diff --git a/src/tools/truncate_wf.irp.f b/src/tools/truncate_wf.irp.f index 6c66c8ec..1556e7d8 100644 --- a/src/tools/truncate_wf.irp.f +++ b/src/tools/truncate_wf.irp.f @@ -56,9 +56,14 @@ subroutine routine_s2 double precision :: accu(N_states) print *, 'Weights of the CFG' - do i=1,N_det + integer :: step + + step = max(1,N_det/100) + do i=1,N_det-1,step print *, i, real(weight_configuration(det_to_configuration(i),:)), real(sum(weight_configuration(det_to_configuration(i),:))) enddo + i=N_det + print *, i, real(weight_configuration(det_to_configuration(i),:)), real(sum(weight_configuration(det_to_configuration(i),:))) print*, 'Min weight of the configuration?' read(5,*) wmin diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index e342b422..4ac32233 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -2,6 +2,34 @@ module mmap_module use iso_c_binding + type mmap_type + type(c_ptr) :: ptr ! Pointer to the data + character*(128) :: filename ! Name of the file + integer*8 :: length ! Size of the array in bytes + integer :: fd ! File descriptor + + ! Pointers to data + integer, pointer :: i1(:) + integer, pointer :: i2(:,:) + integer, pointer :: i3(:,:,:) + integer, pointer :: i4(:,:,:,:) + + integer*8, pointer :: i81(:) + integer*8, pointer :: i82(:,:) + integer*8, pointer :: i83(:,:,:) + integer*8, pointer :: i84(:,:,:,:) + + double precision, pointer :: d1(:) + double precision, pointer :: d2(:,:) + double precision, pointer :: d3(:,:,:) + double precision, pointer :: d4(:,:,:,:) + + real, pointer :: s1(:) + real, pointer :: s2(:,:) + real, pointer :: s3(:,:,:) + real, pointer :: s4(:,:,:,:) + end type mmap_type + interface ! File descriptors @@ -82,7 +110,7 @@ module mmap_module length = length * shape(i) enddo fd_ = fd - call c_munmap_fortran( length, fd_, map) + call c_munmap_fortran(length, fd_, map) end subroutine subroutine msync(shape, bytes, fd, map) @@ -106,6 +134,200 @@ module mmap_module call c_msync_fortran( length, fd_, map) end subroutine + + ! Functions for the mmap_type + + subroutine mmap_create(filename, shape, bytes, read_only, single_node, map) + implicit none + character*(*), intent(in) :: filename ! Name of the mapped file + integer*8, intent(in) :: shape(:) ! Shape of the array to map + integer, intent(in) :: bytes ! Number of bytes per element + logical, intent(in) :: read_only ! If true, mmap is read-only + logical, intent(in) :: single_node! If true, mmap is on a single node + type(mmap_type), intent(out) :: map ! mmap + + integer :: i + logical :: temporary + + temporary = ( trim(filename) == '' ) + + if (.not.temporary) then + map%filename = filename + else + call getenv('EZFIO_FILE', map%filename) + map%filename = trim(map%filename) // '/work/tmpfile' + endif + + map%length = int(bytes,8) + do i=1,size(shape) + map%length = map%length * shape(i) + enddo + call mmap(map%filename, & + shape, & + bytes, & + map%fd, & + read_only, & + single_node, & + map%ptr) + + if (temporary) then + ! Deleting the file while it is open makes the file invisible on the filesystem, + ! and automatically deleted, even if the program crashes + open(UNIT=47, FILE=trim(map%filename), STATUS='OLD') + close(47,STATUS='DELETE') + endif + + map%d1 => NULL() + map%d2 => NULL() + map%d3 => NULL() + map%d4 => NULL() + map%s1 => NULL() + map%s2 => NULL() + map%s3 => NULL() + map%s4 => NULL() + map%i1 => NULL() + map%i2 => NULL() + map%i3 => NULL() + map%i4 => NULL() + map%i81 => NULL() + map%i82 => NULL() + map%i83 => NULL() + map%i84 => NULL() + + end + + subroutine mmap_create_d(filename, shape, read_only, single_node, map) + implicit none + character*(*), intent(in) :: filename ! Name of the mapped file + integer*8, intent(in) :: shape(:) ! Shape of the array to map + logical, intent(in) :: read_only ! If true, mmap is read-only + logical, intent(in) :: single_node! If true, mmap is on a single node + type(mmap_type), intent(out) :: map ! mmap + + call mmap_create(filename, shape, 8, read_only, single_node, map) + + select case (size(shape)) + case (1) + call c_f_pointer(map%ptr, map%d1, shape) + case (2) + call c_f_pointer(map%ptr, map%d2, shape) + case (3) + call c_f_pointer(map%ptr, map%d3, shape) + case (4) + call c_f_pointer(map%ptr, map%d4, shape) + case default + stop 'mmap: dimension not implemented' + end select + end subroutine + + subroutine mmap_create_s(filename, shape, read_only, single_node, map) + implicit none + character*(*), intent(in) :: filename ! Name of the mapped file + integer*8, intent(in) :: shape(:) ! Shape of the array to map + logical, intent(in) :: read_only ! If true, mmap is read-only + logical, intent(in) :: single_node! If true, mmap is on a single node + type(mmap_type), intent(out) :: map ! mmap + + call mmap_create(filename, shape, 4, read_only, single_node, map) + + select case (size(shape)) + case (1) + call c_f_pointer(map%ptr, map%s1, shape) + case (2) + call c_f_pointer(map%ptr, map%s2, shape) + case (3) + call c_f_pointer(map%ptr, map%s3, shape) + case (4) + call c_f_pointer(map%ptr, map%s4, shape) + case default + stop 'mmap: dimension not implemented' + end select + end subroutine + + subroutine mmap_create_i(filename, shape, read_only, single_node, map) + implicit none + character*(*), intent(in) :: filename ! Name of the mapped file + integer*8, intent(in) :: shape(:) ! Shape of the array to map + logical, intent(in) :: read_only ! If true, mmap is read-only + logical, intent(in) :: single_node! If true, mmap is on a single node + type(mmap_type), intent(out) :: map ! mmap + + call mmap_create(filename, shape, 4, read_only, single_node, map) + + select case (size(shape)) + case (1) + call c_f_pointer(map%ptr, map%i1, shape) + case (2) + call c_f_pointer(map%ptr, map%i2, shape) + case (3) + call c_f_pointer(map%ptr, map%i3, shape) + case (4) + call c_f_pointer(map%ptr, map%i4, shape) + case default + stop 'mmap: dimension not implemented' + end select + end subroutine + + subroutine mmap_create_i8(filename, shape, read_only, single_node, map) + implicit none + character*(*), intent(in) :: filename ! Name of the mapped file + integer*8, intent(in) :: shape(:) ! Shape of the array to map + logical, intent(in) :: read_only ! If true, mmap is read-only + logical, intent(in) :: single_node! If true, mmap is on a single node + type(mmap_type), intent(out) :: map ! mmap + + call mmap_create(filename, shape, 8, read_only, single_node, map) + + select case (size(shape)) + case (1) + call c_f_pointer(map%ptr, map%i81, shape) + case (2) + call c_f_pointer(map%ptr, map%i82, shape) + case (3) + call c_f_pointer(map%ptr, map%i83, shape) + case (4) + call c_f_pointer(map%ptr, map%i84, shape) + case default + stop 'mmap: dimension not implemented' + end select + end subroutine + + subroutine mmap_destroy(map) + implicit none + type(mmap_type), intent(inout) :: map + + call c_munmap_fortran(map%length, map%fd, map%ptr) + + map%ptr = C_NULL_PTR + map%filename = '' + map%length = 0 + map%fd = 0 + map%s1 => NULL() + map%s2 => NULL() + map%s3 => NULL() + map%s4 => NULL() + map%d1 => NULL() + map%d2 => NULL() + map%d3 => NULL() + map%d4 => NULL() + map%i1 => NULL() + map%i2 => NULL() + map%i3 => NULL() + map%i4 => NULL() + map%i81 => NULL() + map%i82 => NULL() + map%i83 => NULL() + map%i84 => NULL() + end subroutine + + + subroutine mmap_sync(map) + implicit none + type(mmap_type), intent(inout) :: map + + call c_msync_fortran(map%length, map%fd, map%ptr) + end subroutine + end module mmap_module diff --git a/src/utils_cc/diis.irp.f b/src/utils_cc/diis.irp.f index fe771373..a64a454f 100644 --- a/src/utils_cc/diis.irp.f +++ b/src/utils_cc/diis.irp.f @@ -53,10 +53,10 @@ subroutine diis_cc(all_err,all_t,sze,m,iter,t) !$OMP END PARALLEL do i = 1, m_iter - B(i,m_iter+1) = -1 + B(i,m_iter+1) = -1.d0 enddo do j = 1, m_iter - B(m_iter+1,j) = -1 + B(m_iter+1,j) = -1.d0 enddo ! Debug !print*,'B' @@ -493,7 +493,7 @@ subroutine update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t do i = 1, nO*nV tmp(i) = t1(i) enddo - !$OMP END DO NOWAIT + !$OMP END DO !$OMP DO do i = 1, nO*nO*nV*nV tmp(i+nO*nV) = t2(i) @@ -515,7 +515,7 @@ subroutine update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t do i = 1, nO*nV t1(i) = tmp(i) enddo - !$OMP END DO NOWAIT + !$OMP END DO !$OMP DO do i = 1, nO*nO*nV*nV t2(i) = tmp(i+nO*nV)