From df95c1af1c76abffeec0c34cab901a02880d127c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Apr 2017 17:50:49 +0200 Subject: [PATCH] Corrected memory leak in Davidson --- src/Davidson/davidson_parallel.irp.f | 23 +- src/Davidson/diagonalization_hs2.irp.f | 465 +------------------------ src/Davidson/u0Hu0.irp.f | 26 +- 3 files changed, 38 insertions(+), 476 deletions(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index e11a5fdf..4c0bfb4c 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -338,15 +338,22 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze_8) integer :: istep, imin, imax, ishift - istep=2 - do imin=1,N_det, 524288 - do ishift=0,istep-1 - imax = min(N_det, imin+524288-1) - write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|' - call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) - enddo - enddo +! istep=1 +! do imin=1,N_det, 524288 +! do ishift=0,istep-1 +! imax = min(N_det, imin+524288-1) +! write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|' +! call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) +! enddo +! enddo + istep=N_det/131072+1 + imin=1 + imax=N_det + do ishift=0,istep-1 + write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|' + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) + enddo v_0 = 0.d0 s_0 = 0.d0 diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index 853e5cf7..d68d8a68 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -23,7 +23,7 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) double precision, intent(inout) :: u_in(dim_in,N_st_diag) double precision, intent(out) :: energies(N_st_diag), s2_out(N_st_diag) - double precision, allocatable :: H_jj(:), S2_jj(:) + double precision, allocatable :: H_jj(:) double precision :: diag_H_mat_elem, diag_S_mat_elem integer :: i @@ -32,32 +32,24 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d ASSERT (Nint > 0) ASSERT (Nint == N_int) PROVIDE mo_bielec_integrals_in_map - allocate(H_jj(sze), S2_jj(sze)) + allocate(H_jj(sze) ) !$OMP PARALLEL DEFAULT(NONE) & - !$OMP SHARED(sze,H_jj,S2_jj, dets_in,Nint) & + !$OMP SHARED(sze,H_jj, dets_in,Nint) & !$OMP PRIVATE(i) !$OMP DO SCHEDULE(static) do i=1,sze H_jj(i) = diag_H_mat_elem(dets_in(1,1,i),Nint) - S2_jj(i) = diag_S_mat_elem(dets_in(1,1,i),Nint) enddo !$OMP END DO !$OMP END PARALLEL - if (disk_based_davidson) then - call davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) - else - call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) - endif - do i=1,N_st_diag - s2_out(i) = S2_jj(i) - enddo - deallocate (H_jj,S2_jj) + call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_out,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) + deallocate (H_jj) end -subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) +subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) use bitmasks implicit none BEGIN_DOC @@ -65,7 +57,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ! ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson ! - ! S2_jj : specific diagonal S^2 matrix elements + ! S2_out : Output : s^2 ! ! dets_in : bitmasks corresponding to determinants ! @@ -87,7 +79,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) double precision, intent(in) :: H_jj(sze) - double precision, intent(inout) :: S2_jj(sze) + double precision, intent(inout) :: s2_out(N_st_diag) integer, intent(in) :: iunit double precision, intent(inout) :: u_in(dim_in,N_st_diag) double precision, intent(out) :: energies(N_st_diag) @@ -434,7 +426,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s do k=1,N_st_diag energies(k) = lambda(k) - S2_jj(k) = s2(k) + s2_out(k) = s2(k) enddo write_buffer = '===== ' do i=1,N_st @@ -454,442 +446,3 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ) end -subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit) - use bitmasks - use mmap_module - implicit none - BEGIN_DOC - ! Davidson diagonalization with specific diagonal elements of the H matrix - ! - ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson - ! - ! S2_jj : specific diagonal S^2 matrix elements - ! - ! dets_in : bitmasks corresponding to determinants - ! - ! u_in : guess coefficients on the various states. Overwritten - ! on exit - ! - ! dim_in : leftmost dimension of u_in - ! - ! sze : Number of determinants - ! - ! N_st : Number of eigenstates - ! - ! N_st_diag : Number of states in which H is diagonalized. Assumed > sze - ! - ! iunit : Unit for the I/O - ! - ! Initial guess vectors are not necessarily orthonormal - END_DOC - integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint - integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) - double precision, intent(in) :: H_jj(sze) - double precision, intent(inout) :: S2_jj(sze) - integer, intent(in) :: iunit - double precision, intent(inout) :: u_in(dim_in,N_st_diag) - double precision, intent(out) :: energies(N_st_diag) - - integer :: sze_8 - integer :: iter - integer :: i,j,k,l,m - logical :: converged - - double precision :: u_dot_v, u_dot_u - - integer :: k_pairs, kl - - integer :: iter2 - double precision, pointer :: W(:,:), U(:,:), S(:,:), overlap(:,:) - double precision, allocatable :: y(:,:), h(:,:), lambda(:), s2(:) - double precision, allocatable :: c(:), s_(:,:), s_tmp(:,:) - double precision :: diag_h_mat_elem - double precision, allocatable :: residual_norm(:) - character*(16384) :: write_buffer - double precision :: to_print(3,N_st) - double precision :: cpu, wall - logical :: state_ok(N_st_diag*davidson_sze_max) - integer :: shift, shift2, itermax - include 'constants.include.F' - - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda - if (N_st_diag*3 > sze) then - print *, 'error in Davidson :' - print *, 'Increase n_det_max_jacobi to ', N_st_diag*3 - stop -1 - endif - - PROVIDE nuclear_repulsion expected_s2 - - call write_time(iunit) - call wall_time(wall) - call cpu_time(cpu) - write(iunit,'(A)') '' - write(iunit,'(A)') 'Davidson Diagonalization' - write(iunit,'(A)') '------------------------' - write(iunit,'(A)') '' - call write_int(iunit,N_st,'Number of states') - call write_int(iunit,N_st_diag,'Number of states in diagonalization') - call write_int(iunit,sze,'Number of determinants') - write(iunit,'(A)') '' - write_buffer = '===== ' - do i=1,N_st - write_buffer = trim(write_buffer)//' ================ =========== ===========' - enddo - write(iunit,'(A)') trim(write_buffer) - write_buffer = ' Iter' - do i=1,N_st - write_buffer = trim(write_buffer)//' Energy S^2 Residual ' - enddo - write(iunit,'(A)') trim(write_buffer) - write_buffer = '===== ' - do i=1,N_st - write_buffer = trim(write_buffer)//' ================ =========== ===========' - enddo - write(iunit,'(A)') trim(write_buffer) - - integer, external :: align_double - integer :: fd(3) - type(c_ptr) :: c_pointer(3) - sze_8 = align_double(sze) - - itermax = min(davidson_sze_max, sze/N_st_diag) - - call mmap( & - trim(ezfio_work_dir)//'U', & - (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & - 8, fd(1), .False., c_pointer(1)) - call c_f_pointer(c_pointer(1), W, (/ sze_8,N_st_diag*itermax /) ) - - call mmap( & - trim(ezfio_work_dir)//'W', & - (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & - 8, fd(2), .False., c_pointer(2)) - call c_f_pointer(c_pointer(2), U, (/ sze_8,N_st_diag*itermax /) ) - - call mmap( & - trim(ezfio_work_dir)//'S', & - (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & - 8, fd(3), .False., c_pointer(3)) - call c_f_pointer(c_pointer(3), S, (/ sze_8,N_st_diag*itermax /) ) - - allocate( & - h(N_st_diag*itermax,N_st_diag*itermax), & - y(N_st_diag*itermax,N_st_diag*itermax), & - s_(N_st_diag*itermax,N_st_diag*itermax), & - s_tmp(N_st_diag*itermax,N_st_diag*itermax), & - overlap(N_st_diag*itermax, N_st_diag*itermax), & - residual_norm(N_st_diag), & - c(N_st_diag*itermax), & - s2(N_st_diag*itermax), & - lambda(N_st_diag*itermax)) - - h = 0.d0 - U = 0.d0 - W = 0.d0 - S = 0.d0 - y = 0.d0 - s_ = 0.d0 - s_tmp = 0.d0 - - - ASSERT (N_st > 0) - ASSERT (N_st_diag >= N_st) - ASSERT (sze > 0) - ASSERT (Nint > 0) - ASSERT (Nint == N_int) - - ! Davidson iterations - ! =================== - - converged = .False. - - double precision :: r1, r2 - do k=N_st+1,N_st_diag - u_in(k,k) = 10.d0 - do i=1,sze - call random_number(r1) - r1 = dsqrt(-2.d0*dlog(r1)) - r2 = dtwo_pi*r2 - u_in(i,k) = r1*dcos(r2) - enddo - enddo - do k=1,N_st_diag - call normalize(u_in(1,k),sze) - enddo - - - do while (.not.converged) - - do k=1,N_st_diag - do i=1,sze - U(i,k) = u_in(i,k) - enddo - enddo - - do iter=1,itermax-1 - - shift = N_st_diag*(iter-1) - shift2 = N_st_diag*iter - - call ortho_qr(U,size(U,1),sze,shift2) - - ! Compute |W_k> = \sum_i |i> - ! ----------------------------------------- - - - if (distributed_davidson) then - call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze_8) - else - call H_S2_u_0_nstates_openmp(W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze_8) - endif - - - ! Compute h_kl = = - ! ------------------------------------------- - - do k=1,iter - shift = N_st_diag*(k-1) - call dgemm('T','N', N_st_diag, shift2, sze, & - 1.d0, U(1,shift+1), size(U,1), W, size(W,1), & - 0.d0, h(shift+1,1), size(h,1)) - - call dgemm('T','N', N_st_diag, shift2, sze, & - 1.d0, U(1,shift+1), size(U,1), S, size(S,1), & - 0.d0, s_(shift+1,1), size(s_,1)) - enddo - -! ! Diagonalize S^2 -! ! --------------- -! -! call lapack_diag(s2,y,s_,size(s_,1),shift2) -! -! -! ! Rotate H in the basis of eigenfunctions of s2 -! ! --------------------------------------------- -! -! call dgemm('N','N',shift2,shift2,shift2, & -! 1.d0, h, size(h,1), y, size(y,1), & -! 0.d0, s_tmp, size(s_tmp,1)) -! -! call dgemm('T','N',shift2,shift2,shift2, & -! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & -! 0.d0, h, size(h,1)) -! -! ! Damp interaction between different spin states -! ! ------------------------------------------------ -! -! do k=1,shift2 -! do l=1,shift2 -! if (dabs(s2(k) - s2(l)) > 1.d0) then -! h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l)))) -! endif -! enddo -! enddo -! -! ! Rotate back H -! ! ------------- -! -! call dgemm('N','T',shift2,shift2,shift2, & -! 1.d0, h, size(h,1), y, size(y,1), & -! 0.d0, s_tmp, size(s_tmp,1)) -! -! call dgemm('N','N',shift2,shift2,shift2, & -! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & -! 0.d0, h, size(h,1)) - - - ! Diagonalize h - ! ------------- - call lapack_diag(lambda,y,h,size(h,1),shift2) - - ! Compute S2 for each eigenvector - ! ------------------------------- - - call dgemm('N','N',shift2,shift2,shift2, & - 1.d0, s_, size(s_,1), y, size(y,1), & - 0.d0, s_tmp, size(s_tmp,1)) - - call dgemm('T','N',shift2,shift2,shift2, & - 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & - 0.d0, s_, size(s_,1)) - - - - do k=1,shift2 - s2(k) = s_(k,k) + S_z2_Sz - enddo - - - if (s2_eig) then - do k=1,shift2 - state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0) - enddo - else - state_ok(k) = .True. - endif - - do k=1,shift2 - if (.not. state_ok(k)) then - do l=k+1,shift2 - if (state_ok(l)) then - call dswap(shift2, y(1,k), 1, y(1,l), 1) - call dswap(1, s2(k), 1, s2(l), 1) - call dswap(1, lambda(k), 1, lambda(l), 1) - state_ok(k) = .True. - state_ok(l) = .False. - exit - endif - enddo - endif - enddo - - if (state_following) then - - ! Compute overlap with U_in - ! ------------------------- - - integer :: order(N_st_diag) - double precision :: cmax - overlap = -1.d0 - do k=1,shift2 - do i=1,shift2 - overlap(k,i) = dabs(y(k,i)) - enddo - enddo - do k=1,N_st - cmax = -1.d0 - do i=1,shift2 - if (overlap(i,k) > cmax) then - cmax = overlap(i,k) - order(k) = i - endif - enddo - do i=1,shift2 - overlap(order(k),i) = -1.d0 - enddo - enddo - overlap = y - do k=1,N_st - l = order(k) - if (k /= l) then - y(1:shift2,k) = overlap(1:shift2,l) - endif - enddo - do k=1,N_st - overlap(k,1) = lambda(k) - overlap(k,2) = s2(k) - enddo - do k=1,N_st - l = order(k) - if (k /= l) then - lambda(k) = overlap(l,1) - s2(k) = overlap(l,2) - endif - enddo - - endif - - - ! Express eigenvectors of h in the determinant basis - ! -------------------------------------------------- - - call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) - call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) - call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, S, size(S,1), y, size(y,1), 0.d0, S(1,shift2+1), size(S,1)) - - ! Compute residual vector and davidson step - ! ----------------------------------------- - - do k=1,N_st_diag - if (state_ok(k)) then - do i=1,sze - U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & - * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz & - )/max(H_jj(i) - lambda (k),1.d-2) - enddo - else - ! Randomize components with bad - do i=1,sze-2,2 - call random_number(r1) - call random_number(r2) - r1 = dsqrt(-2.d0*dlog(r1)) - r2 = dtwo_pi*r2 - U(i,shift2+k) = r1*dcos(r2) - U(i+1,shift2+k) = r1*dsin(r2) - enddo - do i=sze-2+1,sze - call random_number(r1) - call random_number(r2) - r1 = dsqrt(-2.d0*dlog(r1)) - r2 = dtwo_pi*r2 - U(i,shift2+k) = r1*dcos(r2) - enddo - endif - - if (k <= N_st) then - residual_norm(k) = u_dot_u(U(1,shift2+k),sze) - to_print(1,k) = lambda(k) + nuclear_repulsion - to_print(2,k) = s2(k) - to_print(3,k) = residual_norm(k) - endif - enddo - - write(iunit,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter, to_print(1:3,1:N_st) - call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged) - do k=1,N_st - if (residual_norm(k) > 1.e8) then - print *, '' - stop 'Davidson failed' - endif - enddo - if (converged) then - exit - endif - - enddo - - ! Re-contract to u_in - ! ----------- - - call dgemm('N','N', sze, N_st_diag, shift2, 1.d0, & - U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) - - enddo - - do k=1,N_st_diag - energies(k) = lambda(k) - S2_jj(k) = s2(k) - enddo - write_buffer = '===== ' - do i=1,N_st - write_buffer = trim(write_buffer)//' ================ =========== ===========' - enddo - write(iunit,'(A)') trim(write_buffer) - write(iunit,'(A)') '' - call write_time(iunit) - - call munmap( & - (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & - 8, fd(1), c_pointer(1)) - - call munmap( & - (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & - 8, fd(2), c_pointer(2)) - - call munmap( & - (/ int(sze_8,8),int(N_st_diag*itermax,8) /), & - 8, fd(3), c_pointer(3)) - - deallocate ( & - residual_norm, & - c, overlap, & - h, & - y, s_, s_tmp, & - lambda & - ) -end - diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index ac70ec7a..6cb7de45 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -101,8 +101,8 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif integer(bit_kind) :: tmp_det2(N_int,2) integer(bit_kind) :: tmp_det3(N_int,2) integer(bit_kind), allocatable :: buffer(:,:) - integer :: n_singles, n_doubles - integer, allocatable :: singles(:), doubles(:) + integer :: n_doubles + integer, allocatable :: doubles(:) integer, allocatable :: singles_a(:) integer, allocatable :: singles_b(:) integer, allocatable :: idx(:), idx0(:) @@ -136,7 +136,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif !$OMP ishift, idx0, u_t, maxab, v_0, s_0) & !$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, & !$OMP lcol, lrow, l_a, l_b, nmax, & - !$OMP buffer, singles, doubles, n_singles, n_doubles, & + !$OMP buffer, doubles, n_doubles, & !$OMP tmp_det2, hij, sij, idx, l, kcol_prev, v_t, & !$OMP singles_a, n_singles_a, singles_b, & !$OMP n_singles_b, s_t, k8) @@ -145,7 +145,6 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif ! ============================= allocate( buffer(N_int,maxab), & - singles(maxab), & singles_a(maxab), & singles_b(maxab), & doubles(maxab), & @@ -157,7 +156,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif s_t = 0.d0 - !$OMP DO SCHEDULE(static,1024) + !$OMP DO SCHEDULE(dynamic,64) do k_a=istart+ishift,iend,istep krow = psi_bilinear_matrix_rows(k_a) @@ -216,8 +215,9 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif enddo enddo + !$OMP END DO NOWAIT - !$OMP DO SCHEDULE(static,1024) + !$OMP DO SCHEDULE(dynamic,64) do k_a=istart+ishift,iend,istep @@ -256,14 +256,14 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif call get_all_spin_singles_and_doubles( & buffer, idx, spindet, N_int, i, & - singles, doubles, n_singles, n_doubles ) + singles_a, doubles, n_singles_a, n_doubles ) ! Compute Hij for all alpha singles ! ---------------------------------- tmp_det2(1:N_int,2) = psi_det_beta_unique (1:N_int, kcol) - do i=1,n_singles - l_a = singles(i) + do i=1,n_singles_a + l_a = singles_a(i) lrow = psi_bilinear_matrix_rows(l_a) tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 1, hij) @@ -326,14 +326,14 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif call get_all_spin_singles_and_doubles( & buffer, idx, spindet, N_int, i, & - singles, doubles, n_singles, n_doubles ) + singles_b, doubles, n_singles_b, n_doubles ) ! Compute Hij for all beta singles ! ---------------------------------- tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, krow) - do i=1,n_singles - l_b = singles(i) + do i=1,n_singles_b + l_b = singles_b(i) lcol = psi_bilinear_matrix_transp_columns(l_b) tmp_det2(1:N_int,2) = psi_det_beta_unique (1:N_int, lcol) call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 2, hij) @@ -385,6 +385,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif end do !$OMP END DO NOWAIT + deallocate(buffer, singles_a, singles_b, doubles, idx) !$OMP CRITICAL do l=1,N_st @@ -394,6 +395,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif enddo enddo !$OMP END CRITICAL + deallocate(v_t, s_t) !$OMP BARRIER !$OMP END PARALLEL