From 6492f613a141830cbcf207004d29fbce7fd935ca Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 15 Jul 2016 15:31:16 +0200 Subject: [PATCH 01/15] Removed debug --- ocaml/Address.ml | 2 +- src/Determinants/connected_to_ref.irp.f | 9 +++------ src/Determinants/filter_connected.irp.f | 2 -- src/Determinants/slater_rules.irp.f | 5 ----- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/ocaml/Address.ml b/ocaml/Address.ml index e107cf0c..47eb3fd6 100644 --- a/ocaml/Address.ml +++ b/ocaml/Address.ml @@ -42,7 +42,7 @@ end = struct assert (String.is_prefix ~prefix:"inproc://" x); x let create name = - Printf.sprintf "ipc://%s" name + Printf.sprintf "inproc://%s" name let to_string x = x end diff --git a/src/Determinants/connected_to_ref.irp.f b/src/Determinants/connected_to_ref.irp.f index 7a54bdbc..c0b611be 100644 --- a/src/Determinants/connected_to_ref.irp.f +++ b/src/Determinants/connected_to_ref.irp.f @@ -109,8 +109,6 @@ integer function get_index_in_psi_det_sorted_bit(key,Nint) continue else in_wavefunction = .True. - !DIR$ IVDEP - !DIR$ LOOP COUNT MIN(3) do l=2,Nint if ( (key(l,1) /= psi_det_sorted_bit(l,1,i)).or. & (key(l,2) /= psi_det_sorted_bit(l,2,i)) ) then @@ -175,7 +173,6 @@ logical function is_connected_to(key,keys,Nint,Ndet) do i=1,Ndet degree_x2 = popcnt(xor( key(1,1), keys(1,1,i))) + & popcnt(xor( key(1,2), keys(1,2,i))) - !DEC$ LOOP COUNT MIN(3) do l=2,Nint degree_x2 = degree_x2 + popcnt(xor( key(l,1), keys(l,1,i))) +& popcnt(xor( key(l,2), keys(l,2,i))) @@ -208,7 +205,6 @@ logical function is_connected_to_by_mono(key,keys,Nint,Ndet) do i=1,Ndet degree_x2 = popcnt(xor( key(1,1), keys(1,1,i))) + & popcnt(xor( key(1,2), keys(1,2,i))) - !DEC$ LOOP COUNT MIN(3) do l=2,Nint degree_x2 = degree_x2 + popcnt(xor( key(l,1), keys(l,1,i))) +& popcnt(xor( key(l,2), keys(l,2,i))) @@ -302,10 +298,12 @@ integer function connected_to_ref(key,keys,Nint,N_past_in,Ndet) do i=N_past-1,1,-1 degree_x2 = popcnt(xor( key(1,1), keys(1,1,i))) + & popcnt(xor( key(1,2), keys(1,2,i))) - !DEC$ LOOP COUNT MIN(3) do l=2,Nint degree_x2 = degree_x2 + popcnt(xor( key(l,1), keys(l,1,i))) +& popcnt(xor( key(l,2), keys(l,2,i))) + if (degree_x2 > 4) then + exit + endif enddo if (degree_x2 > 4) then cycle @@ -406,7 +404,6 @@ integer function connected_to_ref_by_mono(key,keys,Nint,N_past_in,Ndet) do i=N_past-1,1,-1 degree_x2 = popcnt(xor( key(1,1), keys(1,1,i))) + & popcnt(xor( key(1,2), keys(1,2,i))) - !DEC$ LOOP COUNT MIN(3) do l=2,Nint degree_x2 = degree_x2 + popcnt(xor( key(l,1), keys(l,1,i))) +& popcnt(xor( key(l,2), keys(l,2,i))) diff --git a/src/Determinants/filter_connected.irp.f b/src/Determinants/filter_connected.irp.f index 46280b31..34d6feb9 100644 --- a/src/Determinants/filter_connected.irp.f +++ b/src/Determinants/filter_connected.irp.f @@ -299,7 +299,6 @@ subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) else - integer, save :: icount(4) = (/0,0,0,0/) !DIR$ LOOP COUNT (1000) outer: do i=1,sze degree_x2 = 0 @@ -317,7 +316,6 @@ subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) enddo idx(l) = i l = l+1 - icount(3) = icount(3) + 1_8 enddo outer endif diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index 967ac9a3..ec7eb76d 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -1249,7 +1249,6 @@ subroutine get_excitation_degree_vector(key1,key2,degree,Nint,sze,idx) l=1 if (Nint==1) then - !DIR$ LOOP COUNT (1000) do i=1,sze d = popcnt(xor( key1(1,1,i), key2(1,1))) + & popcnt(xor( key1(1,2,i), key2(1,2))) @@ -1264,7 +1263,6 @@ subroutine get_excitation_degree_vector(key1,key2,degree,Nint,sze,idx) else if (Nint==2) then - !DIR$ LOOP COUNT (1000) do i=1,sze d = popcnt(xor( key1(1,1,i), key2(1,1))) + & popcnt(xor( key1(1,2,i), key2(1,2))) + & @@ -1281,7 +1279,6 @@ subroutine get_excitation_degree_vector(key1,key2,degree,Nint,sze,idx) else if (Nint==3) then - !DIR$ LOOP COUNT (1000) do i=1,sze d = popcnt(xor( key1(1,1,i), key2(1,1))) + & popcnt(xor( key1(1,2,i), key2(1,2))) + & @@ -1300,10 +1297,8 @@ subroutine get_excitation_degree_vector(key1,key2,degree,Nint,sze,idx) else - !DIR$ LOOP COUNT (1000) do i=1,sze d = 0 - !DIR$ LOOP COUNT MIN(4) do m=1,Nint d = d + popcnt(xor( key1(m,1,i), key2(m,1))) & + popcnt(xor( key1(m,2,i), key2(m,2))) From b780a6540abaa08142be8d8912131a50bb41c6ca Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 14 Oct 2016 12:40:29 +0200 Subject: [PATCH 02/15] bugs in mrcepa0_general and mrcc_utils --- plugins/MRCC_Utils/mrcc_utils.irp.f | 5 +++-- plugins/mrcepa0/mrcepa0_general.irp.f | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 79249051..81893781 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -644,7 +644,7 @@ END_PROVIDER AtA_ind = 0 AtA_val = 0d0 x = 0d0 - A_val_mwen = 0d0 + !A_val_mwen = 0d0 N_col = 0 col_shortcut = 0 @@ -699,7 +699,8 @@ END_PROVIDER !$OMP private(at_row, a_col, t, i, r1, r2, wk, A_ind_mwen, A_val_mwen)& !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s) allocate(A_val_mwen(nex), A_ind_mwen(nex)) - A_ind_mwen = 0 + !A_ind_mwen = 0 + !A_val_mwen = 0d0 !$OMP DO schedule(dynamic, 100) do at_row = 1, nex wk = 0 diff --git a/plugins/mrcepa0/mrcepa0_general.irp.f b/plugins/mrcepa0/mrcepa0_general.irp.f index 1e7ad68d..63f03360 100644 --- a/plugins/mrcepa0/mrcepa0_general.irp.f +++ b/plugins/mrcepa0/mrcepa0_general.irp.f @@ -28,7 +28,7 @@ subroutine run(N_st,energy) enddo SOFT_TOUCH psi_coef ci_energy_dressed call write_double(6,ci_energy_dressed(1),"Final MRCC energy") - call ezfio_set_mrcc_cassd_energy(ci_energy_dressed(1)) + call ezfio_set_mrcepa0_energy(ci_energy_dressed(1)) call save_wavefunction energy(:) = ci_energy_dressed(:) else From 1f4cd4c318847520bf7fd84c7e545c5cadabf870 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 17 Oct 2016 14:40:09 +0200 Subject: [PATCH 03/15] optimized calculation of inactive amplitudes --- plugins/MRCC_Utils/mrcc_utils.irp.f | 500 +++++++++++++++++----------- 1 file changed, 301 insertions(+), 199 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 81893781..14885153 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -616,204 +616,301 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, dIj_unique, (hh_shortcut(hh_shortcut(0)+1)-1, N_states) ] &BEGIN_PROVIDER [ double precision, rho_mrcc, (N_det_non_ref, N_states) ] - implicit none - logical :: ok - integer :: i, j, k, s, II, pp, hh, ind, wk, nex, a_col, at_row - integer, external :: searchDet, unsortedSearchDet - integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2) - integer :: N, INFO, AtA_size, r1, r2 - double precision , allocatable :: AtB(:), AtA_val(:), A_val(:,:), x(:), x_new(:), A_val_mwen(:) - double precision :: t, norm, cx, res - integer, allocatable :: A_ind(:,:), lref(:), AtA_ind(:), A_ind_mwen(:), col_shortcut(:), N_col(:) - double precision :: phase - - - - nex = hh_shortcut(hh_shortcut(0)+1)-1 - print *, "TI", nex, N_det_non_ref - allocate(A_ind(0:N_det_ref+1, nex), A_val(N_det_ref+1, nex)) - allocate(AtA_ind(N_det_ref * nex), AtA_val(N_det_ref * nex)) !!!!! MAY BE TOO SMALL ? !!!!!!!! - allocate(x(nex), AtB(nex)) - allocate(N_col(nex), col_shortcut(nex)) - allocate(x_new(nex)) - - do s = 1, N_states - - A_val = 0d0 - A_ind = 0 - AtA_ind = 0 - AtA_val = 0d0 - x = 0d0 - !A_val_mwen = 0d0 - N_col = 0 - col_shortcut = 0 - - !$OMP PARALLEL default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind)& - !$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref)& - !$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk) - allocate(lref(N_det_non_ref)) - !$OMP DO schedule(static,10) - do hh = 1, hh_shortcut(0) - do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 - lref = 0 - do II = 1, N_det_ref - call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) - if(.not. ok) cycle - call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int) - if(.not. ok) cycle - ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) - if(ind /= -1) then - call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int) - if (phase > 0.d0) then - lref(psi_non_ref_sorted_idx(ind)) = II - else - lref(psi_non_ref_sorted_idx(ind)) = -II - endif - end if - end do - wk = 0 - do i=1, N_det_non_ref - if(lref(i) > 0) then - wk += 1 - A_val(wk, pp) = psi_ref_coef(lref(i), s) - A_ind(wk, pp) = i - else if(lref(i) < 0) then - wk += 1 - A_val(wk, pp) = -psi_ref_coef(-lref(i), s) - A_ind(wk, pp) = i - end if - end do - A_ind(0,pp) = wk - end do - end do - !$OMP END DO - deallocate(lref) - !$OMP END PARALLEL - print *, 'Done building A_val, A_ind' - - AtB = 0d0 - AtA_size = 0 - col_shortcut = 0 - N_col = 0 - !$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref)& - !$OMP private(at_row, a_col, t, i, r1, r2, wk, A_ind_mwen, A_val_mwen)& - !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s) - allocate(A_val_mwen(nex), A_ind_mwen(nex)) - !A_ind_mwen = 0 - !A_val_mwen = 0d0 - !$OMP DO schedule(dynamic, 100) - do at_row = 1, nex - wk = 0 - if(mod(at_row, 10000) == 0) print *, "AtA", at_row, "/", nex - do i=1,A_ind(0,at_row) - AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_row), s) * A_val(i, at_row) - end do + implicit none + logical :: ok + integer :: i, j, k, s, II, pp, ppp, hh, ind, wk, nex, a_col, at_row + integer, external :: searchDet, unsortedSearchDet + integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2) + integer :: N, INFO, AtA_size, r1, r2 + double precision , allocatable :: AtB(:), AtA_val(:), A_val(:,:), x(:), x_new(:), A_val_mwen(:) + double precision :: t, norm, cx, res + integer, allocatable :: A_ind(:,:), lref(:), AtA_ind(:), A_ind_mwen(:), col_shortcut(:), N_col(:) + double precision :: phase + + + integer, allocatable :: pathTo(:), active_hh_idx(:), active_pp_idx(:) + logical, allocatable :: active(:) + double precision, allocatable :: rho_mrcc_init(:,:) + integer :: nactive + + nex = hh_shortcut(hh_shortcut(0)+1)-1 + print *, "TI", nex, N_det_non_ref + + allocate(pathTo(N_det_non_ref), active(nex)) + allocate(active_pp_idx(nex), active_hh_idx(nex)) + allocate(rho_mrcc_init(N_det_non_ref, N_states)) + + pathTo = 0 + active = .false. + nactive = 0 + + + do hh = 1, hh_shortcut(0) + do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 + do II = 1, N_det_ref + call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) + if(.not. ok) cycle + call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int) + if(.not. ok) cycle + ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) + if(ind == -1) cycle + ind = psi_non_ref_sorted_idx(ind) + if(pathTo(ind) == 0) then + pathTo(ind) = pp + else + active(pp) = .true. + active(pathTo(ind)) = .true. + end if + end do + end do + end do + + do hh = 1, hh_shortcut(0) + do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 + if(active(pp)) then + nactive = nactive + 1 + active_hh_idx(nactive) = hh + active_pp_idx(nactive) = pp + end if + end do + end do + + print *, nactive, "inact/", size(active) + + allocate(A_ind(0:N_det_ref+1, nactive), A_val(N_det_ref+1, nactive)) + allocate(AtA_ind(N_det_ref * nactive), AtA_val(N_det_ref * nactive)) + allocate(x(nex), AtB(nex)) + allocate(N_col(nactive), col_shortcut(nactive)) + allocate(x_new(nex)) + - do a_col = 1, nex - t = 0d0 - r1 = 1 - r2 = 1 - do while ((A_ind(r1, at_row) /= 0).and.(A_ind(r2, a_col) /= 0)) - if(A_ind(r1, at_row) > A_ind(r2, a_col)) then - r2 = r2+1 - else if(A_ind(r1, at_row) < A_ind(r2, a_col)) then - r1 = r1+1 - else - t = t - A_val(r1, at_row) * A_val(r2, a_col) - r1 = r1+1 - r2 = r2+1 - end if - end do - - if(a_col == at_row) then - t = t + 1.d0 - end if - if(t /= 0.d0) then - wk += 1 - A_ind_mwen(wk) = a_col - A_val_mwen(wk) = t - end if - end do - - if(wk /= 0) then - !$OMP CRITICAL - col_shortcut(at_row) = AtA_size+1 - N_col(at_row) = wk - if (AtA_size+wk > size(AtA_ind,1)) then - print *, AtA_size+wk , size(AtA_ind,1) - stop 'too small' - endif - do i=1,wk - AtA_ind(AtA_size+i) = A_ind_mwen(i) - AtA_val(AtA_size+i) = A_val_mwen(i) - enddo - AtA_size += wk - !$OMP END CRITICAL - end if - end do - !$OMP END DO NOWAIT - deallocate (A_ind_mwen, A_val_mwen) - !$OMP END PARALLEL - - if(AtA_size > size(AtA_val)) stop "SIZA" - print *, "ATA SIZE", ata_size - do i=1,nex - x(i) = AtB(i) - enddo - - double precision :: factor, resold - factor = 1.d0 - resold = huge(1.d0) - do k=0,100000 - !$OMP PARALLEL default(shared) private(cx, i, j, a_col) - - !$OMP DO - do i=1,N_det_non_ref - rho_mrcc(i,s) = 0.d0 - enddo - !$OMP END DO - - !$OMP DO - do a_col = 1, nex - cx = 0d0 - do i=col_shortcut(a_col), col_shortcut(a_col) + N_col(a_col) - 1 - cx = cx + x(AtA_ind(i)) * AtA_val(i) - end do - x_new(a_col) = AtB(a_col) + cx * factor - end do - !$OMP END DO - - !$OMP END PARALLEL - - res = 0.d0 - do a_col=1,nex - res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col)) - end do - - if (res < resold) then - do a_col=1,nex - do j=1,N_det_non_ref - i = A_ind(j,a_col) - if (i==0) exit - rho_mrcc(i,s) = rho_mrcc(i,s) + A_val(j,a_col) * X_new(a_col) - enddo - X(a_col) = X_new(a_col) - end do -! factor = 1.d0 - else - factor = -factor * 0.5d0 - endif - resold = res + + do s = 1, N_states + + A_val = 0d0 + A_ind = 0 + AtA_ind = 0 + AtB = 0d0 + AtA_val = 0d0 + x = 0d0 + N_col = 0 + col_shortcut = 0 + + !$OMP PARALLEL default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind)& + !$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref)& + !$OMP shared(active, active_hh_idx, active_pp_idx, nactive)& + !$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk, ppp, hh) + allocate(lref(N_det_non_ref)) + !$OMP DO schedule(static,10) + do ppp=1,nactive + pp = active_pp_idx(ppp) + hh = active_hh_idx(ppp) + lref = 0 + do II = 1, N_det_ref + call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) + if(.not. ok) cycle + call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int) + if(.not. ok) cycle + ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) + if(ind /= -1) then + call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int) + if (phase > 0.d0) then + lref(psi_non_ref_sorted_idx(ind)) = II + else + lref(psi_non_ref_sorted_idx(ind)) = -II + endif + end if + end do + wk = 0 + do i=1, N_det_non_ref + if(lref(i) > 0) then + wk += 1 + A_val(wk, ppp) = psi_ref_coef(lref(i), s) + A_ind(wk, ppp) = i + else if(lref(i) < 0) then + wk += 1 + A_val(wk, ppp) = -psi_ref_coef(-lref(i), s) + A_ind(wk, ppp) = i + end if + end do + A_ind(0,ppp) = wk + end do + !$OMP END DO + deallocate(lref) + !$OMP END PARALLEL - if(mod(k, 100) == 0) then - print *, "res", k, res - end if - - if(res < 1d-8) exit - end do - ! rho_mrcc now contains A.X - norm = 0.d0 + print *, 'Done building A_val, A_ind' + + AtA_size = 0 + col_shortcut = 0 + N_col = 0 + integer :: a_coll, at_roww + + + !$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref)& + !$OMP private(at_row, a_col, t, i, j, r1, r2, wk, A_ind_mwen, A_val_mwen, a_coll, at_roww)& + !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s, nactive, active_pp_idx) + allocate(A_val_mwen(nex), A_ind_mwen(nex)) + + !$OMP DO schedule(dynamic, 100) + do at_roww = 1, nactive ! nex + at_row = active_pp_idx(at_roww) + wk = 0 + if(mod(at_roww, 100) == 0) print *, "AtA", at_row, "/", nex + do i=1,A_ind(0,at_roww) + j = active_pp_idx(i) + AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_roww), s) * A_val(i, at_roww) + end do + + do a_coll = 1, nactive + a_col = active_pp_idx(a_coll) + t = 0d0 + r1 = 1 + r2 = 1 + do while ((A_ind(r1, at_roww) /= 0).and.(A_ind(r2, a_coll) /= 0)) + if(A_ind(r1, at_roww) > A_ind(r2, a_coll)) then + r2 = r2+1 + else if(A_ind(r1, at_roww) < A_ind(r2, a_coll)) then + r1 = r1+1 + else + t = t - A_val(r1, at_roww) * A_val(r2, a_coll) + r1 = r1+1 + r2 = r2+1 + end if + end do + + if(a_col == at_row) then + t = t + 1.d0 + end if + if(t /= 0.d0) then + wk += 1 + A_ind_mwen(wk) = a_col + A_val_mwen(wk) = t + end if + end do + + if(wk /= 0) then + !$OMP CRITICAL + col_shortcut(at_roww) = AtA_size+1 + N_col(at_roww) = wk + if (AtA_size+wk > size(AtA_ind,1)) then + print *, AtA_size+wk , size(AtA_ind,1) + stop 'too small' + endif + do i=1,wk + AtA_ind(AtA_size+i) = A_ind_mwen(i) + AtA_val(AtA_size+i) = A_val_mwen(i) + enddo + AtA_size += wk + !$OMP END CRITICAL + end if + end do + !$OMP END DO NOWAIT + deallocate (A_ind_mwen, A_val_mwen) + !$OMP END PARALLEL + + print *, "ATA SIZE", ata_size + x = 0d0 + + + do a_coll = 1, nactive + a_col = active_pp_idx(a_coll) + X(a_col) = AtB(a_col) + end do + + rho_mrcc_init = 0d0 + + allocate(lref(N_det_ref)) + !$OMP PARALLEL DO default(shared) schedule(static, 1) & + !$OMP private(lref, hh, pp, II, myMask, myDet, ok, ind, phase) + do hh = 1, hh_shortcut(0) + do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 + if(active(pp)) cycle + lref = 0 + do II=1,N_det_ref + call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) + if(.not. ok) cycle + call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int) + if(.not. ok) cycle + ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) + if(ind == -1) cycle + ind = psi_non_ref_sorted_idx(ind) + call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int) + X(pp) += psi_ref_coef(II,s)**2 + AtB(pp) += psi_non_ref_coef(ind, s) * psi_ref_coef(II, s) * phase + lref(II) = ind + if(phase < 0d0) lref(II) = -ind + end do + X(pp) = AtB(pp) / X(pp) + do II=1,N_det_ref + if(lref(II) > 0) then + rho_mrcc_init(lref(II),s) = psi_ref_coef(II,s) * X(pp) + else if(lref(II) < 0) then + rho_mrcc_init(-lref(II),s) = -psi_ref_coef(II,s) * X(pp) + end if + end do + end do + end do + !$OMP END PARALLEL DO + + x_new = x + + double precision :: factor, resold + factor = 1.d0 + resold = huge(1.d0) + do k=0,100000 + !$OMP PARALLEL default(shared) private(cx, i, j, a_col, a_coll) + + !$OMP DO + do i=1,N_det_non_ref + rho_mrcc(i,s) = rho_mrcc_init(i,s) ! 0d0 + enddo + !$OMP END DO + + !$OMP DO + do a_coll = 1, nactive !: nex + a_col = active_pp_idx(a_coll) + cx = 0d0 + do i=col_shortcut(a_coll), col_shortcut(a_coll) + N_col(a_coll) - 1 + cx = cx + x(AtA_ind(i)) * AtA_val(i) + end do + x_new(a_col) = AtB(a_col) + cx * factor + end do + !$OMP END DO + + !$OMP END PARALLEL + + res = 0.d0 + + + if (res < resold) then + do a_coll=1,nactive ! nex + a_col = active_pp_idx(a_coll) + do j=1,N_det_non_ref + i = A_ind(j,a_coll) + if (i==0) exit + rho_mrcc(i,s) = rho_mrcc(i,s) + A_val(j,a_coll) * X_new(a_col) + enddo + res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col)) + X(a_col) = X_new(a_col) + end do + factor = 1.d0 + else + factor = -factor * 0.5d0 + endif + resold = res + + if(mod(k, 5) == 0) then + print *, "res ", k, res + end if + + if(res < 1d-12) exit + end do + + + + norm = 0.d0 do i=1,N_det_non_ref norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s) enddo @@ -826,7 +923,7 @@ END_PROVIDER print *, k, "res : ", res, "norm : ", sqrt(norm) - dIj_unique(:size(X), s) = X(:) + !dIj_unique(:size(X), s) = X(:) norm = 0.d0 double precision :: f @@ -871,12 +968,14 @@ END_PROVIDER enddo ! rho_mrcc now contains the product of the scaling factors and the ! normalization constant - - end do - + + dIj_unique(:size(X), s) = X(:) + end do END_PROVIDER + + BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ] integer :: s,i,j double precision, external :: get_dij_index @@ -1142,3 +1241,6 @@ subroutine apply_particle_local(det, exc, res, ok, Nint) ok = .true. end subroutine + + + From 62e8d1a0ac21f1981f08f85ed41cd038c3dcd2df Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Oct 2016 13:38:45 +0200 Subject: [PATCH 04/15] The qp_run queue now pops from the back --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 2 +- src/Davidson/u0Hu0.irp.f | 2 +- src/Determinants/H_apply_zmq.template.f | 2 +- src/Integrals_Bielec/ao_bi_integrals.irp.f | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index c81b1266..a5dd8dcf 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -134,7 +134,7 @@ subroutine ZMQ_selection(N_in, pt2) step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) step = max(1,step) - do i= N_det_generators, 1, -step + do i= 1,N_det_generators, step i_generator_start = max(i-step+1,1) i_generator_max = i write(task,*) i_generator_start, i_generator_max, 1, N diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index a1a72100..3787370a 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -252,7 +252,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) ave_workload = ave_workload/dble(shortcut(0,1)) - do sh=shortcut(0,1),1,-1 + do sh=1,shortcut(0,1),1 workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2 do i=sh, shortcut(0,2), shortcut(0,1) do j=i, min(i, shortcut(0,2)) diff --git a/src/Determinants/H_apply_zmq.template.f b/src/Determinants/H_apply_zmq.template.f index d59f2994..59544b79 100644 --- a/src/Determinants/H_apply_zmq.template.f +++ b/src/Determinants/H_apply_zmq.template.f @@ -35,7 +35,7 @@ subroutine $subroutine($params_main) call zmq_put_psi(zmq_to_qp_run_socket,1,energy,size(energy)) - do i_generator=N_det_generators,1,-1 + do i_generator=1,N_det_generators $skip write(task,*) i_generator call add_task_to_taskserver(zmq_to_qp_run_socket,task) diff --git a/src/Integrals_Bielec/ao_bi_integrals.irp.f b/src/Integrals_Bielec/ao_bi_integrals.irp.f index 2ebb402e..9eadbf35 100644 --- a/src/Integrals_Bielec/ao_bi_integrals.irp.f +++ b/src/Integrals_Bielec/ao_bi_integrals.irp.f @@ -368,7 +368,7 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ] call new_parallel_job(zmq_to_qp_run_socket,'ao_integrals') - do l=1,ao_num + do l=ao_num,1,-1 write(task,*) "triangle ", l call add_task_to_taskserver(zmq_to_qp_run_socket,task) enddo From 973065319ce7bdd46c3fab86f4edfb102341e107 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Oct 2016 19:29:50 +0200 Subject: [PATCH 05/15] Introduced QR in Davidson --- ocaml/Progress_bar.ml | 4 +- src/Davidson/diagonalization_hs2.irp.f | 143 ++++++------------------- src/Utils/LinearAlgebra.irp.f | 44 +++++++- 3 files changed, 71 insertions(+), 120 deletions(-) diff --git a/ocaml/Progress_bar.ml b/ocaml/Progress_bar.ml index 2ca8bd00..b8e97a59 100644 --- a/ocaml/Progress_bar.ml +++ b/ocaml/Progress_bar.ml @@ -14,13 +14,13 @@ type t = let init ?(bar_length=20) ?(start_value=0.) ?(end_value=1.) ~title = { title ; start_value ; end_value ; bar_length ; cur_value=start_value ; - init_time= Time.now () ; dirty = true ; next = Time.now () } + init_time= Time.now () ; dirty = false ; next = Time.now () } let update ~cur_value bar = { bar with cur_value ; dirty=true } let increment_end bar = - { bar with end_value=(bar.end_value +. 1.) ; dirty=true } + { bar with end_value=(bar.end_value +. 1.) ; dirty=false } let increment_cur bar = { bar with cur_value=(bar.cur_value +. 1.) ; dirty=true } diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index c8ac3733..2db6b4cd 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -95,7 +95,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s double precision :: u_dot_v, u_dot_u - integer, allocatable :: kl_pairs(:,:) integer :: k_pairs, kl integer :: iter2 @@ -107,12 +106,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s character*(16384) :: write_buffer double precision :: to_print(3,N_st) double precision :: cpu, wall - integer :: shift, shift2 + integer :: shift, shift2, itermax include 'constants.include.F' !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, R, S, y, h, lambda - if (N_st_diag > sze) then - stop 'error in Davidson : N_st_diag > sze' + if (N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_jacobi to ', N_st_diag*3 + stop -1 endif PROVIDE nuclear_repulsion @@ -147,26 +148,26 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s integer, external :: align_double sze_8 = align_double(sze) + itermax = min(davidson_sze_max, sze/N_st_diag) allocate( & - kl_pairs(2,N_st_diag*(N_st_diag+1)/2), & - W(sze_8,N_st_diag*davidson_sze_max), & - U(sze_8,N_st_diag*davidson_sze_max), & + W(sze_8,N_st_diag*itermax), & + U(sze_8,N_st_diag*itermax), & R(sze_8,N_st_diag), & - S(sze_8,N_st_diag*davidson_sze_max), & - h(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & - y(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & - s_(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & - s_tmp(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & + S(sze_8,N_st_diag*itermax), & + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & residual_norm(N_st_diag), & - c(N_st_diag*davidson_sze_max), & - s2(N_st_diag*davidson_sze_max), & - lambda(N_st_diag*davidson_sze_max)) + c(N_st_diag*itermax), & + s2(N_st_diag*itermax), & + lambda(N_st_diag*itermax)) h = 0.d0 s_ = 0.d0 s_tmp = 0.d0 - c = 0.d0 U = 0.d0 + W = 0.d0 S = 0.d0 R = 0.d0 y = 0.d0 @@ -183,10 +184,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s converged = .False. - do k=1,N_st - call normalize(u_in(1,k),sze) - enddo - do k=N_st+1,N_st_diag do i=1,sze double precision :: r1, r2 @@ -194,14 +191,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s call random_number(r2) u_in(i,k) = dsqrt(-2.d0*dlog(r1))*dcos(dtwo_pi*r2) enddo - - ! Gram-Schmidt - ! ------------ - call dgemv('T',sze,k-1,1.d0,u_in,size(u_in,1), & - u_in(1,k),1,0.d0,c,1) - call dgemv('N',sze,k-1,-1.d0,u_in,size(u_in,1), & - c,1,1.d0,u_in(1,k),1) - call normalize(u_in(1,k),sze) enddo @@ -213,11 +202,12 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s enddo enddo - do iter=1,davidson_sze_max-1 + do iter=1,itermax-1 shift = N_st_diag*(iter-1) shift2 = N_st_diag*iter + call ortho_qr(U,size(U,1),sze,shift2) ! Compute |W_k> = \sum_i |i> ! ----------------------------------------- @@ -229,20 +219,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ! Compute h_kl = = ! ------------------------------------------- - -! do l=1,N_st_diag -! do k=1,N_st_diag -! do iter2=1,iter-1 -! h(k,iter2,l,iter) = u_dot_v(U(1,k,iter2),W(1,l,iter),sze) -! h(k,iter,l,iter2) = h(k,iter2,l,iter) -! enddo -! enddo -! do k=1,l -! h(k,iter,l,iter) = u_dot_v(U(1,k,iter),W(1,l,iter),sze) -! h(l,iter,k,iter) = h(k,iter,l,iter) -! enddo -! enddo - call dgemm('T','N', shift2, N_st_diag, sze, & 1.d0, U, size(U,1), W(1,shift+1), size(W,1), & 0.d0, h(1,shift+1), size(h,1)) @@ -295,22 +271,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ! Express eigenvectors of h in the determinant basis ! -------------------------------------------------- -! do k=1,N_st_diag -! do i=1,sze -! U(i,shift2+k) = 0.d0 -! W(i,shift2+k) = 0.d0 -! S(i,shift2+k) = 0.d0 -! enddo -! do l=1,N_st_diag*iter -! do i=1,sze -! U(i,shift2+k) = U(i,shift2+k) + U(i,l)*y(l,k) -! W(i,shift2+k) = W(i,shift2+k) + W(i,l)*y(l,k) -! S(i,shift2+k) = S(i,shift2+k) + S(i,l)*y(l,k) -! enddo -! enddo -! enddo -! -! call dgemm('N','N', sze, N_st_diag, shift2, & 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) call dgemm('N','N', sze, N_st_diag, shift2, & @@ -321,13 +281,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ! Compute residual vector ! ----------------------- -! do k=1,N_st_diag -! print *, s2(k) -! s2(k) = u_dot_v(U(1,shift2+k), S(1,shift2+k), sze) + S_z2_Sz -! print *, s2(k) -! print *, '' -! pause -! enddo do k=1,N_st_diag do i=1,sze R(i,k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & @@ -338,14 +291,17 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s to_print(1,k) = lambda(k) + nuclear_repulsion to_print(2,k) = s2(k) to_print(3,k) = residual_norm(k) - if (residual_norm(k) > 1.e9) then - stop 'Davidson failed' - endif endif enddo - write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3))') iter, to_print(:,1:N_st) + write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3,A20))') iter, to_print(:,1:N_st), '' call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged) + do k=1,N_st + if (residual_norm(k) > 1.e9) then + print *, '' + stop 'Davidson failed' + endif + enddo if (converged) then exit endif @@ -359,42 +315,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s enddo enddo - ! Gram-Schmidt - ! ------------ - - do k=1,N_st_diag - -! do l=1,N_st_diag*iter -! c(1) = u_dot_v(U(1,shift2+k),U(1,l),sze) -! do i=1,sze -! U(i,k,iter+1) = U(i,shift2+k) - c(1) * U(i,l) -! enddo -! enddo -! - call dgemv('T',sze,N_st_diag*iter,1.d0,U,size(U,1), & - U(1,shift2+k),1,0.d0,c,1) - call dgemv('N',sze,N_st_diag*iter,-1.d0,U,size(U,1), & - c,1,1.d0,U(1,shift2+k),1) -! -! do l=1,k-1 -! c(1) = u_dot_v(U(1,shift2+k),U(1,shift2+l),sze) -! do i=1,sze -! U(i,k,iter+1) = U(i,shift2+k) - c(1) * U(i,shift2+l) -! enddo -! enddo -! - call dgemv('T',sze,k-1,1.d0,U(1,shift2+1),size(U,1), & - U(1,shift2+k),1,0.d0,c,1) - call dgemv('N',sze,k-1,-1.d0,U(1,shift2+1),size(U,1), & - c,1,1.d0,U(1,shift2+k),1) - - call normalize( U(1,shift2+k), sze ) - enddo - enddo if (.not.converged) then - iter = davidson_sze_max-1 + iter = itermax-1 endif ! Re-contract to u_in @@ -404,20 +328,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s energies(k) = lambda(k) enddo -! do k=1,N_st_diag -! do i=1,sze -! do l=1,iter*N_st_diag -! u_in(i,k) += U(i,l)*y(l,k) -! enddo -! enddo -! enddo -! enddo - call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, 1.d0, & U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) enddo + do k=1,N_st_diag + S2_jj(k) = s2(k) + enddo write_buffer = '===== ' do i=1,N_st write_buffer = trim(write_buffer)//' ================ =========== ===========' @@ -427,7 +345,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s call write_time(iunit) deallocate ( & - kl_pairs, & W, residual_norm, & U, & R, c, S, & diff --git a/src/Utils/LinearAlgebra.irp.f b/src/Utils/LinearAlgebra.irp.f index 00f61101..e44e8c2c 100644 --- a/src/Utils/LinearAlgebra.irp.f +++ b/src/Utils/LinearAlgebra.irp.f @@ -11,9 +11,9 @@ subroutine svd(A,LDA,U,LDU,D,Vt,LDVt,m,n) integer, intent(in) :: LDA, LDU, LDVt, m, n double precision, intent(in) :: A(LDA,n) - double precision, intent(out) :: U(LDU,n) + double precision, intent(out) :: U(LDU,m) double precision,intent(out) :: Vt(LDVt,n) - double precision,intent(out) :: D(n) + double precision,intent(out) :: D(min(m,n)) double precision,allocatable :: work(:) integer :: info, lwork, i, j, k @@ -24,13 +24,13 @@ subroutine svd(A,LDA,U,LDU,D,Vt,LDVt,m,n) ! Find optimal size for temp arrays allocate(work(1)) lwork = -1 - call dgesvd('A','A', n, n, A_tmp, LDA, & + call dgesvd('A','A', m, n, A_tmp, LDA, & D, U, LDU, Vt, LDVt, work, lwork, info) lwork = work(1) deallocate(work) allocate(work(lwork)) - call dgesvd('A','A', n, n, A_tmp, LDA, & + call dgesvd('A','A', m, n, A_tmp, LDA, & D, U, LDU, Vt, LDVt, work, lwork, info) deallocate(work,A_tmp) @@ -125,6 +125,40 @@ subroutine ortho_canonical(overlap,LDA,N,C,LDC,m) end +subroutine ortho_qr(A,LDA,m,n) + implicit none + BEGIN_DOC + ! Orthogonalization using Q.R factorization + ! + ! A : matrix to orthogonalize + ! + ! LDA : leftmost dimension of A + ! + ! n : Number of rows of A + ! + ! m : Number of columns of A + ! + END_DOC + integer, intent(in) :: m,n, LDA + double precision, intent(inout) :: A(LDA,n) + + integer :: lwork, info + integer, allocatable :: jpvt(:) + double precision, allocatable :: tau(:), work(:) + + allocate (jpvt(n), tau(n), work(1)) + LWORK=-1 +! call dgeqp3(m, n, A, LDA, jpvt, tau, WORK, LWORK, INFO) + call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO ) + LWORK=WORK(1) + deallocate(WORK) + allocate(WORK(LWORK)) +! call dgeqp3(m, n, A, LDA, jpvt, tau, WORK, LWORK, INFO) + call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO ) + call dorgqr(m, n, n, A, LDA, tau, WORK, LWORK, INFO) + deallocate(WORK,jpvt,tau) +end + subroutine ortho_lowdin(overlap,LDA,N,C,LDC,m) implicit none BEGIN_DOC @@ -161,7 +195,7 @@ subroutine ortho_lowdin(overlap,LDA,N,C,LDC,m) allocate(U(ldc,n),Vt(lda,n),S_half(lda,n),D(n)) - call svd(overlap,lda,U,ldc,D,Vt,lda,m,n) + call svd(overlap,lda,U,ldc,D,Vt,lda,n,n) !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(S_half,U,D,Vt,n,C,m) & From 1fe1750f90bdd0176e2cd8f34499f39c650931de Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Oct 2016 21:36:45 +0200 Subject: [PATCH 06/15] Removed residual in Davdison --- src/Davidson/diagonalization_hs2.irp.f | 46 ++++++++++++++------------ 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index 2db6b4cd..abffcf81 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -98,7 +98,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s integer :: k_pairs, kl integer :: iter2 - double precision, allocatable :: W(:,:), U(:,:), R(:,:), S(:,:) + double precision, allocatable :: W(:,:), U(:,:), S(:,:) double precision, allocatable :: y(:,:), h(:,:), lambda(:), s2(:) double precision, allocatable :: c(:), s_(:,:), s_tmp(:,:) double precision :: diag_h_mat_elem @@ -109,7 +109,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s integer :: shift, shift2, itermax include 'constants.include.F' - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, R, S, y, h, lambda + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda if (N_st_diag*3 > sze) then print *, 'error in Davidson :' print *, 'Increase n_det_max_jacobi to ', N_st_diag*3 @@ -152,7 +152,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s allocate( & W(sze_8,N_st_diag*itermax), & U(sze_8,N_st_diag*itermax), & - R(sze_8,N_st_diag), & S(sze_8,N_st_diag*itermax), & h(N_st_diag*itermax,N_st_diag*itermax), & y(N_st_diag*itermax,N_st_diag*itermax), & @@ -169,7 +168,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s U = 0.d0 W = 0.d0 S = 0.d0 - R = 0.d0 y = 0.d0 @@ -184,12 +182,24 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s converged = .False. - do k=N_st+1,N_st_diag + double precision :: r1, r2 + do k=N_st+1,N_st_diag-2,2 do i=1,sze - double precision :: r1, r2 call random_number(r1) call random_number(r2) - u_in(i,k) = dsqrt(-2.d0*dlog(r1))*dcos(dtwo_pi*r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) + u_in(i,k+1) = r1*dsin(r2) + enddo + enddo + do k=N_st_diag-1,N_st_diag + do i=1,sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) enddo enddo @@ -278,16 +288,17 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s call dgemm('N','N', sze, N_st_diag, shift2, & 1.d0, S, size(S,1), y, size(y,1), 0.d0, S(1,shift2+1), size(S,1)) - ! Compute residual vector - ! ----------------------- + ! Compute residual vector and davidson step + ! ----------------------------------------- do k=1,N_st_diag do i=1,sze - R(i,k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & - * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz) + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & + * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz) & + /max(H_jj(i) - lambda (k),1.d-2) enddo if (k <= N_st) then - residual_norm(k) = u_dot_u(R(1,k),sze) + residual_norm(k) = u_dot_u(U(1,shift2+k),sze) to_print(1,k) = lambda(k) + nuclear_repulsion to_print(2,k) = s2(k) to_print(3,k) = residual_norm(k) @@ -306,15 +317,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s exit endif - ! Davidson step - ! ------------- - - do k=1,N_st_diag - do i=1,sze - U(i,shift2+k) = - R(i,k)/max(H_jj(i) - lambda(k),1.d-2) - enddo - enddo - enddo if (.not.converged) then @@ -347,7 +349,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s deallocate ( & W, residual_norm, & U, & - R, c, S, & + c, S, & h, & y, s_, s_tmp, & lambda & From 4119577ae8335d49548f875219a1b7a4263e81ba Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Oct 2016 22:20:46 +0200 Subject: [PATCH 07/15] Minor changes --- src/Davidson/diagonalization_hs2.irp.f | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index abffcf81..a7bc2b95 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -294,8 +294,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s do k=1,N_st_diag do i=1,sze U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & - * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz) & - /max(H_jj(i) - lambda (k),1.d-2) + * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz & + )/max(H_jj(i) - lambda (k),1.d-2) enddo if (k <= N_st) then residual_norm(k) = u_dot_u(U(1,shift2+k),sze) @@ -305,10 +305,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s endif enddo - write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3,A20))') iter, to_print(:,1:N_st), '' + write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3,A30))') iter, to_print(:,1:N_st), '' call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged) do k=1,N_st - if (residual_norm(k) > 1.e9) then + if (residual_norm(k) > 1.e4) then print *, '' stop 'Davidson failed' endif From 2f1c7c5ce90f1b107992b5b7c0e461cf8a3d1a63 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Oct 2016 23:07:03 +0200 Subject: [PATCH 08/15] Small changes in MRCC --- plugins/MRCC_Utils/mrcc_dress.irp.f | 2 +- plugins/MRCC_Utils/mrcc_utils.irp.f | 323 +++++++++++++++++++--------- plugins/mrcepa0/EZFIO.cfg | 2 +- plugins/mrcepa0/dressing.irp.f | 37 ++-- 4 files changed, 241 insertions(+), 123 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_dress.irp.f b/plugins/MRCC_Utils/mrcc_dress.irp.f index e6d0fb81..5c2f5efc 100644 --- a/plugins/MRCC_Utils/mrcc_dress.irp.f +++ b/plugins/MRCC_Utils/mrcc_dress.irp.f @@ -271,7 +271,7 @@ subroutine mrcc_dress(delta_ij_, delta_ii_, Nstates, Ndet_non_ref, Ndet_ref,i_ge !delta_ii_(i_state,i_I) = 0.d0 do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) - delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd) + delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + 0.5d0 * dIa_hla(i_state,k_sd) enddo endif enddo diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 14885153..f1c4b4a3 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -685,7 +685,7 @@ END_PROVIDER - do s = 1, N_states + do s=1, N_states A_val = 0d0 A_ind = 0 @@ -698,61 +698,61 @@ END_PROVIDER !$OMP PARALLEL default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind)& !$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref)& - !$OMP shared(active, active_hh_idx, active_pp_idx, nactive)& + !$OMP shared(active, active_hh_idx, active_pp_idx, nactive) & !$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk, ppp, hh) - allocate(lref(N_det_non_ref)) - !$OMP DO schedule(static,10) - do ppp=1,nactive - pp = active_pp_idx(ppp) - hh = active_hh_idx(ppp) - lref = 0 - do II = 1, N_det_ref - call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) - if(.not. ok) cycle - call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int) - if(.not. ok) cycle - ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) - if(ind /= -1) then - call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int) - if (phase > 0.d0) then - lref(psi_non_ref_sorted_idx(ind)) = II - else - lref(psi_non_ref_sorted_idx(ind)) = -II - endif - end if - end do - wk = 0 - do i=1, N_det_non_ref - if(lref(i) > 0) then - wk += 1 - A_val(wk, ppp) = psi_ref_coef(lref(i), s) - A_ind(wk, ppp) = i - else if(lref(i) < 0) then - wk += 1 - A_val(wk, ppp) = -psi_ref_coef(-lref(i), s) - A_ind(wk, ppp) = i - end if - end do - A_ind(0,ppp) = wk + allocate(lref(N_det_non_ref)) + !$OMP DO schedule(static,10) + do ppp=1,nactive + pp = active_pp_idx(ppp) + hh = active_hh_idx(ppp) + lref = 0 + do II = 1, N_det_ref + call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) + if(.not. ok) cycle + call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int) + if(.not. ok) cycle + ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) + if(ind /= -1) then + call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int) + if (phase > 0.d0) then + lref(psi_non_ref_sorted_idx(ind)) = II + else + lref(psi_non_ref_sorted_idx(ind)) = -II + endif + end if end do + wk = 0 + do i=1, N_det_non_ref + if(lref(i) > 0) then + wk += 1 + A_val(wk, ppp) = psi_ref_coef(lref(i), s) + A_ind(wk, ppp) = i + else if(lref(i) < 0) then + wk += 1 + A_val(wk, ppp) = -psi_ref_coef(-lref(i), s) + A_ind(wk, ppp) = i + end if + end do + A_ind(0,ppp) = wk + end do !$OMP END DO deallocate(lref) - !$OMP END PARALLEL - - + !$OMP END PARALLEL + + print *, 'Done building A_val, A_ind' AtA_size = 0 col_shortcut = 0 N_col = 0 - integer :: a_coll, at_roww + integer :: a_coll, at_roww !$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref)& !$OMP private(at_row, a_col, t, i, j, r1, r2, wk, A_ind_mwen, A_val_mwen, a_coll, at_roww)& !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s, nactive, active_pp_idx) allocate(A_val_mwen(nex), A_ind_mwen(nex)) - + !$OMP DO schedule(dynamic, 100) do at_roww = 1, nactive ! nex at_row = active_pp_idx(at_roww) @@ -762,8 +762,8 @@ END_PROVIDER j = active_pp_idx(i) AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_roww), s) * A_val(i, at_roww) end do - - do a_coll = 1, nactive + + do a_coll = 1, nactive a_col = active_pp_idx(a_coll) t = 0d0 r1 = 1 @@ -795,12 +795,12 @@ END_PROVIDER col_shortcut(at_roww) = AtA_size+1 N_col(at_roww) = wk if (AtA_size+wk > size(AtA_ind,1)) then - print *, AtA_size+wk , size(AtA_ind,1) - stop 'too small' + print *, AtA_size+wk , size(AtA_ind,1) + stop 'too small' endif do i=1,wk - AtA_ind(AtA_size+i) = A_ind_mwen(i) - AtA_val(AtA_size+i) = A_val_mwen(i) + AtA_ind(AtA_size+i) = A_ind_mwen(i) + AtA_val(AtA_size+i) = A_val_mwen(i) enddo AtA_size += wk !$OMP END CRITICAL @@ -822,41 +822,41 @@ END_PROVIDER rho_mrcc_init = 0d0 allocate(lref(N_det_ref)) - !$OMP PARALLEL DO default(shared) schedule(static, 1) & + !$OMP PARALLEL DO default(shared) schedule(static, 1) & !$OMP private(lref, hh, pp, II, myMask, myDet, ok, ind, phase) do hh = 1, hh_shortcut(0) - do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 - if(active(pp)) cycle - lref = 0 - do II=1,N_det_ref - call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) - if(.not. ok) cycle - call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int) - if(.not. ok) cycle - ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) - if(ind == -1) cycle - ind = psi_non_ref_sorted_idx(ind) - call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int) - X(pp) += psi_ref_coef(II,s)**2 - AtB(pp) += psi_non_ref_coef(ind, s) * psi_ref_coef(II, s) * phase - lref(II) = ind - if(phase < 0d0) lref(II) = -ind + do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 + if(active(pp)) cycle + lref = 0 + do II=1,N_det_ref + call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) + if(.not. ok) cycle + call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int) + if(.not. ok) cycle + ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) + if(ind == -1) cycle + ind = psi_non_ref_sorted_idx(ind) + call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int) + X(pp) += psi_ref_coef(II,s)**2 + AtB(pp) += psi_non_ref_coef(ind, s) * psi_ref_coef(II, s) * phase + lref(II) = ind + if(phase < 0d0) lref(II) = -ind + end do + X(pp) = AtB(pp) / X(pp) + do II=1,N_det_ref + if(lref(II) > 0) then + rho_mrcc_init(lref(II),s) = psi_ref_coef(II,s) * X(pp) + else if(lref(II) < 0) then + rho_mrcc_init(-lref(II),s) = -psi_ref_coef(II,s) * X(pp) + end if + end do end do - X(pp) = AtB(pp) / X(pp) - do II=1,N_det_ref - if(lref(II) > 0) then - rho_mrcc_init(lref(II),s) = psi_ref_coef(II,s) * X(pp) - else if(lref(II) < 0) then - rho_mrcc_init(-lref(II),s) = -psi_ref_coef(II,s) * X(pp) - end if - end do - end do end do !$OMP END PARALLEL DO x_new = x - - double precision :: factor, resold + + double precision :: factor, resold factor = 1.d0 resold = huge(1.d0) do k=0,100000 @@ -882,10 +882,10 @@ END_PROVIDER !$OMP END PARALLEL res = 0.d0 - + if (res < resold) then - do a_coll=1,nactive ! nex + do a_coll=1,nactive ! nex a_col = active_pp_idx(a_coll) do j=1,N_det_non_ref i = A_ind(j,a_coll) @@ -894,60 +894,172 @@ END_PROVIDER enddo res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col)) X(a_col) = X_new(a_col) - end do - factor = 1.d0 + end do + factor = 1.d0 else factor = -factor * 0.5d0 endif resold = res - - if(mod(k, 5) == 0) then + + if(mod(k, 100) == 0) then print *, "res ", k, res end if - if(res < 1d-12) exit + if(res < 1d-9) exit end do norm = 0.d0 - do i=1,N_det_non_ref - norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s) - enddo - ! Norm now contains the norm of A.X - - do i=1,N_det_ref - norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s) - enddo - ! Norm now contains the norm of Psi + A.X - - print *, k, "res : ", res, "norm : ", sqrt(norm) - - !dIj_unique(:size(X), s) = X(:) + do i=1,N_det_non_ref + norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s) + enddo + ! Norm now contains the norm of A.X + + do i=1,N_det_ref + norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s) + enddo + ! Norm now contains the norm of Psi + A.X + + print *, k, "res : ", res, "norm : ", sqrt(norm) + +!--------------- +! double precision :: e_0, overlap +! double precision, allocatable :: u_0(:) +! integer(bit_kind), allocatable :: keys_tmp(:,:,:) +! allocate (u_0(N_det), keys_tmp(N_int,2,N_det) ) +! k=0 +! overlap = 0.d0 +! do i=1,N_det_ref +! k = k+1 +! u_0(k) = psi_ref_coef(i,1) +! keys_tmp(:,:,k) = psi_ref(:,:,i) +! overlap += u_0(k)*psi_ref_coef(i,1) +! enddo +! norm = 0.d0 +! do i=1,N_det_non_ref +! k = k+1 +! u_0(k) = psi_non_ref_coef(i,1) +! keys_tmp(:,:,k) = psi_non_ref(:,:,i) +! overlap += u_0(k)*psi_non_ref_coef(i,1) +! enddo +! +! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det) +! print *, 'Energy of |Psi_CASSD> : ', e_0 + nuclear_repulsion, overlap +! +! k=0 +! overlap = 0.d0 +! do i=1,N_det_ref +! k = k+1 +! u_0(k) = psi_ref_coef(i,1) +! keys_tmp(:,:,k) = psi_ref(:,:,i) +! overlap += u_0(k)*psi_ref_coef(i,1) +! enddo +! norm = 0.d0 +! do i=1,N_det_non_ref +! k = k+1 +! ! f is such that f.\tilde{c_i} = c_i +! f = psi_non_ref_coef(i,1) / rho_mrcc(i,1) +! +! ! Avoid numerical instabilities +! f = min(f,2.d0) +! f = max(f,-2.d0) +! +! f = 1.d0 +! +! u_0(k) = rho_mrcc(i,1)*f +! keys_tmp(:,:,k) = psi_non_ref(:,:,i) +! norm += u_0(k)**2 +! overlap += u_0(k)*psi_non_ref_coef(i,1) +! enddo +! +! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det) +! print *, 'Energy of |(1+T)Psi_0> : ', e_0 + nuclear_repulsion, overlap +! +! f = 1.d0/norm +! norm = 1.d0 +! do i=1,N_det_ref +! norm = norm - psi_ref_coef(i,s)*psi_ref_coef(i,s) +! enddo +! f = dsqrt(f*norm) +! overlap = norm +! do i=1,N_det_non_ref +! u_0(k) = rho_mrcc(i,1)*f +! overlap += u_0(k)*psi_non_ref_coef(i,1) +! enddo +! +! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det) +! print *, 'Energy of |(1+T)Psi_0> (normalized) : ', e_0 + nuclear_repulsion, overlap +! +! k=0 +! overlap = 0.d0 +! do i=1,N_det_ref +! k = k+1 +! u_0(k) = psi_ref_coef(i,1) +! keys_tmp(:,:,k) = psi_ref(:,:,i) +! overlap += u_0(k)*psi_ref_coef(i,1) +! enddo +! norm = 0.d0 +! do i=1,N_det_non_ref +! k = k+1 +! ! f is such that f.\tilde{c_i} = c_i +! f = psi_non_ref_coef(i,1) / rho_mrcc(i,1) +! +! ! Avoid numerical instabilities +! f = min(f,2.d0) +! f = max(f,-2.d0) +! +! u_0(k) = rho_mrcc(i,1)*f +! keys_tmp(:,:,k) = psi_non_ref(:,:,i) +! norm += u_0(k)**2 +! overlap += u_0(k)*psi_non_ref_coef(i,1) +! enddo +! +! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det) +! print *, 'Energy of |(1+T)Psi_0> (mu_i): ', e_0 + nuclear_repulsion, overlap +! +! f = 1.d0/norm +! norm = 1.d0 +! do i=1,N_det_ref +! norm = norm - psi_ref_coef(i,s)*psi_ref_coef(i,s) +! enddo +! overlap = norm +! f = dsqrt(f*norm) +! do i=1,N_det_non_ref +! u_0(k) = rho_mrcc(i,1)*f +! overlap += u_0(k)*psi_non_ref_coef(i,1) +! enddo +! +! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det) +! print *, 'Energy of |(1+T)Psi_0> (normalized mu_i) : ', e_0 + nuclear_repulsion, overlap +! +! deallocate(u_0, keys_tmp) +! +!--------------- norm = 0.d0 - double precision :: f + double precision :: f do i=1,N_det_non_ref if (rho_mrcc(i,s) == 0.d0) then rho_mrcc(i,s) = 1.d-32 endif - + ! f is such that f.\tilde{c_i} = c_i f = psi_non_ref_coef(i,s) / rho_mrcc(i,s) - + ! Avoid numerical instabilities f = min(f,2.d0) f = max(f,-2.d0) - + norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s) rho_mrcc(i,s) = f enddo ! norm now contains the norm of |T.Psi_0> ! rho_mrcc now contains the f factors - + f = 1.d0/norm ! f now contains 1/ - + norm = 1.d0 do i=1,N_det_ref norm = norm - psi_ref_coef(i,s)*psi_ref_coef(i,s) @@ -955,22 +1067,23 @@ END_PROVIDER ! norm now contains f = dsqrt(f*norm) ! f normalises T.Psi_0 such that (1+T)|Psi> is normalized - + norm = norm*f print *, 'norm of |T Psi_0> = ', dsqrt(norm) - + do i=1,N_det_ref norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s) enddo - + do i=1,N_det_non_ref rho_mrcc(i,s) = rho_mrcc(i,s) * f enddo ! rho_mrcc now contains the product of the scaling factors and the ! normalization constant - dIj_unique(:size(X), s) = X(:) + dIj_unique(:size(X), s) = X(:) end do + END_PROVIDER diff --git a/plugins/mrcepa0/EZFIO.cfg b/plugins/mrcepa0/EZFIO.cfg index d792390d..61f3392f 100644 --- a/plugins/mrcepa0/EZFIO.cfg +++ b/plugins/mrcepa0/EZFIO.cfg @@ -23,7 +23,7 @@ interface: ezfio type: Threshold doc: Threshold on the convergence of the dressed CI energy interface: ezfio,provider,ocaml -default: 1.e-4 +default: 5.e-5 [n_it_max_dressed_ci] type: Strictly_positive_int diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index 8df7e91a..4f355f2b 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -299,7 +299,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe delta_ii_(i_state,i_I) = 0.d0 do l_sd=1,idx_alpha(0) k_sd = idx_alpha(l_sd) - delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd) + delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + 0.5d0*dIa_hla(i_state,k_sd) enddo endif enddo @@ -554,7 +554,7 @@ END_PROVIDER do k=1,N_det_non_ref call i_h_j(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Hjk) - call i_h_j(psi_non_ref(1,1,k),psi_ref(1,1,i), N_int,Hki) +! call i_h_j(psi_non_ref(1,1,k),psi_ref(1,1,i), N_int,Hki) delta_cas(i,j,i_state) += Hjk * dij(i, k, i_state) ! * Hki * lambda_mrcc(i_state, k) !print *, Hjk * get_dij(psi_ref(1,1,i), psi_non_ref(1,1,k), N_int), Hki * get_dij(psi_ref(1,1,j), psi_non_ref(1,1,k), N_int) @@ -647,7 +647,7 @@ end function integer :: p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_, sortRefIdx(N_det_ref) logical :: ok double precision :: phase_iI, phase_Ik, phase_Jl, phase_IJ, phase_al, diI, hIi, hJi, delta_JI, dkI(1), HkI, ci_inv(1), dia_hla(1) - double precision :: contrib, HIIi, HJk, wall + double precision :: contrib, contrib2, HIIi, HJk, wall integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ integer(bit_kind) :: det_tmp(N_int, 2), made_hole(N_int,2), made_particle(N_int,2), myActive(N_int,2) integer(bit_kind),allocatable :: sortRef(:,:,:) @@ -677,7 +677,7 @@ end function delta_mrcepa0_ij(:,:,:) = 0d0 !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii) & - !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib) & + !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2) & !$OMP shared(active_sorb, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef, cepa0_shortcut, det_cepa0_active) & !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas) & !$OMP shared(notf,i_state, sortRef, sortRefIdx, dij) @@ -720,16 +720,18 @@ end function !$OMP ATOMIC notf = notf+1 - call i_h_j(psi_non_ref(1,1,det_cepa0_idx(k)),psi_ref(1,1,J),N_int,HJk) - !contrib = delta_cas(II, J, i_state) * HJk * lambda_mrcc(i_state, det_cepa0_idx(k)) +! call i_h_j(psi_non_ref(1,1,det_cepa0_idx(k)),psi_ref(1,1,J),N_int,HJk) contrib = delta_cas(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state) - !$OMP ATOMIC - delta_mrcepa0_ij(J, det_cepa0_idx(i), i_state) += contrib if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then + contrib2 = contrib / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state) !$OMP ATOMIC - delta_mrcepa0_ii(J,i_state) -= contrib / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state) + delta_mrcepa0_ii(J,i_state) -= contrib2 + else + contrib = contrib * 0.5d0 end if + !$OMP ATOMIC + delta_mrcepa0_ij(J, det_cepa0_idx(i), i_state) += contrib end do kloop end do @@ -753,7 +755,7 @@ END_PROVIDER integer :: p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_ logical :: ok double precision :: phase_Ji, phase_Ik, phase_Ii - double precision :: contrib, delta_IJk, HJk, HIk, HIl + double precision :: contrib, contrib2, delta_IJk, HJk, HIk, HIl integer, dimension(0:2,2,2) :: exc_Ik, exc_Ji, exc_Ii integer(bit_kind) :: det_tmp(N_int, 2), det_tmp2(N_int, 2) integer, allocatable :: idx_sorted_bit(:) @@ -778,7 +780,7 @@ END_PROVIDER !$OMP PARALLEL DO default(none) schedule(dynamic,10) shared(delta_sub_ij, delta_sub_ii) & !$OMP private(i, J, k, degree, degree2, l, deg, ni) & !$OMP private(p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_) & - !$OMP private(ok, phase_Ji, phase_Ik, phase_Ii, contrib, delta_IJk, HJk, HIk, HIl, exc_Ik, exc_Ji, exc_Ii) & + !$OMP private(ok, phase_Ji, phase_Ik, phase_Ii, contrib2, contrib, delta_IJk, HJk, HIk, HIl, exc_Ik, exc_Ji, exc_Ii) & !$OMP private(det_tmp, det_tmp2, II, blok) & !$OMP shared(idx_sorted_bit, N_det_non_ref, N_det_ref, N_int, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef) & !$OMP shared(i_state,lambda_mrcc, hf_bitmask, active_sorb) @@ -827,13 +829,16 @@ END_PROVIDER delta_IJk = HJk * HIk * lambda_mrcc(i_state, k) call apply_excitation(psi_non_ref(1,1,i),exc_Ik,det_tmp,ok,N_int) if(ok) cycle - contrib = delta_IJk * HIl * lambda_mrcc(i_state,l) + contrib = delta_IJk * HIl * lambda_mrcc(i_state,l) + if(dabs(psi_ref_coef(II,i_state)).ge.5.d-5) then + contrib2 = contrib / psi_ref_coef(II, i_state) * psi_non_ref_coef(l,i_state) + !$OMP ATOMIC + delta_sub_ii(II,i_state) -= contrib2 + else + contrib = contrib * 0.5d0 + endif !$OMP ATOMIC delta_sub_ij(II, i, i_state) += contrib - if(dabs(psi_ref_coef(II,i_state)).ge.5.d-5) then - !$OMP ATOMIC - delta_sub_ii(II,i_state) -= contrib / psi_ref_coef(II, i_state) * psi_non_ref_coef(l,i_state) - endif end do end do end do From 360d38a41de3c30cdba6ce8fbadb50d0e18b82f4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Oct 2016 23:10:04 +0200 Subject: [PATCH 09/15] Format error in loc_cele --- plugins/loc_cele/loc_cele.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/loc_cele/loc_cele.irp.f b/plugins/loc_cele/loc_cele.irp.f index c9036aa1..52e0ef28 100644 --- a/plugins/loc_cele/loc_cele.irp.f +++ b/plugins/loc_cele/loc_cele.irp.f @@ -451,7 +451,7 @@ enddo !big loop over symmetry - 10 format (4E18.12) + 10 format (4E19.12) ! Now we copyt the newcmo into the mo_coef From 1185d70be7404841abb9d4c0c419ba41f91af49d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Oct 2016 23:31:20 +0200 Subject: [PATCH 10/15] Removed all ipc between Fortran and OCaml --- ocaml/TaskServer.ml | 4 ++-- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 2 +- plugins/MRCC_Utils/mrcc_utils.irp.f | 16 ++++++++-------- plugins/mrcepa0/dressing.irp.f | 1 - src/Davidson/davidson_parallel.irp.f | 4 ++-- src/ZMQ/utils.irp.f | 27 ++++++++++----------------- 6 files changed, 23 insertions(+), 31 deletions(-) diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 9a1797f8..6edc8122 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -678,9 +678,9 @@ let run ~port = (** Debug input *) Printf.sprintf "q:%d r:%d n:%d : %s\n%!" - (Queuing_system.number_of_queued program_state.queue) + (Queuing_system.number_of_queued program_state.queue) (Queuing_system.number_of_running program_state.queue) - (Queuing_system.number_of_tasks program_state.queue) + (Queuing_system.number_of_tasks program_state.queue) (Message.to_string message) |> debug; diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index a5dd8dcf..c81b1266 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -134,7 +134,7 @@ subroutine ZMQ_selection(N_in, pt2) step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) step = max(1,step) - do i= 1,N_det_generators, step + do i= N_det_generators, 1, -step i_generator_start = max(i-step+1,1) i_generator_max = i write(task,*) i_generator_start, i_generator_max, 1, N diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index f1c4b4a3..84bca0b4 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -1043,23 +1043,23 @@ END_PROVIDER if (rho_mrcc(i,s) == 0.d0) then rho_mrcc(i,s) = 1.d-32 endif - + ! f is such that f.\tilde{c_i} = c_i f = psi_non_ref_coef(i,s) / rho_mrcc(i,s) - + ! Avoid numerical instabilities f = min(f,2.d0) f = max(f,-2.d0) - + norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s) rho_mrcc(i,s) = f enddo ! norm now contains the norm of |T.Psi_0> ! rho_mrcc now contains the f factors - + f = 1.d0/norm ! f now contains 1/ - + norm = 1.d0 do i=1,N_det_ref norm = norm - psi_ref_coef(i,s)*psi_ref_coef(i,s) @@ -1067,14 +1067,14 @@ END_PROVIDER ! norm now contains f = dsqrt(f*norm) ! f normalises T.Psi_0 such that (1+T)|Psi> is normalized - + norm = norm*f print *, 'norm of |T Psi_0> = ', dsqrt(norm) - + do i=1,N_det_ref norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s) enddo - + do i=1,N_det_non_ref rho_mrcc(i,s) = rho_mrcc(i,s) * f enddo diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index 4f355f2b..3646b0b2 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -554,7 +554,6 @@ END_PROVIDER do k=1,N_det_non_ref call i_h_j(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Hjk) -! call i_h_j(psi_non_ref(1,1,k),psi_ref(1,1,i), N_int,Hki) delta_cas(i,j,i_state) += Hjk * dij(i, k, i_state) ! * Hki * lambda_mrcc(i_state, k) !print *, Hjk * get_dij(psi_ref(1,1,i), psi_non_ref(1,1,k), N_int), Hki * get_dij(psi_ref(1,1,j), psi_non_ref(1,1,k), N_int) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 50b58f67..cede52c9 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -501,7 +501,7 @@ subroutine davidson_miniserver_end() integer rc character*(64) buf - address = trim(qp_run_address_tcp)//':11223' + address = trim(qp_run_address)//':11223' requester = f77_zmq_socket(zmq_context, ZMQ_REQ) rc = f77_zmq_connect(requester,address) @@ -520,7 +520,7 @@ subroutine davidson_miniserver_get() character*(20) buffer integer rc - address = trim(qp_run_address_tcp)//':11223' + address = trim(qp_run_address)//':11223' requester = f77_zmq_socket(zmq_context, ZMQ_REQ) rc = f77_zmq_connect(requester,address) diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index 84665199..f2703ff8 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -17,8 +17,6 @@ END_PROVIDER BEGIN_PROVIDER [ character*(128), qp_run_address ] -&BEGIN_PROVIDER [ character*(128), qp_run_address_ipc ] -&BEGIN_PROVIDER [ character*(128), qp_run_address_tcp ] &BEGIN_PROVIDER [ integer, zmq_port_start ] use f77_zmq implicit none @@ -36,22 +34,19 @@ END_PROVIDER integer :: i do i=len(buffer),1,-1 if ( buffer(i:i) == ':') then - qp_run_address_tcp = trim(buffer(1:i-1)) + qp_run_address = trim(buffer(1:i-1)) read(buffer(i+1:), *) zmq_port_start exit endif enddo - qp_run_address_ipc = 'ipc:///tmp/qp_run' - qp_run_address = qp_run_address_ipc END_PROVIDER - BEGIN_PROVIDER [ character*(128), zmq_socket_pull_tcp_address ] -&BEGIN_PROVIDER [ character*(128), zmq_socket_pull_inproc_address ] &BEGIN_PROVIDER [ character*(128), zmq_socket_pair_inproc_address ] &BEGIN_PROVIDER [ character*(128), zmq_socket_push_tcp_address ] +&BEGIN_PROVIDER [ character*(128), zmq_socket_pull_inproc_address ] &BEGIN_PROVIDER [ character*(128), zmq_socket_push_inproc_address ] -&BEGIN_PROVIDER [ character*(128), zmq_socket_sub_address ] +&BEGIN_PROVIDER [ character*(128), zmq_socket_sub_tcp_address ] use f77_zmq implicit none BEGIN_DOC @@ -59,12 +54,12 @@ END_PROVIDER END_DOC character*(8), external :: zmq_port + zmq_socket_sub_tcp_address = trim(qp_run_address)//':'//zmq_port(1)//' ' zmq_socket_pull_tcp_address = 'tcp://*:'//zmq_port(2)//' ' + zmq_socket_push_tcp_address = trim(qp_run_address)//':'//zmq_port(2)//' ' zmq_socket_pull_inproc_address = 'inproc://'//zmq_port(2)//' ' - zmq_socket_pair_inproc_address = 'inproc://'//zmq_port(3)//' ' - zmq_socket_push_tcp_address = trim(qp_run_address_tcp)//':'//zmq_port(2)//' ' zmq_socket_push_inproc_address = zmq_socket_pull_inproc_address - zmq_socket_sub_address = trim(qp_run_address)//':'//zmq_port(1)//' ' + zmq_socket_pair_inproc_address = 'inproc://'//zmq_port(3)//' ' ! /!\ Don't forget to change subroutine reset_zmq_addresses END_PROVIDER @@ -77,13 +72,12 @@ subroutine reset_zmq_addresses END_DOC character*(8), external :: zmq_port + zmq_socket_sub_tcp_address = trim(qp_run_address)//':'//zmq_port(1)//' ' zmq_socket_pull_tcp_address = 'tcp://*:'//zmq_port(2)//' ' + zmq_socket_push_tcp_address = trim(qp_run_address)//':'//zmq_port(2)//' ' zmq_socket_pull_inproc_address = 'inproc://'//zmq_port(2)//' ' - zmq_socket_pair_inproc_address = 'inproc://'//zmq_port(3)//' ' - zmq_socket_push_tcp_address = trim(qp_run_address_tcp)//':'//zmq_port(2)//' ' zmq_socket_push_inproc_address = zmq_socket_pull_inproc_address - zmq_socket_sub_address = trim(qp_run_address)//':'//zmq_port(1)//' ' - + zmq_socket_pair_inproc_address = 'inproc://'//zmq_port(3)//' ' end @@ -111,7 +105,6 @@ subroutine switch_qp_run_to_master exit endif enddo - qp_run_address_tcp = qp_run_address call reset_zmq_addresses end @@ -374,7 +367,7 @@ function new_zmq_sub_socket() stop 'Unable to subscribe new_zmq_sub_socket' endif - rc = f77_zmq_connect(new_zmq_sub_socket, zmq_socket_sub_address) + rc = f77_zmq_connect(new_zmq_sub_socket, zmq_socket_sub_tcp_address) if (rc /= 0) then stop 'Unable to connect new_zmq_sub_socket' endif From 8802d988490bff8ffa656b1484d6526a343c65b1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 27 Oct 2016 13:49:29 +0200 Subject: [PATCH 11/15] wrong dimensions in s2_out --- src/Davidson/diagonalization_hs2.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index a7bc2b95..d7ec11b6 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -22,7 +22,7 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint, iunit integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) double precision, intent(inout) :: u_in(dim_in,N_st_diag) - double precision, intent(out) :: energies(N_st), s2_out(N_st) + double precision, intent(out) :: energies(N_st), s2_out(N_st_diag) double precision, allocatable :: H_jj(:), S2_jj(:) double precision :: diag_h_mat_elem From afc4111e24dc5f0578346b863fa0ebfee4fdf9a6 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 28 Oct 2016 17:10:49 +0200 Subject: [PATCH 12/15] Fixed "Unable to bind socket" --- configure | 5 ++++- src/ZMQ/utils.irp.f | 35 ++++++++++++++++++++++++++++------- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/configure b/configure index 19016136..8cb02608 100755 --- a/configure +++ b/configure @@ -487,7 +487,6 @@ def create_ninja_and_rc(l_installed): l_rc = [ 'export QP_ROOT={0}'.format(QP_ROOT), - '#export QP_NIC=ib0 # Choose the correct network inuterface', 'export QP_EZFIO={0}'.format(path_ezfio.replace(QP_ROOT,"${QP_ROOT}")), 'export QP_PYTHON={0}'.format(":".join(l_python)), "", 'export IRPF90={0}'.format(path_irpf90.replace(QP_ROOT,"${QP_ROOT}")), @@ -498,6 +497,10 @@ def create_ninja_and_rc(l_installed): 'export LIBRARY_PATH="${QP_ROOT}"/lib:"${LIBRARY_PATH}"', "", 'source ${QP_ROOT}/install/EZFIO/Bash/ezfio.sh', "", 'source ${HOME}/.opam/opam-init/init.sh > /dev/null 2> /dev/null || true', + '', + '# Choose the correct network interface', + '# export QP_NIC=ib0', + '# export QP_NIC=eth0', "" ] diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index f2703ff8..444c33fe 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -257,15 +257,36 @@ function new_zmq_pull_socket() stop 'Unable to set ZMQ_RCVHWM on pull socket' endif - rc = f77_zmq_bind(new_zmq_pull_socket, zmq_socket_pull_tcp_address) - if (rc /= 0) then - print *, 'Unable to bind new_zmq_pull_socket (tcp)', zmq_socket_pull_tcp_address + integer :: icount + + icount = 10 + do while (icount > 0) + rc = f77_zmq_bind(new_zmq_pull_socket, zmq_socket_pull_inproc_address) + if (rc /= 0) then + icount = icount-1 + call sleep(3) + endif + enddo + + if (icount == 0) then + print *, 'Unable to bind new_zmq_pull_socket (inproc)', zmq_socket_pull_inproc_address stop endif - - rc = f77_zmq_bind(new_zmq_pull_socket, zmq_socket_pull_inproc_address) - if (rc /= 0) then - stop 'Unable to bind new_zmq_pull_socket (inproc)' + + + icount = 10 + do while (icount > 0) + rc = f77_zmq_bind(new_zmq_pull_socket, zmq_socket_pull_tcp_address) + if (rc /= 0) then + icount = icount-1 + call sleep(3) + endif + + enddo + + if (icount == 0) then + print *, 'Unable to bind new_zmq_pull_socket (tcp)', zmq_socket_pull_tcp_address + stop endif end From 08ac74cc2dfe8c7f91b36c6e6da2ecb8911a2808 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 28 Oct 2016 18:18:46 +0200 Subject: [PATCH 13/15] Fixed binding bug --- src/ZMQ/utils.irp.f | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index 444c33fe..b2b5b7b4 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -265,12 +265,14 @@ function new_zmq_pull_socket() if (rc /= 0) then icount = icount-1 call sleep(3) + else + exit endif enddo if (icount == 0) then print *, 'Unable to bind new_zmq_pull_socket (inproc)', zmq_socket_pull_inproc_address - stop + stop -1 endif @@ -280,13 +282,14 @@ function new_zmq_pull_socket() if (rc /= 0) then icount = icount-1 call sleep(3) + else + exit endif - enddo if (icount == 0) then print *, 'Unable to bind new_zmq_pull_socket (tcp)', zmq_socket_pull_tcp_address - stop + stop -1 endif end From 156a3f551bc859c232bb181e31d83d9cd4ce5ca7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 28 Oct 2016 21:59:39 +0200 Subject: [PATCH 14/15] Accelerated integral access --- src/Integrals_Bielec/map_integrals.irp.f | 85 ++++++++++++++---------- 1 file changed, 51 insertions(+), 34 deletions(-) diff --git a/src/Integrals_Bielec/map_integrals.irp.f b/src/Integrals_Bielec/map_integrals.irp.f index afc573aa..22fd48a6 100644 --- a/src/Integrals_Bielec/map_integrals.irp.f +++ b/src/Integrals_Bielec/map_integrals.irp.f @@ -120,15 +120,16 @@ end END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_integrals_cache, (ao_integrals_cache_min:ao_integrals_cache_max,ao_integrals_cache_min:ao_integrals_cache_max,ao_integrals_cache_min:ao_integrals_cache_max,ao_integrals_cache_min:ao_integrals_cache_max) ] +BEGIN_PROVIDER [ double precision, ao_integrals_cache, (0:64*64*64*64) ] implicit none BEGIN_DOC ! Cache of AO integrals for fast access END_DOC PROVIDE ao_bielec_integrals_in_map - integer :: i,j,k,l + integer :: i,j,k,l,ii integer(key_kind) :: idx - !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx) + real(integral_kind) :: integral + !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral) do l=ao_integrals_cache_min,ao_integrals_cache_max do k=ao_integrals_cache_min,ao_integrals_cache_max do j=ao_integrals_cache_min,ao_integrals_cache_max @@ -136,7 +137,12 @@ BEGIN_PROVIDER [ double precision, ao_integrals_cache, (ao_integrals_cache_min:a !DIR$ FORCEINLINE call bielec_integrals_index(i,j,k,l,idx) !DIR$ FORCEINLINE - call map_get(ao_integrals_map,idx,ao_integrals_cache(i,j,k,l)) + call map_get(ao_integrals_map,idx,integral) + ii = l-ao_integrals_cache_min + ii = ior( ishft(ii,6), k-ao_integrals_cache_min) + ii = ior( ishft(ii,6), j-ao_integrals_cache_min) + ii = ior( ishft(ii,6), i-ao_integrals_cache_min) + ao_integrals_cache(ii) = integral enddo enddo enddo @@ -155,30 +161,33 @@ double precision function get_ao_bielec_integral(i,j,k,l,map) integer, intent(in) :: i,j,k,l integer(key_kind) :: idx type(map_type), intent(inout) :: map + integer :: ii real(integral_kind) :: tmp - PROVIDE ao_bielec_integrals_in_map + PROVIDE ao_bielec_integrals_in_map ao_integrals_cache ao_integrals_cache_min !DIR$ FORCEINLINE if (ao_overlap_abs(i,k)*ao_overlap_abs(j,l) < ao_integrals_threshold ) then tmp = 0.d0 else if (ao_bielec_integral_schwartz(i,k)*ao_bielec_integral_schwartz(j,l) < ao_integrals_threshold) then tmp = 0.d0 else - if ( (i >= ao_integrals_cache_min) .and. & - (j >= ao_integrals_cache_min) .and. & - (k >= ao_integrals_cache_min) .and. & - (l >= ao_integrals_cache_min) .and. & - (i <= ao_integrals_cache_max) .and. & - (j <= ao_integrals_cache_max) .and. & - (k <= ao_integrals_cache_max) .and. & - (l <= ao_integrals_cache_max) ) then - tmp = ao_integrals_cache(i,j,k,l) - else + ii = l-ao_integrals_cache_min + ii = ior(ii, k-ao_integrals_cache_min) + ii = ior(ii, j-ao_integrals_cache_min) + ii = ior(ii, i-ao_integrals_cache_min) + if (iand(ii, -64) /= 0) then !DIR$ FORCEINLINE call bielec_integrals_index(i,j,k,l,idx) + !DIR$ FORCEINLINE call map_get(map,idx,tmp) + get_ao_bielec_integral = dble(tmp) + else + ii = l-ao_integrals_cache_min + ii = ior( ishft(ii,6), k-ao_integrals_cache_min) + ii = ior( ishft(ii,6), j-ao_integrals_cache_min) + ii = ior( ishft(ii,6), i-ao_integrals_cache_min) + get_ao_bielec_integral = ao_integrals_cache(ii) endif endif - get_ao_bielec_integral = tmp end @@ -324,20 +333,22 @@ end ! Min and max values of the MOs for which the integrals are in the cache END_DOC mo_integrals_cache_min = max(1,elec_alpha_num - 31) - mo_integrals_cache_max = min(mo_tot_num,elec_alpha_num + 32) + mo_integrals_cache_max = min(mo_tot_num,mo_integrals_cache_min+63) END_PROVIDER -BEGIN_PROVIDER [ double precision, mo_integrals_cache, (mo_integrals_cache_min:mo_integrals_cache_max,mo_integrals_cache_min:mo_integrals_cache_max,mo_integrals_cache_min:mo_integrals_cache_max,mo_integrals_cache_min:mo_integrals_cache_max) ] +BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0:64*64*64*64) ] implicit none BEGIN_DOC ! Cache of MO integrals for fast access END_DOC PROVIDE mo_bielec_integrals_in_map integer :: i,j,k,l + integer :: ii integer(key_kind) :: idx + real(integral_kind) :: integral FREE ao_integrals_cache - !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx) + !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral) do l=mo_integrals_cache_min,mo_integrals_cache_max do k=mo_integrals_cache_min,mo_integrals_cache_max do j=mo_integrals_cache_min,mo_integrals_cache_max @@ -345,7 +356,12 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (mo_integrals_cache_min:m !DIR$ FORCEINLINE call bielec_integrals_index(i,j,k,l,idx) !DIR$ FORCEINLINE - call map_get(mo_integrals_map,idx,mo_integrals_cache(i,j,k,l)) + call map_get(mo_integrals_map,idx,integral) + ii = l-mo_integrals_cache_min + ii = ior( ishft(ii,6), k-mo_integrals_cache_min) + ii = ior( ishft(ii,6), j-mo_integrals_cache_min) + ii = ior( ishft(ii,6), i-mo_integrals_cache_min) + mo_integrals_cache(ii) = integral enddo enddo enddo @@ -362,24 +378,26 @@ double precision function get_mo_bielec_integral(i,j,k,l,map) END_DOC integer, intent(in) :: i,j,k,l integer(key_kind) :: idx + integer :: ii type(map_type), intent(inout) :: map real(integral_kind) :: tmp PROVIDE mo_bielec_integrals_in_map mo_integrals_cache - if ( (i >= mo_integrals_cache_min) .and. & - (j >= mo_integrals_cache_min) .and. & - (k >= mo_integrals_cache_min) .and. & - (l >= mo_integrals_cache_min) .and. & - (i <= mo_integrals_cache_max) .and. & - (j <= mo_integrals_cache_max) .and. & - (k <= mo_integrals_cache_max) .and. & - (l <= mo_integrals_cache_max) ) then - get_mo_bielec_integral = mo_integrals_cache(i,j,k,l) - else + ii = l-mo_integrals_cache_min + ii = ior(ii, k-mo_integrals_cache_min) + ii = ior(ii, j-mo_integrals_cache_min) + ii = ior(ii, i-mo_integrals_cache_min) + if (iand(ii, -64) /= 0) then !DIR$ FORCEINLINE call bielec_integrals_index(i,j,k,l,idx) !DIR$ FORCEINLINE call map_get(map,idx,tmp) get_mo_bielec_integral = dble(tmp) + else + ii = l-mo_integrals_cache_min + ii = ior( ishft(ii,6), k-mo_integrals_cache_min) + ii = ior( ishft(ii,6), j-mo_integrals_cache_min) + ii = ior( ishft(ii,6), i-mo_integrals_cache_min) + get_mo_bielec_integral = mo_integrals_cache(ii) endif end @@ -390,16 +408,15 @@ double precision function get_mo_bielec_integral_schwartz(i,j,k,l,map) ! Returns one integral in the MO basis END_DOC integer, intent(in) :: i,j,k,l - integer(key_kind) :: idx type(map_type), intent(inout) :: map - real(integral_kind) :: tmp + double precision, external :: get_mo_bielec_integral + PROVIDE mo_bielec_integrals_in_map mo_integrals_cache if (mo_bielec_integral_schwartz(i,k)*mo_bielec_integral_schwartz(j,l) > mo_integrals_threshold) then - double precision, external :: get_mo_bielec_integral !DIR$ FORCEINLINE get_mo_bielec_integral_schwartz = get_mo_bielec_integral(i,j,k,l,map) else - tmp = 0.d0 + get_mo_bielec_integral_schwartz = 0.d0 endif end From 3946c710feb8454d53f74ba8dc16390c2de01429 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 28 Oct 2016 22:26:20 +0200 Subject: [PATCH 15/15] Accelerated mono-excitations (mipi miip) --- src/Determinants/slater_rules.irp.f | 131 +++------------------ src/Integrals_Bielec/map_integrals.irp.f | 1 + src/Integrals_Bielec/mo_bi_integrals.irp.f | 25 ++++ 3 files changed, 40 insertions(+), 117 deletions(-) diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index 67463088..7df6e79e 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -515,8 +515,6 @@ subroutine i_H_j(key_i,key_j,Nint,hij) integer :: occ(Nint*bit_kind_size,2) double precision :: diag_H_mat_elem, phase,phase_2 integer :: n_occ_ab(2) - logical :: has_mipi(Nint*bit_kind_size) - double precision :: mipi(Nint*bit_kind_size), miip(Nint*bit_kind_size) PROVIDE mo_bielec_integrals_in_map mo_integrals_map ASSERT (Nint > 0) @@ -568,59 +566,27 @@ subroutine i_H_j(key_i,key_j,Nint,hij) call get_mono_excitation(key_i,key_j,exc,phase,Nint) !DIR$ FORCEINLINE call bitstring_to_list_ab(key_i, occ, n_occ_ab, Nint) - has_mipi = .False. if (exc(0,1,1) == 1) then ! Mono alpha m = exc(1,1,1) p = exc(1,2,1) do k = 1, elec_alpha_num - i = occ(k,1) - if (.not.has_mipi(i)) then - mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map) - miip(i) = get_mo_bielec_integral(m,i,i,p,mo_integrals_map) - has_mipi(i) = .True. - endif + hij = hij + mo_bielec_integral_mipi_anti(occ(k,1),m,p) enddo do k = 1, elec_beta_num - i = occ(k,2) - if (.not.has_mipi(i)) then - mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map) - has_mipi(i) = .True. - endif - enddo - - do k = 1, elec_alpha_num - hij = hij + mipi(occ(k,1)) - miip(occ(k,1)) - enddo - do k = 1, elec_beta_num - hij = hij + mipi(occ(k,2)) + hij = hij + mo_bielec_integral_mipi(occ(k,2),m,p) enddo else ! Mono beta m = exc(1,1,2) p = exc(1,2,2) - do k = 1, elec_beta_num - i = occ(k,2) - if (.not.has_mipi(i)) then - mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map) - miip(i) = get_mo_bielec_integral(m,i,i,p,mo_integrals_map) - has_mipi(i) = .True. - endif - enddo + do k = 1, elec_alpha_num - i = occ(k,1) - if (.not.has_mipi(i)) then - mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map) - has_mipi(i) = .True. - endif - enddo - - do k = 1, elec_alpha_num - hij = hij + mipi(occ(k,1)) + hij = hij + mo_bielec_integral_mipi(occ(k,1),m,p) enddo do k = 1, elec_beta_num - hij = hij + mipi(occ(k,2)) - miip(occ(k,2)) + hij = hij + mo_bielec_integral_mipi_anti(occ(k,2),m,p) enddo endif @@ -651,8 +617,6 @@ subroutine i_H_j_phase_out(key_i,key_j,Nint,hij,phase,exc,degree) integer :: occ(Nint*bit_kind_size,2) double precision :: diag_H_mat_elem integer :: n_occ_ab(2) - logical :: has_mipi(Nint*bit_kind_size) - double precision :: mipi(Nint*bit_kind_size), miip(Nint*bit_kind_size) PROVIDE mo_bielec_integrals_in_map mo_integrals_map ASSERT (Nint > 0) @@ -704,59 +668,27 @@ subroutine i_H_j_phase_out(key_i,key_j,Nint,hij,phase,exc,degree) call get_mono_excitation(key_i,key_j,exc,phase,Nint) !DIR$ FORCEINLINE call bitstring_to_list_ab(key_i, occ, n_occ_ab, Nint) - has_mipi = .False. if (exc(0,1,1) == 1) then ! Mono alpha m = exc(1,1,1) p = exc(1,2,1) do k = 1, elec_alpha_num - i = occ(k,1) - if (.not.has_mipi(i)) then - mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map) - miip(i) = get_mo_bielec_integral(m,i,i,p,mo_integrals_map) - has_mipi(i) = .True. - endif + hij = hij + mo_bielec_integral_mipi_anti(occ(k,1),m,p) enddo do k = 1, elec_beta_num - i = occ(k,2) - if (.not.has_mipi(i)) then - mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map) - has_mipi(i) = .True. - endif + hij = hij + mo_bielec_integral_mipi(occ(k,2),m,p) enddo - - do k = 1, elec_alpha_num - hij = hij + mipi(occ(k,1)) - miip(occ(k,1)) - enddo - do k = 1, elec_beta_num - hij = hij + mipi(occ(k,2)) - enddo - + else ! Mono beta m = exc(1,1,2) p = exc(1,2,2) - do k = 1, elec_beta_num - i = occ(k,2) - if (.not.has_mipi(i)) then - mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map) - miip(i) = get_mo_bielec_integral(m,i,i,p,mo_integrals_map) - has_mipi(i) = .True. - endif - enddo - do k = 1, elec_alpha_num - i = occ(k,1) - if (.not.has_mipi(i)) then - mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map) - has_mipi(i) = .True. - endif - enddo do k = 1, elec_alpha_num - hij = hij + mipi(occ(k,1)) + hij = hij + mo_bielec_integral_mipi(occ(k,1),m,p) enddo do k = 1, elec_beta_num - hij = hij + mipi(occ(k,2)) - miip(occ(k,2)) + hij = hij + mo_bielec_integral_mipi_anti(occ(k,2),m,p) enddo endif @@ -787,8 +719,6 @@ subroutine i_H_j_verbose(key_i,key_j,Nint,hij,hmono,hdouble) integer :: occ(Nint*bit_kind_size,2) double precision :: diag_H_mat_elem, phase,phase_2 integer :: n_occ_ab(2) - logical :: has_mipi(Nint*bit_kind_size) - double precision :: mipi(Nint*bit_kind_size), miip(Nint*bit_kind_size) PROVIDE mo_bielec_integrals_in_map mo_integrals_map ASSERT (Nint > 0) @@ -842,59 +772,26 @@ subroutine i_H_j_verbose(key_i,key_j,Nint,hij,hmono,hdouble) call get_mono_excitation(key_i,key_j,exc,phase,Nint) !DIR$ FORCEINLINE call bitstring_to_list_ab(key_i, occ, n_occ_ab, Nint) - has_mipi = .False. if (exc(0,1,1) == 1) then ! Mono alpha m = exc(1,1,1) p = exc(1,2,1) do k = 1, elec_alpha_num - i = occ(k,1) - if (.not.has_mipi(i)) then - mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map) - miip(i) = get_mo_bielec_integral(m,i,i,p,mo_integrals_map) - has_mipi(i) = .True. - endif + hdouble = hdouble + mo_bielec_integral_mipi_anti(occ(k,1),m,p) enddo do k = 1, elec_beta_num - i = occ(k,2) - if (.not.has_mipi(i)) then - mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map) - has_mipi(i) = .True. - endif - enddo - - do k = 1, elec_alpha_num - hdouble = hdouble + mipi(occ(k,1)) - miip(occ(k,1)) - enddo - do k = 1, elec_beta_num - hdouble = hdouble + mipi(occ(k,2)) + hdouble = hdouble + mo_bielec_integral_mipi(occ(k,2),m,p) enddo else ! Mono beta m = exc(1,1,2) p = exc(1,2,2) - do k = 1, elec_beta_num - i = occ(k,2) - if (.not.has_mipi(i)) then - mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map) - miip(i) = get_mo_bielec_integral(m,i,i,p,mo_integrals_map) - has_mipi(i) = .True. - endif - enddo do k = 1, elec_alpha_num - i = occ(k,1) - if (.not.has_mipi(i)) then - mipi(i) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map) - has_mipi(i) = .True. - endif - enddo - - do k = 1, elec_alpha_num - hdouble = hdouble + mipi(occ(k,1)) + hdouble = hdouble + mo_bielec_integral_mipi(occ(k,1),m,p) enddo do k = 1, elec_beta_num - hdouble = hdouble + mipi(occ(k,2)) - miip(occ(k,2)) + hdouble = hdouble + mo_bielec_integral_mipi_anti(occ(k,2),m,p) enddo endif diff --git a/src/Integrals_Bielec/map_integrals.irp.f b/src/Integrals_Bielec/map_integrals.irp.f index 22fd48a6..b41a3177 100644 --- a/src/Integrals_Bielec/map_integrals.irp.f +++ b/src/Integrals_Bielec/map_integrals.irp.f @@ -370,6 +370,7 @@ BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0:64*64*64*64) ] END_PROVIDER + double precision function get_mo_bielec_integral(i,j,k,l,map) use map_module implicit none diff --git a/src/Integrals_Bielec/mo_bi_integrals.irp.f b/src/Integrals_Bielec/mo_bi_integrals.irp.f index 0a468c24..e581b536 100644 --- a/src/Integrals_Bielec/mo_bi_integrals.irp.f +++ b/src/Integrals_Bielec/mo_bi_integrals.irp.f @@ -467,6 +467,31 @@ END_PROVIDER enddo enddo +END_PROVIDER + + BEGIN_PROVIDER [ double precision, mo_bielec_integral_mipi, (mo_tot_num_align,mo_tot_num,mo_tot_num) ] +&BEGIN_PROVIDER [ double precision, mo_bielec_integral_mipi_anti, (mo_tot_num_align,mo_tot_num,mo_tot_num) ] + implicit none + BEGIN_DOC + ! and - . Indices are (i,m,p) + END_DOC + + integer :: m,i,p + double precision :: get_mo_bielec_integral + + PROVIDE mo_bielec_integrals_in_map + + mo_bielec_integral_mipi = 0.d0 + mo_bielec_integral_mipi_anti = 0.d0 + do p=1,mo_tot_num + do m=1,mo_tot_num + do i=1,mo_tot_num + mo_bielec_integral_mipi(i,m,p) = get_mo_bielec_integral(m,i,p,i,mo_integrals_map) + mo_bielec_integral_mipi_anti(i,m,p) = mo_bielec_integral_mipi(i,m,p) - get_mo_bielec_integral(m,i,i,p,mo_integrals_map) + enddo + enddo + enddo + END_PROVIDER BEGIN_PROVIDER [ double precision, mo_bielec_integral_schwartz,(mo_tot_num,mo_tot_num) ]