From 0f9b2dbfe48f821ec9f67bf0008488d93a0ca2de Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 17 Jan 2023 01:54:58 +0100 Subject: [PATCH 1/9] fast restore symmetry --- src/utils/linear_algebra.irp.f | 126 +++++++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 4 deletions(-) diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 1e33c7dc..9517766e 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1690,6 +1690,19 @@ subroutine restore_symmetry(m,n,A,LDA,thresh) thresh2 = dsqrt(thresh) call nullify_small_elements(m,n,A,LDA,thresh) + ! Debug + !double precision, allocatable :: B(:,:) + !double precision :: max_diff, ti,tf + !allocate(B(m,n)) + !B = A + !call wall_time(ti) + !call restore_symmetry_fast(m,n,B,LDA,thresh) + !call wall_time(tf) + !print*,'' + !print*,'Restore_symmetry' + !print*,'Fast version:',tf-ti,'s' + !call wall_time(ti) + ! if (.not.restore_symm) then ! return ! endif @@ -1749,23 +1762,128 @@ subroutine restore_symmetry(m,n,A,LDA,thresh) enddo enddo + ! Debug + !call wall_time(tf) + !print*,'Old version:',tf-ti,'s' + + !max_diff = 0d0 + !do j = 1, n + ! do i = 1, n + ! if (dabs(A(i,j)-B(i,j)) > max_diff) then + ! max_diff = dabs(A(i,j)-B(i,j)) + ! endif + ! enddo + !enddo + !print*,'Max diff:', max_diff + !deallocate(B) + end +subroutine restore_symmetry_fast(m,n,A,LDA,thresh) + implicit none + BEGIN_DOC + ! Tries to find the matrix elements that are the same, and sets them + ! to the average value. + ! If restore_symm is False, only nullify small elements + END_DOC + integer, intent(in) :: m,n,LDA + double precision, intent(inout) :: A(LDA,n) + double precision, intent(in) :: thresh + double precision, allocatable :: copy(:), copy_sign(:) + integer, allocatable :: key(:) + integer :: sze, pi, pf, idx, i,j,k + double precision :: average, val, thresh2 + thresh2 = dsqrt(thresh) + call nullify_small_elements(m,n,A,LDA,thresh) + + sze = m * n + allocate(copy(sze),copy_sign(sze),key(sze)) + + ! Copy to 1D + !$OMP PARALLEL & + !$OMP SHARED(A,m,n,sze,copy_sign,copy,key) & + !$OMP PRIVATE(i,j,k) & + !$OMP DEFAULT(NONE) + !$OMP DO + do j = 1, n + do i = 1, m + copy(i+(j-1)*m) = A(i,j) + enddo + enddo + !$OMP END DO + ! Copy sign + !$OMP DO + do i = 1,sze + copy_sign(i) = sign(1d0,copy(i)) + copy(i) = dabs(copy(i)) + enddo + !$OMP END DO NOWAIT + ! Keys + !$OMP DO + do i = 1, sze + key(i) = i + enddo + !$OMP END DO + !$OMP END PARALLEL + ! Sort + call dsort(copy,key,sze) + ! Symmetrize + i = 1 + do while(i < sze) + pi = i + pf = i + val = copy(i) + do while (dabs(val - copy(pf+1)) < thresh2) + pf = pf + 1 + ! if pf == sze, copy(pf+1) will not be valid + if (pf == sze) then + exit + endif + enddo + ! if pi and pf are different do the average from pi to pf + if (pf - pi > 0) then + average = 0d0 + do j = pi, pf + average = average + copy(j) + enddo + average = average / (pf-pi+1) + do j = pi, pf + copy(j) = average + enddo + ! Update i + i = pf + endif + ! Update i + i = i + 1 + enddo + !$OMP PARALLEL & + !$OMP SHARED(m,sze,copy_sign,copy,key,A) & + !$OMP PRIVATE(i,j,k,idx) & + !$OMP DEFAULT(NONE) + ! copy -> A + !$OMP DO + do k = 1, sze + idx = key(k) + i = mod(idx-1,m) + 1 + j = (idx-1) / m + 1 + ! New value with the right sign + A(i,j) = sign(copy(k),copy_sign(idx)) + enddo + !$OMP END DO + !$OMP END PARALLEL + deallocate(copy,copy_sign,key) - - - - +end !subroutine svd_s(A, LDA, U, LDU, D, Vt, LDVt, m, n) ! implicit none From 17add36bdac7e0093ba993ced16e62d30cb4d7b7 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 17 Jan 2023 09:49:14 +0100 Subject: [PATCH 2/9] remove error, replace symmetry condition --- src/utils/linear_algebra.irp.f | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 9517766e..20599325 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1835,13 +1835,21 @@ subroutine restore_symmetry_fast(m,n,A,LDA,thresh) ! Sort call dsort(copy,key,sze) + !TODO + ! Parallelization with OMP + + ! Jump all the elements below thresh + i = 1 + do while (copy(i) <= thresh) + i = i + 1 + enddo + ! Symmetrize - i = 1 do while(i < sze) pi = i pf = i - val = copy(i) - do while (dabs(val - copy(pf+1)) < thresh2) + val = 1d0/copy(i) + do while (dabs(val * copy(pf+1) - 1d0) < thresh2) pf = pf + 1 ! if pf == sze, copy(pf+1) will not be valid if (pf == sze) then From 3d15420fb93c6becd64f1b6b34036ba319ea77e7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 17 Jan 2023 11:12:54 +0200 Subject: [PATCH 3/9] Configuration for LUMI --- config/cray_gfortran.cfg | 68 +++++++++++++++++++++++++++++ config/{cray.cfg => cray_intel.cfg} | 0 configure | 4 +- 3 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 config/cray_gfortran.cfg rename config/{cray.cfg => cray_intel.cfg} (100%) diff --git a/config/cray_gfortran.cfg b/config/cray_gfortran.cfg new file mode 100644 index 00000000..1d1013b7 --- /dev/null +++ b/config/cray_gfortran.cfg @@ -0,0 +1,68 @@ +# On LUMI +# +# export SPACK_USER_PREFIX=$HOME/spack +# module swap PrgEnv-cray/8.3.3 PrgEnv-gnu/8.3.3 +# module load spack/22.08 +# module load openblas/0.3.17-gcc-omp-xi +# Common flags +############## +# +# -ffree-line-length-none : Needed for IRPF90 which produces long lines +# -lblas -llapack : Link with libblas and liblapack libraries provided by the system +# -I . : Include the curent directory (Mandatory) +# +# --ninja : Allow the utilisation of ninja. (Mandatory) +# --align=32 : Align all provided arrays on a 32-byte boundary +# +# +[COMMON] +FC : gfortran -ffree-line-length-none -I . -mavx -g -fPIC +LAPACK_LIB : -L/appl/lumi/spack/22.08/0.18.1/opt/spack/openblas-0.3.17-xinceno/lib -lopenblas +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -Ofast : Disregard strict standards compliance. Enables all -O3 optimizations. +# It also enables optimizations that are not valid +# for all standard-compliant programs. It turns on +# -ffast-math and the Fortran-specific +# -fno-protect-parens and -fstack-arrays. +[OPT] +FCFLAGS : -Ofast -march=native + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -Ofast + +# Debugging flags +################# +# +# -fcheck=all : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# +[DEBUG] +FCFLAGS : -fcheck=all -g + +# OpenMP flags +################# +# +[OPENMP] +FC : -fopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/cray.cfg b/config/cray_intel.cfg similarity index 100% rename from config/cray.cfg rename to config/cray_intel.cfg diff --git a/configure b/configure index 79cd7119..852082e7 100755 --- a/configure +++ b/configure @@ -99,7 +99,7 @@ PACKAGES=$(echo $PACKAGES | xargs) echo "export QP_ROOT=\"$QP_ROOT\"" > ${QP_ROOT}/etc/00.qp_root.rc -source quantum_package.rc +source ${QP_ROOT}/quantum_package.rc @@ -293,7 +293,7 @@ EOF done -source quantum_package.rc +source ${QP_ROOT}/quantum_package.rc NINJA=$(find_exe ninja) if [[ ${NINJA} = $(not_found) ]] ; then From 2f937cbca4b8df1d663cf61e01329b08eddd4128 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 17 Jan 2023 13:07:50 +0100 Subject: [PATCH 4/9] Use Yann's restore_symmetry --- src/determinants/slater_rules.irp.f | 21 +++- src/utils/linear_algebra.irp.f | 175 +++++----------------------- 2 files changed, 52 insertions(+), 144 deletions(-) diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f index b9710fd1..9bab9213 100644 --- a/src/determinants/slater_rules.irp.f +++ b/src/determinants/slater_rules.irp.f @@ -3,8 +3,27 @@ subroutine get_excitation_degree(key1,key2,degree,Nint) include 'utils/constants.include.F' implicit none BEGIN_DOC - ! Returns the excitation degree between two determinants. + ! This function calculates the excitation degree between two + ! determinants, which is half the number of bits that are different between the two + ! determinants. The function takes four arguments: + ! + ! * key1: An integer array of length Nint*2, representing the first determinant. + ! + ! * key2: An integer array of length Nint*2, representing the second determinant. + ! + ! * degree: An integer, passed by reference, that will store the calculated excitation degree. + ! + ! * Nint: An integer representing the number of integers in each of the key1 and key2 arrays. + ! + ! It starts a select case block that depends on the value of Nint. + ! In each case, the function first calculates the bitwise XOR of each + ! corresponding pair of elements in key1 and key2, storing the results in the + ! xorvec array. It then calculates the number of bits set (using the popcnt + ! function) for each element in xorvec, and sums these counts up. This sum is + ! stored in the degree variable. + ! Finally, the degree variable is right-shifted by 1 bit to divide the result by 2. END_DOC + integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key1(Nint*2) integer(bit_kind), intent(in) :: key2(Nint*2) diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 20599325..c8a03e65 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1675,111 +1675,6 @@ subroutine nullify_small_elements(m,n,A,LDA,thresh) end subroutine restore_symmetry(m,n,A,LDA,thresh) - implicit none - BEGIN_DOC -! Tries to find the matrix elements that are the same, and sets them -! to the average value. -! If restore_symm is False, only nullify small elements - END_DOC - integer, intent(in) :: m,n,LDA - double precision, intent(inout) :: A(LDA,n) - double precision, intent(in) :: thresh - integer :: i,j,k,l - logical, allocatable :: done(:,:) - double precision :: f, g, count, thresh2 - thresh2 = dsqrt(thresh) - call nullify_small_elements(m,n,A,LDA,thresh) - - ! Debug - !double precision, allocatable :: B(:,:) - !double precision :: max_diff, ti,tf - !allocate(B(m,n)) - !B = A - !call wall_time(ti) - !call restore_symmetry_fast(m,n,B,LDA,thresh) - !call wall_time(tf) - !print*,'' - !print*,'Restore_symmetry' - !print*,'Fast version:',tf-ti,'s' - !call wall_time(ti) - -! if (.not.restore_symm) then -! return -! endif - - ! TODO: Costs O(n^4), but can be improved to (2 n^2 * log(n)): - ! - copy all values in a 1D array - ! - sort 1D array - ! - average nearby elements - ! - for all elements, find matching value in the sorted 1D array - - allocate(done(m,n)) - - do j=1,n - do i=1,m - done(i,j) = A(i,j) == 0.d0 - enddo - enddo - - do j=1,n - do i=1,m - if ( done(i,j) ) cycle - done(i,j) = .True. - count = 1.d0 - f = 1.d0/A(i,j) - do l=1,n - do k=1,m - if ( done(k,l) ) cycle - g = f * A(k,l) - if ( dabs(dabs(g) - 1.d0) < thresh2 ) then - count = count + 1.d0 - if (g>0.d0) then - A(i,j) = A(i,j) + A(k,l) - else - A(i,j) = A(i,j) - A(k,l) - end if - endif - enddo - enddo - if (count > 1.d0) then - A(i,j) = A(i,j) / count - do l=1,n - do k=1,m - if ( done(k,l) ) cycle - g = f * A(k,l) - if ( dabs(dabs(g) - 1.d0) < thresh2 ) then - done(k,l) = .True. - if (g>0.d0) then - A(k,l) = A(i,j) - else - A(k,l) = -A(i,j) - end if - endif - enddo - enddo - endif - - enddo - enddo - - ! Debug - !call wall_time(tf) - !print*,'Old version:',tf-ti,'s' - - !max_diff = 0d0 - !do j = 1, n - ! do i = 1, n - ! if (dabs(A(i,j)-B(i,j)) > max_diff) then - ! max_diff = dabs(A(i,j)-B(i,j)) - ! endif - ! enddo - !enddo - !print*,'Max diff:', max_diff - !deallocate(B) - -end - -subroutine restore_symmetry_fast(m,n,A,LDA,thresh) implicit none @@ -1794,58 +1689,53 @@ subroutine restore_symmetry_fast(m,n,A,LDA,thresh) double precision, intent(in) :: thresh double precision, allocatable :: copy(:), copy_sign(:) - integer, allocatable :: key(:) + integer, allocatable :: key(:), ii(:), jj(:) integer :: sze, pi, pf, idx, i,j,k double precision :: average, val, thresh2 thresh2 = dsqrt(thresh) - call nullify_small_elements(m,n,A,LDA,thresh) - + sze = m * n - allocate(copy(sze),copy_sign(sze),key(sze)) - + allocate(copy(sze),copy_sign(sze),key(sze),ii(sze),jj(sze)) + ! Copy to 1D - !$OMP PARALLEL & - !$OMP SHARED(A,m,n,sze,copy_sign,copy,key) & + !$OMP PARALLEL if (m>100) & + !$OMP SHARED(A,m,n,sze,copy_sign,copy,key,ii,jj) & !$OMP PRIVATE(i,j,k) & !$OMP DEFAULT(NONE) - !$OMP DO + !$OMP DO COLLAPSE(2) do j = 1, n do i = 1, m - copy(i+(j-1)*m) = A(i,j) + k = i+(j-1)*m + copy(k) = A(i,j) + copy_sign(k) = sign(1.d0,copy(k)) + copy(k) = -dabs(copy(k)) + key(k) = k + ii(k) = i + jj(k) = j enddo enddo !$OMP END DO - ! Copy sign - !$OMP DO - do i = 1,sze - copy_sign(i) = sign(1d0,copy(i)) - copy(i) = dabs(copy(i)) - enddo - !$OMP END DO NOWAIT - ! Keys - !$OMP DO - do i = 1, sze - key(i) = i - enddo - !$OMP END DO !$OMP END PARALLEL ! Sort call dsort(copy,key,sze) + call iset_order(ii,key,sze) + call iset_order(jj,key,sze) + call dset_order(copy_sign,key,sze) - !TODO + !TODO ! Parallelization with OMP - ! Jump all the elements below thresh - i = 1 - do while (copy(i) <= thresh) - i = i + 1 - enddo +! ! Skip all the elements below thresh +! i = 1 +! do while (copy(i) <= thresh) +! i = i + 1 +! enddo ! Symmetrize - do while(i < sze) + do while( (i < sze).and.(-copy(i) > thresh) ) pi = i pf = i val = 1d0/copy(i) @@ -1862,7 +1752,7 @@ subroutine restore_symmetry_fast(m,n,A,LDA,thresh) do j = pi, pf average = average + copy(j) enddo - average = average / (pf-pi+1) + average = average / (pf-pi+1.d0) do j = pi, pf copy(j) = average enddo @@ -1872,24 +1762,23 @@ subroutine restore_symmetry_fast(m,n,A,LDA,thresh) ! Update i i = i + 1 enddo + copy(i:) = 0.d0 - !$OMP PARALLEL & - !$OMP SHARED(m,sze,copy_sign,copy,key,A) & + !$OMP PARALLEL if (sze>10000) & + !$OMP SHARED(m,sze,copy_sign,copy,key,A,ii,jj) & !$OMP PRIVATE(i,j,k,idx) & !$OMP DEFAULT(NONE) ! copy -> A !$OMP DO do k = 1, sze - idx = key(k) - i = mod(idx-1,m) + 1 - j = (idx-1) / m + 1 - ! New value with the right sign - A(i,j) = sign(copy(k),copy_sign(idx)) + i = ii(k) + j = jj(k) + A(i,j) = sign(copy(k),copy_sign(k)) enddo !$OMP END DO !$OMP END PARALLEL - deallocate(copy,copy_sign,key) + deallocate(copy,copy_sign,key,ii,jj) end From b101fd398df55a5395059563028e1455a9c1f12a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 17 Jan 2023 13:38:50 +0100 Subject: [PATCH 5/9] Removed collapse --- src/utils/linear_algebra.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index c8a03e65..c1304698 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1704,7 +1704,7 @@ subroutine restore_symmetry(m,n,A,LDA,thresh) !$OMP SHARED(A,m,n,sze,copy_sign,copy,key,ii,jj) & !$OMP PRIVATE(i,j,k) & !$OMP DEFAULT(NONE) - !$OMP DO COLLAPSE(2) + !$OMP DO do j = 1, n do i = 1, m k = i+(j-1)*m From e953cf44156151192decc6b461f358e7d98b6cc8 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 17 Jan 2023 13:40:59 +0100 Subject: [PATCH 6/9] avoid uninitialized phase in get_excitation --- src/determinants/slater_rules.irp.f | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f index 9bab9213..10881ea5 100644 --- a/src/determinants/slater_rules.irp.f +++ b/src/determinants/slater_rules.irp.f @@ -126,6 +126,8 @@ subroutine get_excitation(det1,det2,exc,degree,phase,Nint) return case(0) + ! Avoid uninitialized phase + phase = 1d0 return end select From 66ce35f5dbfbea8f07daf3a631faf8d2bcdc269c Mon Sep 17 00:00:00 2001 From: ydamour Date: Wed, 18 Jan 2023 09:05:16 +0100 Subject: [PATCH 7/9] bugfix in restore_symmetry --- src/utils/linear_algebra.irp.f | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index c1304698..44344a19 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1735,6 +1735,7 @@ subroutine restore_symmetry(m,n,A,LDA,thresh) ! enddo ! Symmetrize + i = 1 do while( (i < sze).and.(-copy(i) > thresh) ) pi = i pf = i From e1e9ae0941bd366c2f3f717d67a6e11cbe45502e Mon Sep 17 00:00:00 2001 From: ydamour Date: Mon, 23 Jan 2023 19:04:33 +0100 Subject: [PATCH 8/9] fix warning --- src/ao_one_e_ints/point_charges.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ao_one_e_ints/point_charges.irp.f b/src/ao_one_e_ints/point_charges.irp.f index c038458d..82388c0d 100644 --- a/src/ao_one_e_ints/point_charges.irp.f +++ b/src/ao_one_e_ints/point_charges.irp.f @@ -156,7 +156,7 @@ BEGIN_PROVIDER [ double precision, pts_charge_coord, (n_pts_charge,3) ] endif print*,'Coordinates for the point charges ' do i = 1, n_pts_charge - write(*,'(I3,X,3(F16.8,X))'),i,pts_charge_coord(i,1:3) + write(*,'(I3,X,3(F16.8,X))') i,pts_charge_coord(i,1:3) enddo END_PROVIDER From da344f6bcd0fff57b1f6d2200f9833a54ef80247 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 26 Jan 2023 12:46:01 +0100 Subject: [PATCH 9/9] Fix documentation --- docs/source/users_guide/qp_convert_output_to_ezfio.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/users_guide/qp_convert_output_to_ezfio.rst b/docs/source/users_guide/qp_convert_output_to_ezfio.rst index 171e2796..035ced4a 100644 --- a/docs/source/users_guide/qp_convert_output_to_ezfio.rst +++ b/docs/source/users_guide/qp_convert_output_to_ezfio.rst @@ -25,7 +25,7 @@ Usage .. note:: - All the parameters of the wave functgion need to be presente in the + All the parameters of the wave function need to be present in the output file : complete description of the |AO| basis set, full set of molecular orbitals, etc.