diff --git a/config/cray_gfortran.cfg b/config/cray_gfortran.cfg new file mode 100644 index 00000000..1d1013b7 --- /dev/null +++ b/config/cray_gfortran.cfg @@ -0,0 +1,68 @@ +# On LUMI +# +# export SPACK_USER_PREFIX=$HOME/spack +# module swap PrgEnv-cray/8.3.3 PrgEnv-gnu/8.3.3 +# module load spack/22.08 +# module load openblas/0.3.17-gcc-omp-xi +# Common flags +############## +# +# -ffree-line-length-none : Needed for IRPF90 which produces long lines +# -lblas -llapack : Link with libblas and liblapack libraries provided by the system +# -I . : Include the curent directory (Mandatory) +# +# --ninja : Allow the utilisation of ninja. (Mandatory) +# --align=32 : Align all provided arrays on a 32-byte boundary +# +# +[COMMON] +FC : gfortran -ffree-line-length-none -I . -mavx -g -fPIC +LAPACK_LIB : -L/appl/lumi/spack/22.08/0.18.1/opt/spack/openblas-0.3.17-xinceno/lib -lopenblas +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -Ofast : Disregard strict standards compliance. Enables all -O3 optimizations. +# It also enables optimizations that are not valid +# for all standard-compliant programs. It turns on +# -ffast-math and the Fortran-specific +# -fno-protect-parens and -fstack-arrays. +[OPT] +FCFLAGS : -Ofast -march=native + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -Ofast + +# Debugging flags +################# +# +# -fcheck=all : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# +[DEBUG] +FCFLAGS : -fcheck=all -g + +# OpenMP flags +################# +# +[OPENMP] +FC : -fopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/cray.cfg b/config/cray_intel.cfg similarity index 100% rename from config/cray.cfg rename to config/cray_intel.cfg diff --git a/configure b/configure index 79cd7119..852082e7 100755 --- a/configure +++ b/configure @@ -99,7 +99,7 @@ PACKAGES=$(echo $PACKAGES | xargs) echo "export QP_ROOT=\"$QP_ROOT\"" > ${QP_ROOT}/etc/00.qp_root.rc -source quantum_package.rc +source ${QP_ROOT}/quantum_package.rc @@ -293,7 +293,7 @@ EOF done -source quantum_package.rc +source ${QP_ROOT}/quantum_package.rc NINJA=$(find_exe ninja) if [[ ${NINJA} = $(not_found) ]] ; then diff --git a/docs/source/users_guide/qp_convert_output_to_ezfio.rst b/docs/source/users_guide/qp_convert_output_to_ezfio.rst index 171e2796..035ced4a 100644 --- a/docs/source/users_guide/qp_convert_output_to_ezfio.rst +++ b/docs/source/users_guide/qp_convert_output_to_ezfio.rst @@ -25,7 +25,7 @@ Usage .. note:: - All the parameters of the wave functgion need to be presente in the + All the parameters of the wave function need to be present in the output file : complete description of the |AO| basis set, full set of molecular orbitals, etc. diff --git a/external/qp2-dependencies b/external/qp2-dependencies index 90ee61f5..242151e0 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit 90ee61f5041c7c94a0c605625a264860292813a0 +Subproject commit 242151e03d1d6bf042387226431d82d35845686a diff --git a/src/ao_one_e_ints/point_charges.irp.f b/src/ao_one_e_ints/point_charges.irp.f new file mode 100644 index 00000000..82388c0d --- /dev/null +++ b/src/ao_one_e_ints/point_charges.irp.f @@ -0,0 +1,272 @@ + +! --- + + +BEGIN_PROVIDER [ integer, n_pts_charge ] + implicit none + BEGIN_DOC +! Number of point charges to be added to the potential + END_DOC + + logical :: has + PROVIDE ezfio_filename + if (mpi_master) then + + call ezfio_has_ao_one_e_ints_n_pts_charge(has) + if (has) then + write(6,'(A)') '.. >>>>> [ IO READ: n_pts_charge ] <<<<< ..' + call ezfio_get_ao_one_e_ints_n_pts_charge(n_pts_charge) + else + print *, 'ao_one_e_ints/n_pts_charge not found in EZFIO file' + stop 1 + endif + endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST( n_pts_charge, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read n_pts_charge with MPI' + endif + IRP_ENDIF + + call write_time(6) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, pts_charge_z, (n_pts_charge) ] + + BEGIN_DOC + ! Charge associated to each point charge. + END_DOC + + implicit none + logical :: exists + + PROVIDE ezfio_filename + + if (mpi_master) then + call ezfio_has_ao_one_e_ints_pts_charge_z(exists) + endif + + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST(pts_charge_z, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read pts_charge_z with MPI' + endif + IRP_ENDIF + + if (exists) then + + if (mpi_master) then + write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_z ] <<<<< ..' + call ezfio_get_ao_one_e_ints_pts_charge_z(pts_charge_z) + IRP_IF MPI + call MPI_BCAST(pts_charge_z, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read pts_charge_z with MPI' + endif + IRP_ENDIF + endif + + else + + integer :: i + do i = 1, n_pts_charge + pts_charge_z(i) = 0.d0 + enddo + + endif + print*,'Point charges ' + do i = 1, n_pts_charge + print*,'i,pts_charge_z(i)',i,pts_charge_z(i) + enddo + +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, pts_charge_coord, (n_pts_charge,3) ] + + BEGIN_DOC + ! Coordinates of each point charge. + END_DOC + + implicit none + logical :: exists + + PROVIDE ezfio_filename + + if (mpi_master) then + call ezfio_has_ao_one_e_ints_pts_charge_coord(exists) + endif + + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST(pts_charge_coord, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read pts_charge_coord with MPI' + endif + IRP_ENDIF + + if (exists) then + + if (mpi_master) then + double precision, allocatable :: buffer(:,:) + allocate (buffer(n_pts_charge,3)) + write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_coord ] <<<<< ..' + call ezfio_get_ao_one_e_ints_pts_charge_coord(buffer) + integer :: i,j + do i=1,3 + do j=1,n_pts_charge + pts_charge_coord(j,i) = buffer(j,i) + enddo + enddo + deallocate(buffer) + IRP_IF MPI + call MPI_BCAST(pts_charge_coord, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read pts_charge_coord with MPI' + endif + IRP_ENDIF + endif + + else + + do i = 1, n_pts_charge + pts_charge_coord(i,:) = 0.d0 + enddo + + endif + print*,'Coordinates for the point charges ' + do i = 1, n_pts_charge + write(*,'(I3,X,3(F16.8,X))') i,pts_charge_coord(i,1:3) + enddo + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, ao_integrals_pt_chrg, (ao_num,ao_num)] + + BEGIN_DOC + ! Point charge-electron interaction, in the |AO| basis set. + ! + ! :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle` + ! + ! These integrals also contain the pseudopotential integrals. + END_DOC + + implicit none + integer :: num_A, num_B, power_A(3), power_B(3) + integer :: i, j, k, l, n_pt_in, m + double precision :: alpha, beta + double precision :: A_center(3),B_center(3),C_center(3) + double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult + + ao_integrals_pt_chrg = 0.d0 + +! if (read_ao_integrals_pt_chrg) then +! +! call ezfio_get_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg) +! print *, 'AO N-e integrals read from disk' +! +! else + +! if(use_cosgtos) then +! !print *, " use_cosgtos for ao_integrals_pt_chrg ?", use_cosgtos +! +! do j = 1, ao_num +! do i = 1, ao_num +! ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg_cosgtos(i,j) +! enddo +! enddo +! +! else + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& + !$OMP num_A,num_B,Z,c,c1,n_pt_in) & + !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,pts_charge_coord,ao_coef_normalized_ordered_transp,nucl_coord,& + !$OMP n_pt_max_integrals,ao_integrals_pt_chrg,n_pts_charge,pts_charge_z) + + n_pt_in = n_pt_max_integrals + + !$OMP DO SCHEDULE (dynamic) + + do j = 1, ao_num + num_A = ao_nucl(j) + power_A(1:3)= ao_power(j,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + + do i = 1, ao_num + + num_B = ao_nucl(i) + power_B(1:3)= ao_power(i,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + do l=1,ao_prim_num(j) + alpha = ao_expo_ordered_transp(l,j) + + do m=1,ao_prim_num(i) + beta = ao_expo_ordered_transp(m,i) + + double precision :: c, c1 + c = 0.d0 + + do k = 1, n_pts_charge + double precision :: Z + Z = pts_charge_z(k) + + C_center(1:3) = pts_charge_coord(k,1:3) + + c1 = NAI_pol_mult( A_center, B_center, power_A, power_B & + , alpha, beta, C_center, n_pt_in ) + + c = c + Z * c1 + + enddo + ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg(i,j) & + + ao_coef_normalized_ordered_transp(l,j) & + * ao_coef_normalized_ordered_transp(m,i) * c + enddo + enddo + enddo + enddo + + !$OMP END DO + !$OMP END PARALLEL + +! endif + + +! IF(do_pseudo) THEN +! ao_integrals_pt_chrg += ao_pseudo_integrals +! ENDIF + +! endif + + +! if (write_ao_integrals_pt_chrg) then +! call ezfio_set_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg) +! print *, 'AO N-e integrals written to disk' +! endif + +END_PROVIDER diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f index 78607b7c..d4f7011c 100644 --- a/src/determinants/slater_rules.irp.f +++ b/src/determinants/slater_rules.irp.f @@ -3,8 +3,27 @@ subroutine get_excitation_degree(key1,key2,degree,Nint) include 'utils/constants.include.F' implicit none BEGIN_DOC - ! Returns the excitation degree between two determinants. + ! This function calculates the excitation degree between two + ! determinants, which is half the number of bits that are different between the two + ! determinants. The function takes four arguments: + ! + ! * key1: An integer array of length Nint*2, representing the first determinant. + ! + ! * key2: An integer array of length Nint*2, representing the second determinant. + ! + ! * degree: An integer, passed by reference, that will store the calculated excitation degree. + ! + ! * Nint: An integer representing the number of integers in each of the key1 and key2 arrays. + ! + ! It starts a select case block that depends on the value of Nint. + ! In each case, the function first calculates the bitwise XOR of each + ! corresponding pair of elements in key1 and key2, storing the results in the + ! xorvec array. It then calculates the number of bits set (using the popcnt + ! function) for each element in xorvec, and sums these counts up. This sum is + ! stored in the degree variable. + ! Finally, the degree variable is right-shifted by 1 bit to divide the result by 2. END_DOC + integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key1(Nint*2) integer(bit_kind), intent(in) :: key2(Nint*2) @@ -107,6 +126,8 @@ subroutine get_excitation(det1,det2,exc,degree,phase,Nint) return case(0) + ! Avoid uninitialized phase + phase = 1d0 return end select diff --git a/src/nuclei/point_charges.irp.f b/src/nuclei/point_charges.irp.f index 86038742..752fcf99 100644 --- a/src/nuclei/point_charges.irp.f +++ b/src/nuclei/point_charges.irp.f @@ -155,7 +155,7 @@ BEGIN_PROVIDER [ double precision, pts_charge_coord, (n_pts_charge,3) ] endif print*,'Coordinates for the point charges ' do i = 1, n_pts_charge - write(*,'(I3,X,3(F16.8,X))'),i,pts_charge_coord(i,1:3) + write(*,'(I3,X,3(F16.8,X))') i,pts_charge_coord(i,1:3) enddo END_PROVIDER diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 1e33c7dc..44344a19 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1675,98 +1675,114 @@ subroutine nullify_small_elements(m,n,A,LDA,thresh) end subroutine restore_symmetry(m,n,A,LDA,thresh) + implicit none + BEGIN_DOC -! Tries to find the matrix elements that are the same, and sets them -! to the average value. -! If restore_symm is False, only nullify small elements + ! Tries to find the matrix elements that are the same, and sets them + ! to the average value. + ! If restore_symm is False, only nullify small elements END_DOC + integer, intent(in) :: m,n,LDA double precision, intent(inout) :: A(LDA,n) double precision, intent(in) :: thresh - integer :: i,j,k,l - logical, allocatable :: done(:,:) - double precision :: f, g, count, thresh2 + + double precision, allocatable :: copy(:), copy_sign(:) + integer, allocatable :: key(:), ii(:), jj(:) + integer :: sze, pi, pf, idx, i,j,k + double precision :: average, val, thresh2 + thresh2 = dsqrt(thresh) - call nullify_small_elements(m,n,A,LDA,thresh) -! if (.not.restore_symm) then -! return -! endif + sze = m * n - ! TODO: Costs O(n^4), but can be improved to (2 n^2 * log(n)): - ! - copy all values in a 1D array - ! - sort 1D array - ! - average nearby elements - ! - for all elements, find matching value in the sorted 1D array + allocate(copy(sze),copy_sign(sze),key(sze),ii(sze),jj(sze)) - allocate(done(m,n)) - - do j=1,n - do i=1,m - done(i,j) = A(i,j) == 0.d0 + ! Copy to 1D + !$OMP PARALLEL if (m>100) & + !$OMP SHARED(A,m,n,sze,copy_sign,copy,key,ii,jj) & + !$OMP PRIVATE(i,j,k) & + !$OMP DEFAULT(NONE) + !$OMP DO + do j = 1, n + do i = 1, m + k = i+(j-1)*m + copy(k) = A(i,j) + copy_sign(k) = sign(1.d0,copy(k)) + copy(k) = -dabs(copy(k)) + key(k) = k + ii(k) = i + jj(k) = j enddo enddo + !$OMP END DO + !$OMP END PARALLEL - do j=1,n - do i=1,m - if ( done(i,j) ) cycle - done(i,j) = .True. - count = 1.d0 - f = 1.d0/A(i,j) - do l=1,n - do k=1,m - if ( done(k,l) ) cycle - g = f * A(k,l) - if ( dabs(dabs(g) - 1.d0) < thresh2 ) then - count = count + 1.d0 - if (g>0.d0) then - A(i,j) = A(i,j) + A(k,l) - else - A(i,j) = A(i,j) - A(k,l) - end if - endif - enddo - enddo - if (count > 1.d0) then - A(i,j) = A(i,j) / count - do l=1,n - do k=1,m - if ( done(k,l) ) cycle - g = f * A(k,l) - if ( dabs(dabs(g) - 1.d0) < thresh2 ) then - done(k,l) = .True. - if (g>0.d0) then - A(k,l) = A(i,j) - else - A(k,l) = -A(i,j) - end if - endif - enddo - enddo + ! Sort + call dsort(copy,key,sze) + call iset_order(ii,key,sze) + call iset_order(jj,key,sze) + call dset_order(copy_sign,key,sze) + + !TODO + ! Parallelization with OMP + +! ! Skip all the elements below thresh +! i = 1 +! do while (copy(i) <= thresh) +! i = i + 1 +! enddo + + ! Symmetrize + i = 1 + do while( (i < sze).and.(-copy(i) > thresh) ) + pi = i + pf = i + val = 1d0/copy(i) + do while (dabs(val * copy(pf+1) - 1d0) < thresh2) + pf = pf + 1 + ! if pf == sze, copy(pf+1) will not be valid + if (pf == sze) then + exit endif - enddo + ! if pi and pf are different do the average from pi to pf + if (pf - pi > 0) then + average = 0d0 + do j = pi, pf + average = average + copy(j) + enddo + average = average / (pf-pi+1.d0) + do j = pi, pf + copy(j) = average + enddo + ! Update i + i = pf + endif + ! Update i + i = i + 1 enddo + copy(i:) = 0.d0 + + !$OMP PARALLEL if (sze>10000) & + !$OMP SHARED(m,sze,copy_sign,copy,key,A,ii,jj) & + !$OMP PRIVATE(i,j,k,idx) & + !$OMP DEFAULT(NONE) + ! copy -> A + !$OMP DO + do k = 1, sze + i = ii(k) + j = jj(k) + A(i,j) = sign(copy(k),copy_sign(k)) + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(copy,copy_sign,key,ii,jj) end - - - - - - - - - - - - - - - - !subroutine svd_s(A, LDA, U, LDU, D, Vt, LDVt, m, n) ! implicit none ! BEGIN_DOC