From 59c37a79ff484cf99aa15d23726edaca9d72e5bd Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 14 May 2014 00:07:58 +0200 Subject: [PATCH 1/4] Update documentation --- scripts/update_README.py | 8 ++++++-- src/AOs/README.rst | 6 ++++-- src/Makefile.common | 8 ++------ 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/scripts/update_README.py b/scripts/update_README.py index c89c1c15..811899d9 100755 --- a/scripts/update_README.py +++ b/scripts/update_README.py @@ -126,8 +126,12 @@ def update_documentation(data): inside = line.startswith(".SH Description") else: if line.startswith(".SH"): - return "".join(result) - result.append(" "+line.strip()+"\n") + break + result.append(" "+line.strip()) + + if result == []: + result = [" Undocumented"] + return "\n".join(result)+'\n' diff --git a/src/AOs/README.rst b/src/AOs/README.rst index 1070d856..378bf8ae 100644 --- a/src/AOs/README.rst +++ b/src/AOs/README.rst @@ -85,8 +85,10 @@ Documentation Number of primitives per atomic orbital `ao_prim_num_max `_ -None + Undocumented + `ao_prim_num_max_align `_ -None + Undocumented + diff --git a/src/Makefile.common b/src/Makefile.common index fa02f7ec..488d1bf5 100644 --- a/src/Makefile.common +++ b/src/Makefile.common @@ -85,11 +85,6 @@ $(info -----------------------------------------------) endif -# Update the README.rst file for GitHub, and git add it - -$(shell update_README.py) - - # Define the Makefile common variables and rules EZFIO_DIR=$(QPACKAGE_ROOT)/EZFIO @@ -116,7 +111,8 @@ LIB+=$(EZFIO) $(MKL) IRPF90+=$(patsubst %, -I %, $(INCLUDE_DIRS)) $(IRPF90_FLAGS) irpf90.make: $(filter-out IRPF90_temp/%, $(wildcard */*.irp.f)) $(wildcard *.irp.f) $(wildcard *.inc.f) Makefile $(EZFIO) NEEDED_MODULES $(wildcard *.py) - $(IRPF90) + - $(IRPF90) + - update_README.py Makefile.depend: Makefile $(QPACKAGE_ROOT)/scripts/create_Makefile_depend.sh From e1d2a79b3b644c8c67c744ada95dfeb6e04f18dc Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 14 May 2014 15:40:40 +0200 Subject: [PATCH 2/4] Davidson multistates works in a very standard version --- src/CISD/README.rst | 2 - src/CISD/cisd.irp.f | 9 +- src/DensityMatrix/density_matrix.irp.f | 259 ++++++++++++++++++++++++ src/DensityMatrix/det_num.irp.f | 56 ++++++ src/Dets/H_apply.irp.f | 4 + src/Dets/README.rst | 72 +++++-- src/Dets/davidson.irp.f | 261 +++++++++++++++++++++++++ src/Dets/determinants.irp.f | 16 +- src/Dets/slater_rules.irp.f | 67 +++++-- src/Utils/LinearAlgebra.irp.f | 69 +++++-- src/Utils/util.irp.f | 6 - 11 files changed, 755 insertions(+), 66 deletions(-) create mode 100644 src/DensityMatrix/density_matrix.irp.f create mode 100644 src/DensityMatrix/det_num.irp.f create mode 100644 src/Dets/davidson.irp.f diff --git a/src/CISD/README.rst b/src/CISD/README.rst index ceb15b27..1a9c86d9 100644 --- a/src/CISD/README.rst +++ b/src/CISD/README.rst @@ -30,7 +30,5 @@ Documentation Calls H_apply on the HF determinant and selects all connected single and double excitations (of the same symmetry). -`cisd `_ -None diff --git a/src/CISD/cisd.irp.f b/src/CISD/cisd.irp.f index 7a495d32..0b68124a 100644 --- a/src/CISD/cisd.irp.f +++ b/src/CISD/cisd.irp.f @@ -1,19 +1,18 @@ program cisd implicit none integer :: i + double precision, allocatable :: eigvalues(:),eigvectors(:,:) call H_apply_cisd - double precision, allocatable :: eigvalues(:),eigvectors(:,:) allocate(eigvalues(n_det),eigvectors(n_det,n_det)) print *, 'N_det = ', N_det - call lapack_diag(eigvalues,eigvectors,H_matrix_all_dets,n_det,n_det) + psi_coef = psi_coef - 1.d-4 + call davidson_diag(psi_det,psi_coef,eigvalues,size(psi_coef,1),N_det,N_states,N_int) -! print *, H_matrix_all_dets print *, '---' print *, 'HF:', HF_energy print *, '---' - do i = 1,3 + do i = 1,1 print *, 'energy(i) = ',eigvalues(i) + nuclear_repulsion enddo -! print *, eigvectors(:,1) deallocate(eigvalues,eigvectors) end diff --git a/src/DensityMatrix/density_matrix.irp.f b/src/DensityMatrix/density_matrix.irp.f new file mode 100644 index 00000000..5afcb523 --- /dev/null +++ b/src/DensityMatrix/density_matrix.irp.f @@ -0,0 +1,259 @@ + use bitmasks + BEGIN_PROVIDER [ integer, iunit_two_body_dm_aa ] +&BEGIN_PROVIDER [ integer, iunit_two_body_dm_ab ] +&BEGIN_PROVIDER [ integer, iunit_two_body_dm_bb ] + implicit none + use bitmasks + BEGIN_DOC + ! Temporary files for 2-body dm calculation + END_DOC + integer :: getUnitAndOpen + + iunit_two_body_dm_aa = getUnitAndOpen(trim(ezfio_filename)//'/work/two_body_aa.tmp','w') + iunit_two_body_dm_ab = getUnitAndOpen(trim(ezfio_filename)//'/work/two_body_ab.tmp','w') + iunit_two_body_dm_bb = getUnitAndOpen(trim(ezfio_filename)//'/work/two_body_bb.tmp','w') + ! Compute two body DM in file + integer :: k,l,degree, idx,i + integer :: exc(0:2,2,2),n_occ_alpha + double precision :: phase, coef + integer :: h1,h2,p1,p2,s1,s2 + double precision :: ck, cl + character*(128), parameter :: f = '(i8,4(x,i5),x,d16.8)' + do k=1,det_num + ck = (det_coef_provider(k)+det_coef_provider(k)) + do l=1,k-1 + cl = det_coef_provider(l) + call get_excitation_degree(det_provider(1,1,k),det_provider(1,1,l),degree,N_int) + if (degree == 2) then + call get_double_excitation(det_provider(1,1,k),det_provider(1,1,l),exc,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + call bielec_integrals_index(h1,h2,p1,p2,idx) + ckl = phase*ck*cl + select case (s1+s2) + case(2) ! alpha alpha + write(iunit_two_body_dm_aa,f) idx, h1,h2,p1,p2, ckl + call bielec_integrals_index(h1,h2,p2,p1,idx) + write(iunit_two_body_dm_aa,f) idx, h1,h2,p2,p1, -ckl + case(3) ! alpha beta + write(iunit_two_body_dm_ab,f) idx, h1,h2,p1,p2, ckl + case(4) ! beta beta + write(iunit_two_body_dm_bb,f) idx, h1,h2,p1,p2, ckl + call bielec_integrals_index(h1,h2,p2,p1,idx) + write(iunit_two_body_dm_bb,f) idx, h1,h2,p2,p1, -ckl + end select + else if (degree == 1) then + call get_mono_excitation(det_provider(1,1,k),det_provider(1,1,l),exc,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + double precision :: ckl + ckl = phase*ck*cl + call bitstring_to_list(det_provider(1,1,k), occ(1,1), n_occ_alpha, N_int) + call bitstring_to_list(det_provider(1,2,k), occ(1,2), n_occ_alpha, N_int) + select case (s1) + case (1) ! Alpha single excitation + integer :: occ(N_int*bit_kind_size,2) + do i = 1, elec_alpha_num + p2=occ(i,1) + h2=p2 + call bielec_integrals_index(h1,h2,p1,p2,idx) + write(iunit_two_body_dm_aa,f) idx, h1,h2,p1,p2, ckl + call bielec_integrals_index(h1,h2,p2,p1,idx) + write(iunit_two_body_dm_aa,f) idx, h1,h2,p2,p1, -ckl + enddo + do i = 1, elec_beta_num + p2=occ(i,2) + h2=p2 + call bielec_integrals_index(h1,h2,p1,p2,idx) + write(iunit_two_body_dm_ab,f) idx, h1,h2,p1,p2, ckl + enddo + case (2) ! Beta single excitation + do i = 1, elec_alpha_num + p2=occ(i,1) + h2=p2 + call bielec_integrals_index(h1,h2,p1,p2,idx) + write(iunit_two_body_dm_ab,f) idx, h1,h2,p1,p2, ckl + enddo + do i = 1, elec_beta_num + p2=occ(i,2) + h2=p2 + call bielec_integrals_index(h1,h2,p1,p2,idx) + write(iunit_two_body_dm_bb,f) idx, h1,h2,p1,p2, ckl + call bielec_integrals_index(h1,h2,p2,p1,idx) + write(iunit_two_body_dm_bb,f) idx, h1,h2,p2,p1, -ckl + enddo + end select + endif + enddo + enddo + ! Sort file + ! Merge coefs + + close(iunit_two_body_dm_aa) + close(iunit_two_body_dm_ab) + close(iunit_two_body_dm_bb) + character*(128) :: filename + filename = trim(ezfio_filename)//'/work/two_body_aa.tmp' + call system('sort -n '//trim(filename)//' > '//trim(filename)//'2 ; cp '//trim(filename)//'2 '//trim(filename)) + filename = trim(ezfio_filename)//'/work/two_body_ab.tmp' + call system('sort -n '//trim(filename)//' > '//trim(filename)//'2 ; cp '//trim(filename)//'2 '//trim(filename)) + filename = trim(ezfio_filename)//'/work/two_body_bb.tmp' + call system('sort -n '//trim(filename)//' > '//trim(filename)//'2 ; cp '//trim(filename)//'2 '//trim(filename)) + iunit_two_body_dm_aa = getUnitAndOpen(trim(ezfio_filename)//'/work/two_body_aa.tmp','r') + iunit_two_body_dm_ab = getUnitAndOpen(trim(ezfio_filename)//'/work/two_body_ab.tmp','r') + iunit_two_body_dm_bb = getUnitAndOpen(trim(ezfio_filename)//'/work/two_body_bb.tmp','r') +END_PROVIDER + + + BEGIN_TEMPLATE + +BEGIN_PROVIDER [ integer, size_two_body_dm_$AA ] + implicit none + use bitmasks + BEGIN_DOC + ! Size of the two body $ALPHA density matrix + END_DOC + integer *8 :: key, key_old + rewind(iunit_two_body_dm_$AA) + size_two_body_dm_$AA = 0 + key = 0_8 + key_old = key + do while (.True.) + read(iunit_two_body_dm_$AA,*,END=99) key + if (key /= key_old) then + size_two_body_dm_$AA += 1 + key_old = key + endif + end do + 99 continue + +END_PROVIDER + + BEGIN_PROVIDER [ integer, two_body_dm_index_$AA, (4,size_two_body_dm_$AA) ] +&BEGIN_PROVIDER [ double precision, two_body_dm_value_$AA, (size_two_body_dm_$AA) ] + implicit none + use bitmasks + BEGIN_DOC + ! Two body $ALPHA density matrix + END_DOC + rewind(iunit_two_body_dm_$AA) + integer *8 :: key, key_old + integer :: ii, i,j,k,l + double precision :: c + key = 0_8 + key_old = key + ii = 0 + do while (.True.) + read(iunit_two_body_dm_$AA,*,END=99) key, i,j,k,l, c + if (key /= key_old) then + ii += 1 + two_body_dm_index_$AA(1,ii) = i + two_body_dm_index_$AA(2,ii) = j + two_body_dm_index_$AA(3,ii) = k + two_body_dm_index_$AA(4,ii) = l + two_body_dm_value_$AA(ii) = 0.d0 + key_old = key + endif + two_body_dm_value_$AA(ii) += c + enddo + 99 continue + close(iunit_two_body_dm_$AA, status='DELETE') +END_PROVIDER + + SUBST [ AA, ALPHA ] + + aa ; alpha-alpha ;; + ab ; alpha-beta ;; + bb ; beta-beta ;; + + END_TEMPLATE + + + BEGIN_PROVIDER [ double precision, two_body_dm_diag_aa, (mo_tot_num_align,mo_tot_num)] +&BEGIN_PROVIDER [ double precision, two_body_dm_diag_bb, (mo_tot_num_align,mo_tot_num)] +&BEGIN_PROVIDER [ double precision, two_body_dm_diag_ab, (mo_tot_num_align,mo_tot_num)] + implicit none + use bitmasks + BEGIN_DOC + ! diagonal part of the two body density matrix + END_DOC + integer :: i,j,k,e1,e2 + integer :: occ(N_int*bit_kind_size,2) + double precision :: ck + integer :: n_occ_alpha + two_body_dm_diag_aa=0.d0 + two_body_dm_diag_ab=0.d0 + two_body_dm_diag_bb=0.d0 + do k = 1, det_num + call bitstring_to_list(det_provider(1,1,k), occ(1,1), n_occ_alpha, N_int) + call bitstring_to_list(det_provider(1,2,k), occ(1,2), n_occ_alpha, N_int) + ck = det_coef_provider(k) * det_coef_provider(k) + do i = 1,elec_alpha_num + e1=occ(i,1) + do j = 1,elec_alpha_num + e2=occ(j,1) + ! alpha-alpha + two_body_dm_diag_aa(e1,e2) = two_body_dm_diag_aa(e1,e2) + ck + enddo + do j = 1,elec_beta_num + e2=occ(j,2) + ! alpha-beta + two_body_dm_diag_ab(e1,e2) = two_body_dm_diag_ab(e1,e2) + ck + enddo + enddo + do i = 1,elec_beta_num + e1=occ(i,2) + do j = 1,elec_beta_num + e2=occ(j,2) + ! beta-beta + two_body_dm_diag_bb(e1,e2) = two_body_dm_diag_bb(e1,e2) + ck + enddo + enddo + enddo +END_PROVIDER + + BEGIN_PROVIDER [ double precision, one_body_dm_a, (mo_tot_num_align,mo_tot_num) ] +&BEGIN_PROVIDER [ double precision, one_body_dm_b, (mo_tot_num_align,mo_tot_num) ] + implicit none + BEGIN_DOC + ! Alpha and beta one-body density matrix + END_DOC + + integer :: j,k,l + integer :: occ(N_int*bit_kind_size,2) + double precision :: ck, cl, ckl + double precision :: phase + integer :: h1,h2,p1,p2,s1,s2, degree + integer :: exc(0:2,2,2),n_occ_alpha + one_body_dm_a = 0.d0 + one_body_dm_b = 0.d0 + + do k=1,det_num + call bitstring_to_list(det_provider(1,1,k), occ(1,1), n_occ_alpha, N_int) + call bitstring_to_list(det_provider(1,2,k), occ(1,2), n_occ_alpha, N_int) + ck = det_coef_provider(k) + do l=1,elec_alpha_num + j = occ(l,1) + one_body_dm_a(j,j) += ck*ck + enddo + do l=1,elec_beta_num + j = occ(l,2) + one_body_dm_b(j,j) += ck*ck + enddo + do l=1,k-1 + call get_excitation_degree(det_provider(1,1,k),det_provider(1,1,l),degree,N_int) + if (degree /= 1) then + cycle + endif + call get_mono_excitation(det_provider(1,1,k),det_provider(1,1,l),exc,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + ckl = ck * det_coef_provider(l) * phase + if (s1==1) then + one_body_dm_a(h1,p1) += ckl + one_body_dm_a(p1,h1) += ckl + else + one_body_dm_b(h1,p1) += ckl + one_body_dm_b(p1,h1) += ckl + endif + enddo + enddo +END_PROVIDER + diff --git a/src/DensityMatrix/det_num.irp.f b/src/DensityMatrix/det_num.irp.f new file mode 100644 index 00000000..7c73b0a4 --- /dev/null +++ b/src/DensityMatrix/det_num.irp.f @@ -0,0 +1,56 @@ + use bitmasks + + BEGIN_PROVIDER [integer, det_num] + det_num = 10 + END_PROVIDER + + BEGIN_PROVIDER [ integer(bit_kind), det_provider, (N_int,2,det_num)] +&BEGIN_PROVIDER [ double precision , det_coef_provider, (det_num) ] + use bitmasks + implicit none + integer :: i + det_provider = 0 + det_provider(1,1,1 ) = #001f ! 0000 0000 0001 1111 + det_provider(1,1,2 ) = #003b ! 0000 0000 0011 1011 + det_provider(1,1,3 ) = #008f ! 0000 0000 1000 1111 + det_provider(1,1,4 ) = #0057 ! 0000 0000 0101 0111 + det_provider(1,1,5 ) = #100f ! 0001 0000 0000 1111 + det_provider(1,1,6 ) = #001f ! 0000 0000 0001 1111 + det_provider(1,1,7 ) = #003b ! 0000 0000 0011 1011 + det_provider(1,1,8 ) = #00c7 ! 0000 0000 1100 0111 + det_provider(1,1,9 ) = #00ab ! 0000 0000 1010 1011 + det_provider(1,1,10) = #0073 ! 0000 0000 0111 0011 + det_provider(1,2,1 ) = #0007 ! 0000 0000 0001 0111 + det_provider(1,2,2 ) = #0023 ! 0000 0000 0010 0011 + det_provider(1,2,3 ) = #0023 ! 0000 0000 0010 0011 + det_provider(1,2,4 ) = #0023 ! 0000 0000 0010 0011 + det_provider(1,2,5 ) = #0015 ! 0000 0000 0001 0101 + det_provider(1,2,6 ) = #000d ! 0000 0000 0000 1101 + det_provider(1,2,7 ) = #0007 ! 0000 0000 0000 0111 + det_provider(1,2,8 ) = #0007 ! 0000 0000 0000 0111 + det_provider(1,2,9 ) = #0007 ! 0000 0000 0000 0111 + det_provider(1,2,10) = #0007 ! 0000 0000 0000 0111 + det_coef_provider = (/ & + 0.993536117982429D+00, & + -0.556089064313864D-01, & + 0.403074722590178D-01, & + 0.403074717461626D-01, & + -0.340290975461932D-01, & + -0.340290958781670D-01, & + -0.333949939765448D-01, & + 0.333418373363987D-01, & + -0.316337211787351D-01, & + -0.316337207748718D-01 & +/) + + do i=1,10 + call write_bitstring( 6, det_provider(1,1,i), N_int ) + enddo + print *, '' + do i=1,10 + call write_bitstring( 6, det_provider(1,2,i), N_int ) + enddo + print *, '' + + +END_PROVIDER diff --git a/src/Dets/H_apply.irp.f b/src/Dets/H_apply.irp.f index 5fdeffc7..40fe4b1b 100644 --- a/src/Dets/H_apply.irp.f +++ b/src/Dets/H_apply.irp.f @@ -120,6 +120,10 @@ subroutine copy_H_apply_buffer_to_wf N_det = N_det + H_apply_buffer_N_det TOUCH N_det + if (psi_det_size < N_det) then + psi_det_size = N_det + TOUCH psi_det_size + endif do i=1,N_det_old do k=1,N_int psi_det(k,1,i) = buffer_det(k,1,i) diff --git a/src/Dets/README.rst b/src/Dets/README.rst index 8c7eaa4c..69c9cd33 100644 --- a/src/Dets/README.rst +++ b/src/Dets/README.rst @@ -51,7 +51,8 @@ Documentation .. NEEDED_MODULES file. `copy_h_apply_buffer_to_wf `_ -None + Undocumented + `h_apply_buffer_coef `_ Buffer of determinants/coefficients for H_apply. Uninitialized. Filled by H_apply subroutines. @@ -68,23 +69,49 @@ None Theshold on | | `resize_h_apply_buffer_det `_ -None + Undocumented + +`davidson_diag `_ + Davidson diagonalization. + .br + dets_in : bitmasks corresponding to determinants + .br + u_in : guess coefficients on the various states. Overwritten + on exit + .br + dim_in : leftmost dimension of u_in + .br + sze : Number of determinants + .br + N_st : Number of eigenstates + .br + Initial guess vectors are not necessarily orthonormal + +`davidson_iter_max `_ + Max number of Davidson iterations + +`davidson_sze_max `_ + Max number of Davidson sizes + `n_det `_ Number of determinants in the wave function -`n_det_generators `_ +`n_det_generators `_ Number of generator determinants in the wave function `n_states `_ Number of states to consider -`psi_coef `_ +`psi_coef `_ The wave function. Initialized with Hartree-Fock -`psi_det `_ +`psi_det `_ The wave function. Initialized with Hartree-Fock -`psi_generators `_ +`psi_det_size `_ + Size of the psi_det/psi_coef arrays + +`psi_generators `_ Determinants on which H is applied `double_exc_bitmask `_ @@ -108,10 +135,10 @@ None `get_s2 `_ Returns -`a_operator `_ +`a_operator `_ Needed for diag_H_mat_elem -`ac_operator `_ +`ac_operator `_ Needed for diag_H_mat_elem `decode_exc `_ @@ -121,15 +148,16 @@ None s1,s2 : Spins (1:alpha, 2:beta) degree : Degree of excitation -`diag_h_mat_elem `_ +`diag_h_mat_elem `_ Computes -`filter_connected `_ +`filter_connected `_ Filters out the determinants that are not connected by H `filter_connected_i_h_psi0 `_ -None -`get_double_excitation `_ + Undocumented + +`get_double_excitation `_ Returns the two excitation operators between two doubly excited determinants and the phase `get_excitation `_ @@ -138,20 +166,28 @@ None `get_excitation_degree `_ Returns the excitation degree between two determinants -`get_excitation_degree_vector `_ +`get_excitation_degree_vector `_ Applies get_excitation_degree to an array of determinants -`get_mono_excitation `_ +`get_mono_excitation `_ Returns the excitation operator between two singly excited determinants and the phase -`get_occ_from_key `_ +`get_occ_from_key `_ Returns a list of occupation numbers from a bitstring -`i_h_j `_ +`h_u_0 `_ + Computes v_0 = H|u_0> + .br + n : number of determinants + .br + H_jj : array of + +`i_h_j `_ Returns where i and j are determinants -`i_h_psim `_ -None +`i_h_psim `_ + Undocumented + `h_matrix_all_dets `_ H matrix on the basis of the slater deter;inants defined by psi_det diff --git a/src/Dets/davidson.irp.f b/src/Dets/davidson.irp.f new file mode 100644 index 00000000..6282d6a9 --- /dev/null +++ b/src/Dets/davidson.irp.f @@ -0,0 +1,261 @@ +BEGIN_PROVIDER [ integer, davidson_iter_max] + implicit none + BEGIN_DOC + ! Max number of Davidson iterations + END_DOC + davidson_iter_max = 100 +END_PROVIDER + +BEGIN_PROVIDER [ integer, davidson_sze_max] + implicit none + BEGIN_DOC + ! Max number of Davidson sizes + END_DOC + ASSERT (davidson_sze_max <= davidson_iter_max) + davidson_sze_max = 10 +END_PROVIDER + +subroutine davidson_diag(dets_in,u_in,energies,dim_in,sze,N_st,Nint) + use bitmasks + implicit none + BEGIN_DOC + ! Davidson diagonalization. + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten + ! on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! Initial guess vectors are not necessarily orthonormal + END_DOC + integer, intent(in) :: dim_in, sze, N_st, Nint + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(inout) :: u_in(dim_in,N_st) + double precision, intent(out) :: energies(N_st) + + integer :: iter + integer :: i,j,k,l + logical :: converged + + double precision :: overlap(N_st,N_st) + double precision :: u_dot_v, u_dot_u + + integer, allocatable :: kl_pairs(:,:) + integer :: k_pairs, kl + + integer :: iter2 + double precision, allocatable :: W(:,:), H_jj(:), U(:,:,:), R(:,:) + double precision, allocatable :: y(:,:,:,:), h(:,:,:,:), lambda(:) + double precision :: diag_h_mat_elem + double precision :: residual_norm(N_st) + + allocate( & + kl_pairs(2,N_st*(N_st+1)/2), & + H_jj(sze), & + W(sze,N_st), & + U(sze,N_st,davidson_sze_max), & + R(sze,N_st), & + h(N_st,davidson_sze_max,N_st,davidson_sze_max), & + y(N_st,davidson_sze_max,N_st,davidson_sze_max), & + lambda(N_st*davidson_sze_max)) + + ASSERT (N_st > 0) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + ! Conventions: + ! i,j : 1,sze + ! k,l : 1,N_st + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(k,i) SHARED(U,u_in,sze,N_st) + do k=1,N_st + !$OMP DO + do i=1,sze + U(i,k,1) = u_in(i,k) + enddo + !$OMP END DO NOWAIT + enddo + !$OMP END PARALLEL + + ! Orthonormalize initial guess + ! ============================ + + k_pairs=0 + do l=1,N_st + do k=1,l + k_pairs+=1 + kl_pairs(1,k_pairs) = k + kl_pairs(2,k_pairs) = l + enddo + enddo + !$OMP PARALLEL DO DEFAULT(NONE) & + !$OMP SHARED(U,sze,N_st,overlap,kl_pairs,k_pairs) & + !$OMP PRIVATE(k,l,kl) + do kl=1,k_pairs + k = kl_pairs(1,kl) + l = kl_pairs(2,kl) + if (k==l) then + overlap(k,k) = u_dot_u(U(1,k,1),sze) + endif + overlap(k,l) = u_dot_v(U(1,k,1),U(1,l,1),sze) + overlap(l,k) = overlap(k,l) + enddo + !$OMP END PARALLEL DO + call ortho_lowdin(overlap,size(overlap,1),N_st,U,size(U,1),sze) + + !$OMP PARALLEL DO DEFAULT(NONE) & + !$OMP PRIVATE(i) & + !$OMP SHARED(H_jj,Nint,dets_in,sze) + do i=1,sze + H_jj(i) = diag_h_mat_elem(dets_in(1,1,i),Nint) + enddo + !$OMP END PARALLEL DO + + ! Davidson iterations + ! =================== + + converged = .False. + + do iter=1,davidson_sze_max-1 + print *, 'iter = ',iter + +! print *, '***************' +! do i=1,iter +! do k=1,N_st +! do j=1,iter +! do l=1,N_st +! print '(4(I4,X),F16.8)', i,j,k,l, u_dot_v(U(1,k,i),U(1,l,j),sze) +! enddo +! enddo +! enddo +! enddo +! print *, '***************' + + ! Compute W_k = H |u_k> + ! ---------------------- + + do k=1,N_st + call H_u_0(W(1,k),U(1,k,iter),H_jj,sze,dets_in,Nint) + enddo + + ! Compute h_kl = = + ! ------------------------------------------- + do l=1,N_st + do iter2=1,iter-1 + do k=1,N_st + h(k,iter2,l,iter) = u_dot_v(U(1,k,iter2),W(1,l),sze) + h(k,iter,l,iter2) = h(k,iter2,l,iter) + enddo + enddo + do k=1,l + h(k,iter,l,iter) = u_dot_v(U(1,k,iter),W(1,l),sze) + h(l,iter,k,iter) = h(k,iter,l,iter) + enddo + enddo + + ! Diagonalize h + ! ------------- + call lapack_diag(lambda,y,h,N_st*davidson_sze_max,N_st*iter) + + ! Express eigenvectors of h in the determinant basis + ! -------------------------------------------------- + + !TODO dgemm + do k=1,N_st + do i=1,sze + U(i,k,iter+1) = 0.d0 + do iter2=1,iter + do l=1,N_st + U(i,k,iter+1) += U(i,l,iter2)*y(l,iter2,k,1) + enddo + enddo + enddo + enddo + + ! Compute residual vector + ! ----------------------- + + do k=1,N_st + call H_u_0(W(1,k),U(1,k,iter+1),H_jj,sze,dets_in,Nint) + enddo + + do k=1,N_st + do i=1,sze + R(i,k) = lambda(k) * U(i,k,iter+1) - W(i,k) + enddo + residual_norm(k) = u_dot_u(R(1,k),sze) + enddo + print *, 'Lambda' + print *, lambda(1:N_st) + nuclear_repulsion + print *, 'Residual_norm' + print *, residual_norm(1:N_st) + print *, '' + + converged = maxval(residual_norm) < 1.d-10 + if (converged) then + exit + endif + + ! Davidson step + ! ------------- + + do k=1,N_st + do i=1,sze + U(i,k,iter+1) = 1.d0/(lambda(k) - H_jj(i)) * R(i,k) + enddo + enddo + + ! Gram-Schmidt + ! ------------ + + double precision :: c + do k=1,N_st + do iter2=1,iter + do l=1,N_st + c = u_dot_v(U(1,k,iter+1),U(1,l,iter2),sze) + do i=1,sze + U(i,k,iter+1) -= c * U(i,l,iter2) + enddo + enddo + enddo + do l=1,k-1 + c = u_dot_v(U(1,k,iter+1),U(1,l,iter+1),sze) + do i=1,sze + U(i,k,iter+1) -= c * U(i,l,iter+1) + enddo + enddo + call normalize( U(1,k,iter+1), sze ) + enddo + + enddo + do k=1,N_st + energies(k) = lambda(k) + do i=1,sze + u_in(i,k) = 0.d0 + do iter2=1,iter + do l=1,N_st + u_in(i,k) += U(i,l,iter2)*y(l,iter2,k,1) + enddo + enddo + enddo + enddo + + deallocate ( & + kl_pairs, & + H_jj, & + W, & + U, & + R, & + h, & + y, & + lambda & + ) +end + diff --git a/src/Dets/determinants.irp.f b/src/Dets/determinants.irp.f index 829d5693..1cb89644 100644 --- a/src/Dets/determinants.irp.f +++ b/src/Dets/determinants.irp.f @@ -13,11 +13,19 @@ BEGIN_PROVIDER [ integer, N_det ] BEGIN_DOC ! Number of determinants in the wave function END_DOC - N_det = max(1,N_states) + N_det = 1 END_PROVIDER - BEGIN_PROVIDER [ integer(bit_kind), psi_det, (N_int,2,N_det) ] -&BEGIN_PROVIDER [ double precision, psi_coef, (N_det,N_states) ] +BEGIN_PROVIDER [ integer, psi_det_size ] + implicit none + BEGIN_DOC + ! Size of the psi_det/psi_coef arrays + END_DOC + psi_det_size = 1000 +END_PROVIDER + + BEGIN_PROVIDER [ integer(bit_kind), psi_det, (N_int,2,psi_det_size) ] +&BEGIN_PROVIDER [ double precision, psi_coef, (psi_det_size,N_states) ] implicit none BEGIN_DOC ! The wave function. Initialized with Hartree-Fock @@ -52,7 +60,7 @@ BEGIN_PROVIDER [ integer, N_det_generators ] N_det_generators = N_det END_PROVIDER -BEGIN_PROVIDER [ integer(bit_kind), psi_generators, (N_int,2,N_det) ] +BEGIN_PROVIDER [ integer(bit_kind), psi_generators, (N_int,2,psi_det_size) ] implicit none BEGIN_DOC ! Determinants on which H is applied diff --git a/src/Dets/slater_rules.irp.f b/src/Dets/slater_rules.irp.f index 93487c6d..a4814d2c 100644 --- a/src/Dets/slater_rules.irp.f +++ b/src/Dets/slater_rules.irp.f @@ -74,6 +74,7 @@ subroutine get_excitation(det1,det2,exc,degree,phase,Nint) end subroutine decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + use bitmasks implicit none BEGIN_DOC ! Decodes the exc arrays returned by get_excitation. @@ -488,6 +489,7 @@ end subroutine i_H_psim(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array) + use bitmasks implicit none integer, intent(in) :: Nint, Ndet,Ndet_max,Nstate integer, intent(in) :: keys(Nint,2,Ndet_max) @@ -515,14 +517,14 @@ end -subroutine get_excitation_degree_vector(key1,key2,degree,Nint,sze,sze_max,idx) +subroutine get_excitation_degree_vector(key1,key2,degree,Nint,sze,idx) use bitmasks implicit none BEGIN_DOC ! Applies get_excitation_degree to an array of determinants END_DOC - integer, intent(in) :: Nint, sze,sze_max - integer(bit_kind), intent(in) :: key1(Nint,2,sze_max) + integer, intent(in) :: Nint, sze + integer(bit_kind), intent(in) :: key1(Nint,2,sze) integer(bit_kind), intent(in) :: key2(Nint,2) integer, intent(out) :: degree(sze) integer, intent(out) :: idx(0:sze) @@ -531,7 +533,6 @@ subroutine get_excitation_degree_vector(key1,key2,degree,Nint,sze,sze_max,idx) ASSERT (Nint > 0) ASSERT (sze > 0) - ASSERT (sze_max >= sze) l=1 if (Nint==1) then @@ -599,14 +600,14 @@ end -subroutine filter_connected(key1,key2,Nint,sze,sze_max,idx) +subroutine filter_connected(key1,key2,Nint,sze,idx) use bitmasks implicit none BEGIN_DOC ! Filters out the determinants that are not connected by H END_DOC - integer, intent(in) :: Nint, sze,sze_max - integer(bit_kind), intent(in) :: key1(Nint,2,sze_max) + integer, intent(in) :: Nint, sze + integer(bit_kind), intent(in) :: key1(Nint,2,sze) integer(bit_kind), intent(in) :: key2(Nint,2) integer, intent(out) :: idx(0:sze) @@ -615,7 +616,6 @@ subroutine filter_connected(key1,key2,Nint,sze,sze_max,idx) ASSERT (Nint > 0) ASSERT (sze > 0) - ASSERT (sze_max >= sze) l=1 @@ -684,11 +684,11 @@ subroutine filter_connected(key1,key2,Nint,sze,sze_max,idx) idx(0) = l-1 end -subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,sze_max,idx) +subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) use bitmasks implicit none - integer, intent(in) :: Nint, sze,sze_max - integer(bit_kind), intent(in) :: key1(Nint,2,sze_max) + integer, intent(in) :: Nint, sze + integer(bit_kind), intent(in) :: key1(Nint,2,sze) integer(bit_kind), intent(in) :: key2(Nint,2) integer, intent(out) :: idx(0:sze) @@ -697,7 +697,6 @@ subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,sze_max,idx) ASSERT (Nint > 0) ASSERT (sze > 0) - ASSERT (sze_max >= sze) l=1 @@ -777,7 +776,6 @@ end double precision function diag_H_mat_elem(det_in,Nint) - use bitmasks implicit none BEGIN_DOC ! Computes @@ -947,3 +945,46 @@ subroutine get_occ_from_key(key,occ,Nint) call bitstring_to_list(key(1,2), occ(1,2), tmp, Nint) end + +subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) + use bitmasks + implicit none + BEGIN_DOC +! Computes v_0 = H|u_0> +! +! n : number of determinants +! +! H_jj : array of + END_DOC + integer, intent(in) :: n,Nint + double precision, intent(out) :: v_0(n) + double precision, intent(in) :: u_0(n) + double precision, intent(in) :: H_jj(n) + integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) + integer, allocatable :: idx(:) + double precision :: hij + integer :: i,j,k,l, jj + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + ASSERT (n>0) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,hij,j,k,idx,jj) SHARED(n,H_jj,u_0,keys_tmp,Nint)& + !$OMP SHARED(v_0) + allocate(idx(0:n)) + !$OMP DO SCHEDULE(dynamic) + do i=1,n + v_0(i) = H_jj(i) * u_0(i) + call filter_connected(keys_tmp,keys_tmp(1,1,i),Nint,n,idx) + do jj=1,idx(0) + j = idx(jj) + if (j/=i) then + call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij) + v_0(i) = v_0(i) + hij*u_0(j) + endif + enddo + enddo + !$OMP END DO + deallocate(idx) + !$OMP END PARALLEL +end + diff --git a/src/Utils/LinearAlgebra.irp.f b/src/Utils/LinearAlgebra.irp.f index 5912f0ac..ecae8cdc 100644 --- a/src/Utils/LinearAlgebra.irp.f +++ b/src/Utils/LinearAlgebra.irp.f @@ -1,10 +1,24 @@ -subroutine ortho_lowdin(overlap,lda,n,C,ldc,m) +subroutine ortho_lowdin(overlap,LDA,N,C,LDC,m) implicit none BEGIN_DOC - ! Compute U.S^-1/2 canonical orthogonalization + ! Compute C_new=C_old.S^-1/2 canonical orthogonalization. + ! + ! overlap : overlap matrix + ! + ! LDA : leftmost dimension of overlap array + ! + ! N : Overlap matrix is NxN (array is (LDA,N) ) + ! + ! C : Coefficients of the vectors to orthogonalize. On exit, + ! orthogonal vectors + ! + ! LDC : leftmost dimension of C + ! + ! m : Coefficients matrix is MxN, ( array is (LDC,N) ) + ! END_DOC - integer, intent(in) :: lda, ldc, n, m + integer, intent(in) :: LDA, ldc, n, m double precision, intent(in) :: overlap(lda,n) double precision, intent(inout) :: C(ldc,n) double precision :: U(ldc,n) @@ -34,37 +48,45 @@ subroutine ortho_lowdin(overlap,lda,n,C,ldc,m) stop endif + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(S_half,U,D,Vt,n,C,m) & + !$OMP PRIVATE(i,j,k) + + !$OMP DO do i=1,n if ( D(i) < 1.d-6 ) then D(i) = 0.d0 else D(i) = 1.d0/dsqrt(D(i)) endif - enddo - - S_half = 0.d0 - do k=1,n do j=1,n - do i=1,n - S_half(i,j) += U(i,k)*D(k)*Vt(k,j) - enddo + S_half(j,i) = 0.d0 enddo enddo + !$OMP END DO + + do k=1,n + !$OMP DO + do j=1,n + do i=1,n + S_half(i,j) = S_half(i,j) + U(i,k)*D(k)*Vt(k,j) + enddo + enddo + !$OMP END DO NOWAIT + enddo + !$OMP DO do j=1,n do i=1,m U(i,j) = C(i,j) enddo enddo + !$OMP END DO - C = 0.d0 - do j=1,n - do i=1,m - do k=1,n - C(i,j) += U(i,k)*S_half(k,j) - enddo - enddo - enddo + !$OMP END PARALLEL + + call dgemm('N','N',m,n,n,1.d0,U,size(U,1),S_half,size(S_half,1),0.d0,C,size(C,1)) end @@ -171,6 +193,17 @@ subroutine lapack_diag(eigvalues,eigvectors,H,nmax,n) allocate(A(nmax,n),eigenvalues(nmax),work(4*nmax)) integer :: LWORK, info, i,j,l,k A=H + +! if (n<30) then +! do i=1,n +! do j=1,n +! print *, j,i, H(j,i) +! enddo +! print *, '---' +! enddo +! print *, '---' +! endif + LWORK = 4*nmax call dsyev( 'V', 'U', n, A, nmax, eigenvalues, work, LWORK, info ) if (info < 0) then diff --git a/src/Utils/util.irp.f b/src/Utils/util.irp.f index 20e09917..e7116334 100644 --- a/src/Utils/util.irp.f +++ b/src/Utils/util.irp.f @@ -214,7 +214,6 @@ double precision function u_dot_v(u,v,sze) implicit none BEGIN_DOC ! Compute - ! u and v are expected to be aligned in memory. END_DOC integer, intent(in) :: sze double precision, intent(in) :: u(sze),v(sze) @@ -227,14 +226,10 @@ double precision function u_dot_v(u,v,sze) t3 = t2+t2 t4 = t3+t2 u_dot_v = 0.d0 - !DIR$ VECTOR ALWAYS - !DIR$ VECTOR ALIGNED do i=1,t2 u_dot_v = u_dot_v + u(t1+i)*v(t1+i) + u(t2+i)*v(t2+i) + & u(t3+i)*v(t3+i) + u(t4+i)*v(t4+i) enddo - !DIR$ VECTOR ALWAYS - !DIR$ VECTOR ALIGNED do i=t4+t2+1,sze u_dot_v = u_dot_v + u(i)*v(i) enddo @@ -245,7 +240,6 @@ double precision function u_dot_u(u,sze) implicit none BEGIN_DOC ! Compute - ! u is expected to be aligned in memory. END_DOC integer, intent(in) :: sze double precision, intent(in) :: u(sze) From df9efae33541919dfcf7161e85af1ad1a9b71d6f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 15 May 2014 00:28:25 +0200 Subject: [PATCH 3/4] Accelerated Davidson --- src/Dets/davidson.irp.f | 296 ++++++++++++++++++++---------------- src/Dets/slater_rules.irp.f | 2 +- 2 files changed, 162 insertions(+), 136 deletions(-) diff --git a/src/Dets/davidson.irp.f b/src/Dets/davidson.irp.f index 6282d6a9..928bcb72 100644 --- a/src/Dets/davidson.irp.f +++ b/src/Dets/davidson.irp.f @@ -12,7 +12,7 @@ BEGIN_PROVIDER [ integer, davidson_sze_max] ! Max number of Davidson sizes END_DOC ASSERT (davidson_sze_max <= davidson_iter_max) - davidson_sze_max = 10 + davidson_sze_max = 4 END_PROVIDER subroutine davidson_diag(dets_in,u_in,energies,dim_in,sze,N_st,Nint) @@ -40,7 +40,7 @@ subroutine davidson_diag(dets_in,u_in,energies,dim_in,sze,N_st,Nint) double precision, intent(out) :: energies(N_st) integer :: iter - integer :: i,j,k,l + integer :: i,j,k,l,m logical :: converged double precision :: overlap(N_st,N_st) @@ -69,23 +69,9 @@ subroutine davidson_diag(dets_in,u_in,energies,dim_in,sze,N_st,Nint) ASSERT (sze > 0) ASSERT (Nint > 0) ASSERT (Nint == N_int) - ! Conventions: - ! i,j : 1,sze - ! k,l : 1,N_st - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(k,i) SHARED(U,u_in,sze,N_st) - do k=1,N_st - !$OMP DO - do i=1,sze - U(i,k,1) = u_in(i,k) - enddo - !$OMP END DO NOWAIT - enddo - !$OMP END PARALLEL - - ! Orthonormalize initial guess - ! ============================ + ! Initialization + ! ============== k_pairs=0 do l=1,N_st @@ -95,158 +81,198 @@ subroutine davidson_diag(dets_in,u_in,energies,dim_in,sze,N_st,Nint) kl_pairs(2,k_pairs) = l enddo enddo - !$OMP PARALLEL DO DEFAULT(NONE) & - !$OMP SHARED(U,sze,N_st,overlap,kl_pairs,k_pairs) & - !$OMP PRIVATE(k,l,kl) - do kl=1,k_pairs - k = kl_pairs(1,kl) - l = kl_pairs(2,kl) - if (k==l) then - overlap(k,k) = u_dot_u(U(1,k,1),sze) - endif - overlap(k,l) = u_dot_v(U(1,k,1),U(1,l,1),sze) - overlap(l,k) = overlap(k,l) - enddo - !$OMP END PARALLEL DO - call ortho_lowdin(overlap,size(overlap,1),N_st,U,size(U,1),sze) - !$OMP PARALLEL DO DEFAULT(NONE) & - !$OMP PRIVATE(i) & - !$OMP SHARED(H_jj,Nint,dets_in,sze) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(U,sze,N_st,overlap,kl_pairs,k_pairs, & + !$OMP H_jj,Nint,dets_in,u_in) & + !$OMP PRIVATE(k,l,kl,i) + + !$OMP DO do i=1,sze H_jj(i) = diag_h_mat_elem(dets_in(1,1,i),Nint) enddo - !$OMP END PARALLEL DO + !$OMP END DO NOWAIT + + ! Orthonormalize initial guess + ! ============================ + + !$OMP DO + do kl=1,k_pairs + k = kl_pairs(1,kl) + l = kl_pairs(2,kl) + if (k/=l) then + overlap(k,l) = u_dot_v(U_in(1,k),U_in(1,l),sze) + overlap(l,k) = overlap(k,l) + else + overlap(k,k) = u_dot_u(U_in(1,k),sze) + endif + enddo + !$OMP END DO + !$OMP END PARALLEL + call ortho_lowdin(overlap,size(overlap,1),N_st,U_in,size(U_in,1),sze) ! Davidson iterations ! =================== converged = .False. - do iter=1,davidson_sze_max-1 - print *, 'iter = ',iter - -! print *, '***************' -! do i=1,iter -! do k=1,N_st -! do j=1,iter -! do l=1,N_st -! print '(4(I4,X),F16.8)', i,j,k,l, u_dot_v(U(1,k,i),U(1,l,j),sze) -! enddo -! enddo -! enddo -! enddo -! print *, '***************' - - ! Compute W_k = H |u_k> - ! ---------------------- + do while (.not.converged) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(k,i) SHARED(U,u_in,sze,N_st) do k=1,N_st - call H_u_0(W(1,k),U(1,k,iter),H_jj,sze,dets_in,Nint) + !$OMP DO + do i=1,sze + U(i,k,1) = u_in(i,k) + enddo + !$OMP END DO enddo + !$OMP END PARALLEL - ! Compute h_kl = = - ! ------------------------------------------- - do l=1,N_st - do iter2=1,iter-1 - do k=1,N_st - h(k,iter2,l,iter) = u_dot_v(U(1,k,iter2),W(1,l),sze) - h(k,iter,l,iter2) = h(k,iter2,l,iter) + do iter=1,davidson_sze_max-1 + print *, 'iter = ',iter + + ! print *, '***************' + ! do i=1,iter + ! do k=1,N_st + ! do j=1,iter + ! do l=1,N_st + ! print '(4(I4,X),F16.8)', i,j,k,l, u_dot_v(U(1,k,i),U(1,l,j),sze) + ! enddo + ! enddo + ! enddo + ! enddo + ! print *, '***************' + + ! Compute W_k = H |u_k> + ! ---------------------- + + do k=1,N_st + call H_u_0(W(1,k),U(1,k,iter),H_jj,sze,dets_in,Nint) + enddo + + ! Compute h_kl = = + ! ------------------------------------------- + + !$OMP PARALLEL + !$OMP SINGLE + do l=1,N_st + do iter2=1,iter-1 + do k=1,N_st + !$OMP TASK FIRSTPRIVATE(k,iter,l,iter2) + h(k,iter2,l,iter) = u_dot_v(U(1,k,iter2),W(1,l),sze) + h(k,iter,l,iter2) = h(k,iter2,l,iter) + !$OMP END TASK + enddo + enddo + do k=1,l + !$OMP TASK FIRSTPRIVATE(k,iter,l) + h(k,iter,l,iter) = u_dot_v(U(1,k,iter),W(1,l),sze) + h(l,iter,k,iter) = h(k,iter,l,iter) + !$OMP END TASK enddo enddo - do k=1,l - h(k,iter,l,iter) = u_dot_v(U(1,k,iter),W(1,l),sze) - h(l,iter,k,iter) = h(k,iter,l,iter) + !$OMP END SINGLE NOWAIT + !$OMP TASKWAIT + !$OMP END PARALLEL + + ! Diagonalize h + ! ------------- + call lapack_diag(lambda,y,h,N_st*davidson_sze_max,N_st*iter) + + print *, lambda(1:4) + ! Express eigenvectors of h in the determinant basis + ! -------------------------------------------------- + + !TODO dgemm + do k=1,N_st + do i=1,sze + U(i,k,iter+1) = 0.d0 + do iter2=1,iter + do l=1,N_st + U(i,k,iter+1) += U(i,l,iter2)*y(l,iter2,k,1) + enddo + enddo + enddo enddo - enddo - - ! Diagonalize h - ! ------------- - call lapack_diag(lambda,y,h,N_st*davidson_sze_max,N_st*iter) - - ! Express eigenvectors of h in the determinant basis - ! -------------------------------------------------- - - !TODO dgemm - do k=1,N_st - do i=1,sze - U(i,k,iter+1) = 0.d0 + + ! Compute residual vector + ! ----------------------- + + do k=1,N_st + call H_u_0(W(1,k),U(1,k,iter+1),H_jj,sze,dets_in,Nint) + enddo + + do k=1,N_st + do i=1,sze + R(i,k) = lambda(k) * U(i,k,iter+1) - W(i,k) + enddo + residual_norm(k) = u_dot_u(R(1,k),sze) + enddo + print *, 'Lambda' + print *, lambda(1:N_st) + nuclear_repulsion + print *, 'Residual_norm' + print *, residual_norm(1:N_st) + print *, '' + + converged = maxval(residual_norm) < 1.d-10 + if (converged) then + exit + endif + + ! Davidson step + ! ------------- + + do k=1,N_st + do i=1,sze + U(i,k,iter+1) = 1.d0/(lambda(k) - H_jj(i)) * R(i,k) + enddo + enddo + + ! Gram-Schmidt + ! ------------ + + double precision :: c + do k=1,N_st do iter2=1,iter do l=1,N_st - U(i,k,iter+1) += U(i,l,iter2)*y(l,iter2,k,1) + c = u_dot_v(U(1,k,iter+1),U(1,l,iter2),sze) + do i=1,sze + U(i,k,iter+1) -= c * U(i,l,iter2) + enddo enddo enddo + do l=1,k-1 + c = u_dot_v(U(1,k,iter+1),U(1,l,iter+1),sze) + do i=1,sze + U(i,k,iter+1) -= c * U(i,l,iter+1) + enddo + enddo + call normalize( U(1,k,iter+1), sze ) enddo enddo - ! Compute residual vector - ! ----------------------- - - do k=1,N_st - call H_u_0(W(1,k),U(1,k,iter+1),H_jj,sze,dets_in,Nint) - enddo - - do k=1,N_st - do i=1,sze - R(i,k) = lambda(k) * U(i,k,iter+1) - W(i,k) - enddo - residual_norm(k) = u_dot_u(R(1,k),sze) - enddo - print *, 'Lambda' - print *, lambda(1:N_st) + nuclear_repulsion - print *, 'Residual_norm' - print *, residual_norm(1:N_st) - print *, '' - - converged = maxval(residual_norm) < 1.d-10 - if (converged) then - exit + if (.not.converged) then + iter = davidson_sze_max endif - ! Davidson step - ! ------------- + ! Re-contract to u_in + ! ----------- do k=1,N_st + energies(k) = lambda(k) do i=1,sze - U(i,k,iter+1) = 1.d0/(lambda(k) - H_jj(i)) * R(i,k) - enddo - enddo - - ! Gram-Schmidt - ! ------------ - - double precision :: c - do k=1,N_st - do iter2=1,iter - do l=1,N_st - c = u_dot_v(U(1,k,iter+1),U(1,l,iter2),sze) - do i=1,sze - U(i,k,iter+1) -= c * U(i,l,iter2) + u_in(i,k) = 0.d0 + do iter2=1,iter + do l=1,N_st + u_in(i,k) += U(i,l,iter2)*y(l,iter2,k,1) enddo enddo enddo - do l=1,k-1 - c = u_dot_v(U(1,k,iter+1),U(1,l,iter+1),sze) - do i=1,sze - U(i,k,iter+1) -= c * U(i,l,iter+1) - enddo - enddo - call normalize( U(1,k,iter+1), sze ) enddo enddo - do k=1,N_st - energies(k) = lambda(k) - do i=1,sze - u_in(i,k) = 0.d0 - do iter2=1,iter - do l=1,N_st - u_in(i,k) += U(i,l,iter2)*y(l,iter2,k,1) - enddo - enddo - enddo - enddo - + deallocate ( & kl_pairs, & H_jj, & diff --git a/src/Dets/slater_rules.irp.f b/src/Dets/slater_rules.irp.f index a4814d2c..4e45f586 100644 --- a/src/Dets/slater_rules.irp.f +++ b/src/Dets/slater_rules.irp.f @@ -971,7 +971,7 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) !$OMP PRIVATE(i,hij,j,k,idx,jj) SHARED(n,H_jj,u_0,keys_tmp,Nint)& !$OMP SHARED(v_0) allocate(idx(0:n)) - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do i=1,n v_0(i) = H_jj(i) * u_0(i) call filter_connected(keys_tmp,keys_tmp(1,1,i),Nint,n,idx) From 13ee3531eee54eb5fc8708b446acb4729e31b746 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 15 May 2014 17:58:30 +0200 Subject: [PATCH 4/4] Acceleration of filter_connected and Davidson --- src/AOs/tests/Makefile | 33 ++++++++++++++++++++++ src/BiInts/README.rst | 3 +- src/CISD/cisd.irp.f | 11 ++++++-- src/Dets/H_apply_template.f | 4 +-- src/Dets/davidson.irp.f | 50 +++++++++++++++------------------- src/Dets/slater_rules.irp.f | 34 +++++++++++++++-------- src/Hartree_Fock/README.rst | 3 +- src/MOs/README.rst | 6 ++-- src/MonoInts/kin_ao_ints.irp.f | 1 + src/Utils/map_module.f90 | 2 +- src/Utils/util.irp.f | 16 +++++++---- 11 files changed, 108 insertions(+), 55 deletions(-) create mode 100644 src/AOs/tests/Makefile diff --git a/src/AOs/tests/Makefile b/src/AOs/tests/Makefile new file mode 100644 index 00000000..77bd84ba --- /dev/null +++ b/src/AOs/tests/Makefile @@ -0,0 +1,33 @@ +OPENMP =1 +PROFILE =0 +DEBUG = 0 + +IRPF90+= -I tests + +REF_FILES=$(subst %.irp.f, %.ref, $(wildcard *.irp.f)) + +.PHONY: clean executables serial_tests parallel_tests + +all: clean executables serial_tests parallel_tests + +parallel_tests: $(REF_FILES) + @echo ; echo " ---- Running parallel tests ----" ; echo + @OMP_NUM_THREADS=10 ${QPACKAGE_ROOT}/scripts/run_tests.py + +serial_tests: $(REF_FILES) + @echo ; echo " ---- Running serial tests ----" ; echo + @OMP_NUM_THREADS=1 ${QPACKAGE_ROOT}/scripts/run_tests.py + +executables: $(wildcard *.irp.f) veryclean + $(MAKE) -C .. + +%.ref: $(wildcard $(QPACKAGE_ROOT)/data/inputs/*.md5) executables + $(QPACKAGE_ROOT)/scripts/create_test_ref.sh $* + +clean: + $(MAKE) -C .. clean + +veryclean: + $(MAKE) -C .. veryclean + + diff --git a/src/BiInts/README.rst b/src/BiInts/README.rst index b2cdffc7..48fa022b 100644 --- a/src/BiInts/README.rst +++ b/src/BiInts/README.rst @@ -107,7 +107,8 @@ Documentation AO integrals `bielec_integrals_index `_ -None + Undocumented + `clear_ao_map `_ Frees the memory of the AO map diff --git a/src/CISD/cisd.irp.f b/src/CISD/cisd.irp.f index 0b68124a..d2f6de94 100644 --- a/src/CISD/cisd.irp.f +++ b/src/CISD/cisd.irp.f @@ -1,11 +1,16 @@ program cisd implicit none - integer :: i + integer :: i,k double precision, allocatable :: eigvalues(:),eigvectors(:,:) + PROVIDE ref_bitmask_energy call H_apply_cisd - allocate(eigvalues(n_det),eigvectors(n_det,n_det)) + allocate(eigvalues(n_states),eigvectors(n_det,n_states)) print *, 'N_det = ', N_det - psi_coef = psi_coef - 1.d-4 + print *, 'N_states = ', N_states + psi_coef = - 1.d-4 + do k=1,N_states + psi_coef(k,k) = 1.d0 + enddo call davidson_diag(psi_det,psi_coef,eigvalues,size(psi_coef,1),N_det,N_states,N_int) print *, '---' diff --git a/src/Dets/H_apply_template.f b/src/Dets/H_apply_template.f index 21c73377..3c13d31f 100644 --- a/src/Dets/H_apply_template.f +++ b/src/Dets/H_apply_template.f @@ -26,7 +26,7 @@ subroutine $subroutine_diexc(key_in, hole_1,particl_1, hole_2, particl_2 $parame integer, allocatable :: ia_ja_pairs(:,:,:) double precision :: diag_H_mat_elem, E_ref - PROVIDE mo_integrals_map + PROVIDE mo_integrals_map ref_bitmask_energy PROVIDE mo_bielec_integrals_in_map $set_i_H_j_threshold @@ -257,7 +257,7 @@ subroutine $subroutine_monoexc(key_in, hole_1,particl_1 $parameters ) integer, allocatable :: ia_ja_pairs(:,:,:) double precision :: diag_H_mat_elem, E_ref - PROVIDE mo_integrals_map + PROVIDE mo_integrals_map ref_bitmask_energy PROVIDE mo_bielec_integrals_in_map $set_i_H_j_threshold diff --git a/src/Dets/davidson.irp.f b/src/Dets/davidson.irp.f index 928bcb72..780d54b5 100644 --- a/src/Dets/davidson.irp.f +++ b/src/Dets/davidson.irp.f @@ -12,7 +12,7 @@ BEGIN_PROVIDER [ integer, davidson_sze_max] ! Max number of Davidson sizes END_DOC ASSERT (davidson_sze_max <= davidson_iter_max) - davidson_sze_max = 4 + davidson_sze_max = 8 END_PROVIDER subroutine davidson_diag(dets_in,u_in,energies,dim_in,sze,N_st,Nint) @@ -50,15 +50,17 @@ subroutine davidson_diag(dets_in,u_in,energies,dim_in,sze,N_st,Nint) integer :: k_pairs, kl integer :: iter2 - double precision, allocatable :: W(:,:), H_jj(:), U(:,:,:), R(:,:) + double precision, allocatable :: W(:,:,:), H_jj(:), U(:,:,:), R(:,:) double precision, allocatable :: y(:,:,:,:), h(:,:,:,:), lambda(:) double precision :: diag_h_mat_elem double precision :: residual_norm(N_st) + PROVIDE ref_bitmask_energy + allocate( & kl_pairs(2,N_st*(N_st+1)/2), & H_jj(sze), & - W(sze,N_st), & + W(sze,N_st,davidson_sze_max), & U(sze,N_st,davidson_sze_max), & R(sze,N_st), & h(N_st,davidson_sze_max,N_st,davidson_sze_max), & @@ -109,6 +111,7 @@ subroutine davidson_diag(dets_in,u_in,energies,dim_in,sze,N_st,Nint) enddo !$OMP END DO !$OMP END PARALLEL + call ortho_lowdin(overlap,size(overlap,1),N_st,U_in,size(U_in,1),sze) ! Davidson iterations @@ -148,33 +151,24 @@ subroutine davidson_diag(dets_in,u_in,energies,dim_in,sze,N_st,Nint) ! ---------------------- do k=1,N_st - call H_u_0(W(1,k),U(1,k,iter),H_jj,sze,dets_in,Nint) + call H_u_0(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint) enddo ! Compute h_kl = = ! ------------------------------------------- - !$OMP PARALLEL - !$OMP SINGLE do l=1,N_st - do iter2=1,iter-1 - do k=1,N_st - !$OMP TASK FIRSTPRIVATE(k,iter,l,iter2) - h(k,iter2,l,iter) = u_dot_v(U(1,k,iter2),W(1,l),sze) + do k=1,N_st + do iter2=1,iter-1 + h(k,iter2,l,iter) = u_dot_v(U(1,k,iter2),W(1,l,iter),sze) h(k,iter,l,iter2) = h(k,iter2,l,iter) - !$OMP END TASK enddo enddo do k=1,l - !$OMP TASK FIRSTPRIVATE(k,iter,l) - h(k,iter,l,iter) = u_dot_v(U(1,k,iter),W(1,l),sze) + h(k,iter,l,iter) = u_dot_v(U(1,k,iter),W(1,l,iter),sze) h(l,iter,k,iter) = h(k,iter,l,iter) - !$OMP END TASK enddo enddo - !$OMP END SINGLE NOWAIT - !$OMP TASKWAIT - !$OMP END PARALLEL ! Diagonalize h ! ------------- @@ -184,13 +178,17 @@ subroutine davidson_diag(dets_in,u_in,energies,dim_in,sze,N_st,Nint) ! Express eigenvectors of h in the determinant basis ! -------------------------------------------------- - !TODO dgemm + ! call dgemm ( 'N','N', sze, N_st*iter, N_st, & + ! 1.d0, U(1,1,1), size(U,1), y(1,1,1,1), size(y,1)*size(y,2), & + ! 0.d0, U(1,1,iter+1), size(U,1) ) do k=1,N_st do i=1,sze U(i,k,iter+1) = 0.d0 - do iter2=1,iter - do l=1,N_st - U(i,k,iter+1) += U(i,l,iter2)*y(l,iter2,k,1) + W(i,k,iter+1) = 0.d0 + do l=1,N_st + do iter2=1,iter + U(i,k,iter+1) = U(i,k,iter+1) + U(i,l,iter2)*y(l,iter2,k,1) + W(i,k,iter+1) = W(i,k,iter+1) + W(i,l,iter2)*y(l,iter2,k,1) enddo enddo enddo @@ -199,13 +197,9 @@ subroutine davidson_diag(dets_in,u_in,energies,dim_in,sze,N_st,Nint) ! Compute residual vector ! ----------------------- - do k=1,N_st - call H_u_0(W(1,k),U(1,k,iter+1),H_jj,sze,dets_in,Nint) - enddo - do k=1,N_st do i=1,sze - R(i,k) = lambda(k) * U(i,k,iter+1) - W(i,k) + R(i,k) = lambda(k) * U(i,k,iter+1) - W(i,k,iter+1) enddo residual_norm(k) = u_dot_u(R(1,k),sze) enddo @@ -215,7 +209,7 @@ subroutine davidson_diag(dets_in,u_in,energies,dim_in,sze,N_st,Nint) print *, residual_norm(1:N_st) print *, '' - converged = maxval(residual_norm) < 1.d-10 + converged = maxval(residual_norm) < 1.d-5 if (converged) then exit endif @@ -253,7 +247,7 @@ subroutine davidson_diag(dets_in,u_in,energies,dim_in,sze,N_st,Nint) enddo if (.not.converged) then - iter = davidson_sze_max + iter = davidson_sze_max-1 endif ! Re-contract to u_in diff --git a/src/Dets/slater_rules.irp.f b/src/Dets/slater_rules.irp.f index 4e45f586..59dc68ad 100644 --- a/src/Dets/slater_rules.irp.f +++ b/src/Dets/slater_rules.irp.f @@ -964,26 +964,38 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) integer, allocatable :: idx(:) double precision :: hij integer :: i,j,k,l, jj + integer :: i0, j0 ASSERT (Nint > 0) ASSERT (Nint == N_int) - ASSERT (n>0) + ASSERT (n>0) + PROVIDE ref_bitmask_energy + integer, parameter :: block_size = 157 !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,j,k,idx,jj) SHARED(n,H_jj,u_0,keys_tmp,Nint)& !$OMP SHARED(v_0) - allocate(idx(0:n)) - !$OMP DO SCHEDULE(guided) + allocate(idx(0:block_size)) + !$OMP DO SCHEDULE(static) do i=1,n v_0(i) = H_jj(i) * u_0(i) - call filter_connected(keys_tmp,keys_tmp(1,1,i),Nint,n,idx) - do jj=1,idx(0) - j = idx(jj) - if (j/=i) then - call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij) - v_0(i) = v_0(i) + hij*u_0(j) - endif - enddo enddo !$OMP END DO + !$OMP DO SCHEDULE(guided) + do i0=1,n,block_size + do j0=1,n,block_size + do i=i0,min(i0+block_size-1,n) + call filter_connected(keys_tmp(1,1,j0),keys_tmp(1,1,i),Nint,min(block_size,i-j0+1),idx) + do jj=1,idx(0) + j = idx(jj)+j0-1 + if ( (j 1.d-8)) then + call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij) + v_0(i) = v_0(i) + hij*u_0(j) + v_0(j) = v_0(j) + hij*u_0(i) + endif + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT deallocate(idx) !$OMP END PARALLEL end diff --git a/src/Hartree_Fock/README.rst b/src/Hartree_Fock/README.rst index 94c85fc1..bd52e526 100644 --- a/src/Hartree_Fock/README.rst +++ b/src/Hartree_Fock/README.rst @@ -82,7 +82,8 @@ Documentation Diagonal Fock matrix in the MO basis `scf_iteration `_ -None + Undocumented + `do_diis `_ If True, compute integrals on the fly diff --git a/src/MOs/README.rst b/src/MOs/README.rst index 3e4981df..659db5d5 100644 --- a/src/MOs/README.rst +++ b/src/MOs/README.rst @@ -54,8 +54,10 @@ Documentation Aligned variable for dimensioning of arrays `mo_as_eigvectors_of_mo_matrix `_ -None + Undocumented + `save_mos `_ -None + Undocumented + diff --git a/src/MonoInts/kin_ao_ints.irp.f b/src/MonoInts/kin_ao_ints.irp.f index 444c3880..0a5cccd3 100644 --- a/src/MonoInts/kin_ao_ints.irp.f +++ b/src/MonoInts/kin_ao_ints.irp.f @@ -11,6 +11,7 @@ double precision :: A_center(3), B_center(3) integer :: power_A(3), power_B(3) double precision :: d_a_2,d_2 + PROVIDE all_utils dim1=100 BEGIN_DOC ! second derivatives matrix elements in the ao basis diff --git a/src/Utils/map_module.f90 b/src/Utils/map_module.f90 index ff6813e4..ba36e323 100644 --- a/src/Utils/map_module.f90 +++ b/src/Utils/map_module.f90 @@ -482,7 +482,7 @@ subroutine cache_map_get_interval(map, key, value, ibegin, iend, idx) if (idx > 0) then value = map%value(idx) else - value = 0. + value = 0._integral_kind endif end diff --git a/src/Utils/util.irp.f b/src/Utils/util.irp.f index e7116334..e25148f1 100644 --- a/src/Utils/util.irp.f +++ b/src/Utils/util.irp.f @@ -253,12 +253,16 @@ double precision function u_dot_u(u,sze) t3 = t2+t2 t4 = t3+t2 u_dot_u = 0.d0 - do i=1,t2 - u_dot_u = u_dot_u + u(t1+i)*u(t1+i) + u(t2+i)*u(t2+i) + & - u(t3+i)*u(t3+i) + u(t4+i)*u(t4+i) - enddo - do i=t4+t2+1,sze - u_dot_u = u_dot_u+u(i)*u(i) +! do i=1,t2 +! u_dot_u = u_dot_u + u(t1+i)*u(t1+i) + u(t2+i)*u(t2+i) + & +! u(t3+i)*u(t3+i) + u(t4+i)*u(t4+i) +! enddo +! do i=t4+t2+1,sze +! u_dot_u = u_dot_u+u(i)*u(i) +! enddo + + do i=1,sze + u_dot_u = u_dot_u + u(i)*u(i) enddo end