From c0178a11ea85528e646ef550453ddd7389b1f2cf Mon Sep 17 00:00:00 2001 From: v1j4y Date: Fri, 4 Feb 2022 16:27:06 +0100 Subject: [PATCH 01/70] Working on csf sigma vector verification. --- external/qp2-dependencies | 2 +- src/csf/configuration_CI_sigma_helpers.irp.f | 658 ++++++-- src/csf/sigma_vector.irp.f | 1399 +++++++++++++++-- .../diagonalization_hcsf_dressed.irp.f | 42 +- 4 files changed, 1908 insertions(+), 193 deletions(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index bc856147..90ee61f5 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit bc856147f6e626a6616b20344e5b8e3f30f44a92 +Subproject commit 90ee61f5041c7c94a0c605625a264860292813a0 diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f index f73362eb..a4aa41ff 100644 --- a/src/csf/configuration_CI_sigma_helpers.irp.f +++ b/src/csf/configuration_CI_sigma_helpers.irp.f @@ -1,3 +1,566 @@ +use bitmasks + + BEGIN_PROVIDER [ integer(bit_kind), alphasIcfg_list , (N_int,2,N_configuration,mo_num*(mo_num))] +&BEGIN_PROVIDER [ integer, NalphaIcfg_list, (N_configuration) ] + implicit none + !use bitmasks + BEGIN_DOC + ! Documentation for alphasI + ! Returns the associated alpha's for + ! the input configuration Icfg. + END_DOC + + integer :: idxI ! The id of the Ith CFG + integer(bit_kind) :: Icfg(N_int,2) + integer :: NalphaIcfg + logical,dimension(:,:),allocatable :: tableUniqueAlphas + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: ndiffAll + integer :: i + integer :: j + integer :: k + integer :: kstart + integer :: kend + integer :: Nsomo_I + integer :: hole + integer :: p + integer :: q + integer :: countalphas + logical :: pqAlreadyGenQ + logical :: pqExistsQ + logical :: ppExistsQ + + double precision :: t0, t1 + call wall_time(t0) + + allocate(tableUniqueAlphas(mo_num,mo_num)) + NalphaIcfg_list = 0 + + do idxI = 1, N_configuration + + Icfg = psi_configuration(:,:,idxI) + + Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) + Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) + + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + do i = 1,mo_num + if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = i + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do i = 1,mo_num + if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = i + holetype(nholes) = 2 + endif + end do + + ! find vmos + listvmos = -1 + vmotype = -1 + nvmos = 0 + do i = 1,mo_num + if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then + if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = i + vmotype(nvmos) = 1 + else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then + nvmos += 1 + listvmos(nvmos) = i + vmotype(nvmos) = 2 + end if + end if + end do + + tableUniqueAlphas = .FALSE. + + ! Now find the allowed (p,q) excitations + Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) + Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) + Nsomo_I = POPCNT(Isomo) + if(Nsomo_I .EQ. 0) then + kstart = 1 + else + kstart = cfg_seniority_index(max(0,Nsomo_I-2)) + endif + kend = idxI-1 + + do i = 1,nholes + p = listholes(i) + do j = 1,nvmos + q = listvmos(j) + if(p .EQ. q) cycle + if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then + ! SOMO -> VMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = Idomo + kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-2))) + kend = idxI-1 + else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then + ! SOMO -> SOMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBSET(Idomo,q-1) + kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-4))) + kend = idxI-1 + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then + ! DOMO -> VMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + kstart = cfg_seniority_index(Nsomo_I) + kend = idxI-1 + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then + ! DOMO -> SOMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + Jdomo = IBSET(Jdomo,q-1) + kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-2))) + kend = idxI-1 + else + print*,"Something went wrong in obtain_associated_alphaI" + endif + + ! Again, we don't have to search from 1 + ! we just use seniority to find the + ! first index with NSOMO - 2 to NSOMO + 2 + ! this is what is done in kstart, kend + + pqAlreadyGenQ = .FALSE. + ! First check if it can be generated before + do k = kstart, kend + diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k))) + ndiffSOMO = POPCNT(diffSOMO) + if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle + diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then + if((ndiffSOMO+ndiffDOMO) .EQ. 0) then + pqAlreadyGenQ = .TRUE. + ppExistsQ = .TRUE. + EXIT + endif + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + pqAlreadyGenQ = .TRUE. + EXIT + endif + end do + + if(pqAlreadyGenQ) cycle + + pqExistsQ = .FALSE. + + if(.NOT. pqExistsQ) then + tableUniqueAlphas(p,q) = .TRUE. + endif + end do + end do + + !print *,tableUniqueAlphas(:,:) + + ! prune list of alphas + Isomo = Icfg(1,1) + Idomo = Icfg(1,2) + Jsomo = Icfg(1,1) + Jdomo = Icfg(1,2) + NalphaIcfg = 0 + do i = 1, nholes + p = listholes(i) + do j = 1, nvmos + q = listvmos(j) + if(p .EQ. q) cycle + if(tableUniqueAlphas(p,q)) then + if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then + ! SOMO -> VMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = Idomo + else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then + ! SOMO -> SOMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBSET(Idomo,q-1) + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then + ! DOMO -> VMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then + ! DOMO -> SOMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + Jdomo = IBSET(Jdomo,q-1) + else + print*,"Something went wrong in obtain_associated_alphaI" + endif + + ! SOMO + NalphaIcfg += 1 + !print *,i,j,"|",NalphaIcfg + alphasIcfg_list(1,1,idxI,NalphaIcfg) = Jsomo + alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + NalphaIcfg_list(idxI) = NalphaIcfg + endif + end do + end do + + ! Check if this Icfg has been previously generated as a mono + ppExistsQ = .False. + Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) + Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) + kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-2))) + do k = kstart, idxI-1 + diffSOMO = IEOR(Isomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k))) + ndiffSOMO = POPCNT(diffSOMO) + if (ndiffSOMO /= 2) cycle + diffDOMO = IEOR(Idomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4)) then + ppExistsQ = .TRUE. + EXIT + endif + end do + ! Diagonal part (pp,qq) + if(nholes > 0 .AND. (.NOT. ppExistsQ))then + ! SOMO + NalphaIcfg += 1 + alphasIcfg_list(1,1,idxI,NalphaIcfg) = Icfg(1,1) + alphasIcfg_list(1,2,idxI,NalphaIcfg) = Icfg(1,2) + NalphaIcfg_list(idxI) = NalphaIcfg + endif + + NalphaIcfg = 0 + enddo ! end loop idxI + call wall_time(t1) + print *, 'Preparation : ', t1 - t0 + +END_PROVIDER + + subroutine obtain_associated_alphaI(idxI, Icfg, alphasIcfg, NalphaIcfg) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for alphasI + ! Returns the associated alpha's for + ! the input configuration Icfg. + END_DOC + + integer,intent(in) :: idxI ! The id of the Ith CFG + integer(bit_kind),intent(in) :: Icfg(N_int,2) + integer,intent(out) :: NalphaIcfg + integer(bit_kind),intent(out) :: alphasIcfg(N_int,2,*) + logical,dimension(:,:),allocatable :: tableUniqueAlphas + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: ndiffAll + integer :: i + integer :: j + integer :: k + integer :: kstart + integer :: kend + integer :: Nsomo_I + integer :: hole + integer :: p + integer :: q + integer :: countalphas + logical :: pqAlreadyGenQ + logical :: pqExistsQ + logical :: ppExistsQ + Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) + Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) + !print*,"Input cfg" + !call debug_spindet(Isomo,1) + !call debug_spindet(Idomo,1) + + !print*,n_act_orb, "monum=",mo_num," n_core=",n_core_orb + + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + do i = 1,mo_num + if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = i + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do i = 1,mo_num + if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = i + holetype(nholes) = 2 + endif + end do + + ! find vmos + listvmos = -1 + vmotype = -1 + nvmos = 0 + do i = 1,mo_num + !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) + if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = i + vmotype(nvmos) = 1 + else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0 ) then + nvmos += 1 + listvmos(nvmos) = i + vmotype(nvmos) = 2 + end if + end do + + !print *,"Nvmo=",nvmos + !print *,listvmos + !print *,vmotype + + allocate(tableUniqueAlphas(mo_num,mo_num)) + tableUniqueAlphas = .FALSE. + + ! Now find the allowed (p,q) excitations + Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) + Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) + Nsomo_I = POPCNT(Isomo) + if(Nsomo_I .EQ. 0) then + kstart = 1 + else + kstart = cfg_seniority_index(max(0,Nsomo_I-2)) + endif + kend = idxI-1 + !print *,"Isomo" + !call debug_spindet(Isomo,1) + !call debug_spindet(Idomo,1) + + !print *,"Nholes=",nholes," Nvmos=",nvmos, " idxi=",idxI + !do i = 1,nholes + ! print *,i,"->",listholes(i) + !enddo + !do i = 1,nvmos + ! print *,i,"->",listvmos(i) + !enddo + + do i = 1,nholes + p = listholes(i) + do j = 1,nvmos + q = listvmos(j) + if(p .EQ. q) cycle + if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then + ! SOMO -> VMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = Idomo + kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-2))) + kend = idxI-1 + else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then + ! SOMO -> SOMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBSET(Idomo,q-1) + kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-4))) + kend = idxI-1 + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then + ! DOMO -> VMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + kstart = cfg_seniority_index(Nsomo_I) + kend = idxI-1 + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then + ! DOMO -> SOMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + Jdomo = IBSET(Jdomo,q-1) + kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-2))) + kend = idxI-1 + else + print*,"Something went wrong in obtain_associated_alphaI" + endif + + ! Again, we don't have to search from 1 + ! we just use seniortiy to find the + ! first index with NSOMO - 2 to NSOMO + 2 + ! this is what is done in kstart, kend + + pqAlreadyGenQ = .FALSE. + ! First check if it can be generated before + do k = kstart, kend + diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k))) + ndiffSOMO = POPCNT(diffSOMO) + if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle + diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then + if((ndiffSOMO+ndiffDOMO) .EQ. 0) then + pqAlreadyGenQ = .TRUE. + ppExistsQ = .TRUE. + EXIT + endif + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + pqAlreadyGenQ = .TRUE. + !EXIT + !ppExistsQ = .TRUE. + !print *,i,k,ndiffSOMO,ndiffDOMO + !call debug_spindet(Jsomo,1) + !call debug_spindet(Jdomo,1) + !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)),1) + !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)),1) + EXIT + endif + end do + + !print *,"(,",p,",",q,")",pqAlreadyGenQ + + if(pqAlreadyGenQ) cycle + + pqExistsQ = .FALSE. + ! now check if this exists in the selected list + !do k = idxI+1, N_configuration + ! diffSOMO = IEOR(OR(reunion_of_act_virt_bitmask(1,1),Jsomo),psi_configuration(1,1,k)) + ! diffDOMO = IEOR(OR(reunion_of_act_virt_bitmask(1,1),Jdomo),psi_configuration(1,2,k)) + ! ndiffSOMO = POPCNT(diffSOMO) + ! ndiffDOMO = POPCNT(diffDOMO) + ! if((ndiffSOMO + ndiffDOMO) .EQ. 0) then + ! pqExistsQ = .TRUE. + ! EXIT + ! endif + !end do + + if(.NOT. pqExistsQ) then + tableUniqueAlphas(p,q) = .TRUE. + !print *,p,q + !call debug_spindet(Jsomo,1) + !call debug_spindet(Jdomo,1) + endif + end do + end do + + !print *,tableUniqueAlphas(:,:) + + ! prune list of alphas + Isomo = Icfg(1,1) + Idomo = Icfg(1,2) + Jsomo = Icfg(1,1) + Jdomo = Icfg(1,2) + NalphaIcfg = 0 + do i = 1, nholes + p = listholes(i) + do j = 1, nvmos + q = listvmos(j) + if(p .EQ. q) cycle + if(tableUniqueAlphas(p,q)) then + if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then + ! SOMO -> VMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = Idomo + else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then + ! SOMO -> SOMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBSET(Idomo,q-1) + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then + ! DOMO -> VMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then + ! DOMO -> SOMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + Jdomo = IBSET(Jdomo,q-1) + else + print*,"Something went wrong in obtain_associated_alphaI" + endif + + ! SOMO + NalphaIcfg += 1 + !print *,i,j,"|",NalphaIcfg + alphasIcfg(1,1,NalphaIcfg) = Jsomo + alphasIcfg(1,2,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + !print *,"I = ",idxI, " Na=",NalphaIcfg," - ",Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + endif + end do + end do + + ! Check if this Icfg has been previously generated as a mono + ppExistsQ = .False. + Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) + Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) + do k = 1, idxI-1 + diffSOMO = IEOR(Isomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k))) + diffDOMO = IEOR(Idomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffSOMO = POPCNT(diffSOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + ppExistsQ = .TRUE. + EXIT + endif + end do + ! Diagonal part (pp,qq) + if(nholes > 0 .AND. (.NOT. ppExistsQ))then + ! SOMO + NalphaIcfg += 1 + !print *,p,q,"|",holetype(i),vmotype(j),NalphaIcfg + !call debug_spindet(Idomo,1) + !call debug_spindet(Jdomo,1) + alphasIcfg(1,1,NalphaIcfg) = Icfg(1,1) + alphasIcfg(1,2,NalphaIcfg) = Icfg(1,2) + endif + + end subroutine + function getNSOMO(Icfg) result(NSOMO) implicit none integer(bit_kind),intent(in) :: Icfg(N_int,2) @@ -8,98 +571,3 @@ NSOMO += POPCNT(Icfg(i,1)) enddo end function getNSOMO - -subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmodel) - implicit none - BEGIN_DOC - ! This function converts the orbital ids - ! in real space to those used in model space - ! in order to identify the matrices required - ! for the calculation of MEs. - ! - ! The type of excitations are ordered as follows: - ! Type 1 - SOMO -> SOMO - ! Type 2 - DOMO -> VMO - ! Type 3 - SOMO -> VMO - ! Type 4 - DOMO -> SOMO - END_DOC - integer(bit_kind),intent(in) :: Ialpha(N_int,2) - integer(bit_kind),intent(in) :: Jcfg(N_int,2) - integer,intent(in) :: p,q - integer,intent(in) :: extype - integer,intent(out) :: pmodel,qmodel - integer*8 :: Isomo - integer*8 :: Idomo - integer*8 :: Jsomo - integer*8 :: Jdomo - integer*8 :: mask - integer*8 :: Isomotmp - integer*8 :: Jsomotmp - integer :: pos0,pos0prev - - ! TODO Flag (print) when model space indices is > 64 - Isomo = Ialpha(1,1) - Idomo = Ialpha(1,2) - Jsomo = Jcfg(1,1) - Jdomo = Jcfg(1,2) - pos0prev = 0 - pmodel = p - qmodel = q - - if(p .EQ. q) then - pmodel = 1 - qmodel = 1 - else - !print *,"input pq=",p,q,"extype=",extype - !call debug_spindet(Isomo,1) - !call debug_spindet(Idomo,1) - !call debug_spindet(Jsomo,1) - !call debug_spindet(Jdomo,1) - select case(extype) - case (1) - ! SOMO -> SOMO - ! remove all domos - !print *,"type -> SOMO -> SOMO" - mask = ISHFT(1_8,p) - 1 - Isomotmp = IAND(Isomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) - mask = ISHFT(1_8,q) - 1 - Isomotmp = IAND(Isomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) - case (2) - ! DOMO -> VMO - ! remove all domos except one at p - !print *,"type -> DOMO -> VMO" - mask = ISHFT(1_8,p) - 1 - Jsomotmp = IAND(Jsomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) - mask = ISHFT(1_8,q) - 1 - Jsomotmp = IAND(Jsomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) - case (3) - ! SOMO -> VMO - !print *,"type -> SOMO -> VMO" - !Isomo = IEOR(Isomo,Jsomo) - mask = ISHFT(1_8,p) - 1 - Isomo = IAND(Isomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) - mask = ISHFT(1_8,q) - 1 - Jsomo = IAND(Jsomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) - case (4) - ! DOMO -> SOMO - ! remove all domos except one at p - !print *,"type -> DOMO -> SOMO" - !Isomo = IEOR(Isomo,Jsomo) - mask = ISHFT(1_8,p) - 1 - Jsomo = IAND(Jsomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) - mask = ISHFT(1_8,q) - 1 - Isomo = IAND(Isomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) - case default - print *,"something is wrong in convertOrbIdsToModelSpaceIds" - end select - endif - !print *,p,q,"model ids=",pmodel,qmodel -end subroutine convertOrbIdsToModelSpaceIds diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 4d409f50..680cdc39 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -8,6 +8,7 @@ end function logabsgamma BEGIN_PROVIDER [ integer, NSOMOMax] + &BEGIN_PROVIDER [ integer, NSOMOMin] &BEGIN_PROVIDER [ integer, NCSFMax] &BEGIN_PROVIDER [ integer*8, NMO] &BEGIN_PROVIDER [ integer, NBFMax] @@ -20,6 +21,7 @@ ! required for the calculation of prototype arrays. END_DOC NSOMOMax = min(elec_num, cfg_nsomo_max + 2) + NSOMOMin = 0 ! Note that here we need NSOMOMax + 2 sizes NCSFMax = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)-binom(NSOMOMax,((NSOMOMax+1)/2)+1)))) ! TODO: NCSFs for MS=0 NBFMax = NCSFMax @@ -101,9 +103,9 @@ subroutine get_phase_qp_to_cfg(Ialpha, Ibeta, phaseout) real*8,intent(out) :: phaseout integer(bit_kind) :: mask, deta(N_int), detb(N_int) integer :: nbetas - integer :: k + integer :: count, k - ! Initialize deta and detb + ! Initliaze deta and detb deta = Ialpha detb = Ibeta @@ -141,7 +143,7 @@ end subroutine get_phase_qp_to_cfg - BEGIN_PROVIDER [ integer, AIJpqMatrixDimsList, (0:NSOMOMax,0:NSOMOMax,4,NSOMOMax,NSOMOMax,2)] + BEGIN_PROVIDER [ integer, AIJpqMatrixDimsList, (NSOMOMin:NSOMOMax,4,NSOMOMax+1,NSOMOMax+1,2)] &BEGIN_PROVIDER [ integer, rowsmax] &BEGIN_PROVIDER [ integer, colsmax] use cfunctions @@ -160,14 +162,18 @@ end subroutine get_phase_qp_to_cfg cols = -1 integer*8 MS MS = elec_alpha_num-elec_beta_num - integer nsomomin nsomomin = elec_alpha_num-elec_beta_num rowsmax = 0 colsmax = 0 + print *,"NSOMOMax = ",NSOMOMax + print *,"NSOMOMin = ",NSOMOMin !allocate(AIJpqMatrixDimsList(NSOMOMax,NSOMOMax,4,NSOMOMax,NSOMOMax,2)) ! Type ! 1. SOMO -> SOMO - do i = 2-iand(nsomomin,1), NSOMOMax, 2 + !print *,"Doing SOMO->SOMO" + AIJpqMatrixDimsList(NSOMOMin,1,1,1,1) = 1 + AIJpqMatrixDimsList(NSOMOMin,1,1,1,2) = 1 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i-2,i-2, 2 Jsomo = ISHFT(1_8,j)-1 @@ -194,6 +200,7 @@ end subroutine get_phase_qp_to_cfg MS, & rows, & cols) + !print *, "SOMO->SOMO \t",i,j,k,l,">",Isomo,Jsomo,">",rows, cols if(rowsmax .LT. rows) then rowsmax = rows end if @@ -201,15 +208,18 @@ end subroutine get_phase_qp_to_cfg colsmax = cols end if ! i -> j - AIJpqMatrixDimsList(nsomoi,nsomoj,1,k,l,1) = rows - AIJpqMatrixDimsList(nsomoi,nsomoj,1,k,l,2) = cols + AIJpqMatrixDimsList(nsomoi,1,k,l,1) = rows + AIJpqMatrixDimsList(nsomoi,1,k,l,2) = cols end do end do end do end do ! Type ! 2. DOMO -> VMO - do i = 0+iand(nsomomin,1), NSOMOMax, 2 + !print *,"Doing DOMO->VMO" + AIJpqMatrixDimsList(NSOMOMin,2,1,1,1) = 1 + AIJpqMatrixDimsList(NSOMOMin,2,1,1,2) = 1 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 tmpsomo = ISHFT(1_8,i+2)-1 do j = i+2,i+2, 2 @@ -242,6 +252,7 @@ end subroutine get_phase_qp_to_cfg MS, & rows, & cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols if(rowsmax .LT. rows) then rowsmax = rows end if @@ -249,8 +260,8 @@ end subroutine get_phase_qp_to_cfg colsmax = cols end if ! i -> j - AIJpqMatrixDimsList(nsomoi,nsomoj,2,k,l,1) = rows - AIJpqMatrixDimsList(nsomoi,nsomoj,2,k,l,2) = cols + AIJpqMatrixDimsList(nsomoi,2,k,l,1) = rows + AIJpqMatrixDimsList(nsomoi,2,k,l,2) = cols end do end do end do @@ -258,15 +269,17 @@ end subroutine get_phase_qp_to_cfg ! Type ! 3. SOMO -> VMO !print *,"Doing SOMO->VMO" - do i = 2-iand(nsomomin,1), NSOMOMax, 2 + AIJpqMatrixDimsList(NSOMOMin,3,1,1,1) = 1 + AIJpqMatrixDimsList(NSOMOMin,3,1,1,2) = 1 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i,i, 2 Jsomo = ISHFT(1_8,j)-1 if(j .GT. NSOMOMax .OR. j .LE. 0) then cycle end if - do k = 1,i - do l = 1,i + do k = 1,i+1 + do l = 1,i+1 if(k .NE. l) then Isomo = ISHFT(1_8,i+1)-1 Isomo = IBCLR(Isomo,l-1) @@ -281,6 +294,7 @@ end subroutine get_phase_qp_to_cfg MS, & rows, & cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols if(rowsmax .LT. rows) then rowsmax = rows end if @@ -288,25 +302,27 @@ end subroutine get_phase_qp_to_cfg colsmax = cols end if ! i -> j - AIJpqMatrixDimsList(i,j,3,k,l,1) = rows - AIJpqMatrixDimsList(i,j,3,k,l,2) = cols + AIJpqMatrixDimsList(i,3,k,l,1) = rows + AIJpqMatrixDimsList(i,3,k,l,2) = cols end do end do end do end do ! Type - ! 4. DOMO -> VMO + ! 4. DOMO -> SOMO !print *,"Doing DOMO->SOMO" - do i = 2-iand(nsomomin,1), NSOMOMax, 2 + AIJpqMatrixDimsList(NSOMOMin,4,1,1,1) = 1 + AIJpqMatrixDimsList(NSOMOMin,4,1,1,2) = 1 + do i = NSOMOMin, NSOMOMax, 2 do j = i,i, 2 if(j .GT. NSOMOMax .OR. j .LE. 0) then cycle end if - do k = 1,i - do l = 1,i + do k = 1,i+1 + do l = 1,i+1 if(k .NE. l) then Isomo = ISHFT(1_8,i+1)-1 - Isomo = IBCLR(Isomo,k+1-1) + Isomo = IBCLR(Isomo,k-1) Jsomo = ISHFT(1_8,j+1)-1 Jsomo = IBCLR(Jsomo,l-1) else @@ -318,6 +334,7 @@ end subroutine get_phase_qp_to_cfg MS, & rows, & cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols if(rowsmax .LT. rows) then rowsmax = rows end if @@ -325,15 +342,16 @@ end subroutine get_phase_qp_to_cfg colsmax = cols end if ! i -> j - AIJpqMatrixDimsList(i,j,4,k,l,1) = rows - AIJpqMatrixDimsList(i,j,4,k,l,2) = cols + AIJpqMatrixDimsList(i,4,k,l,1) = rows + AIJpqMatrixDimsList(i,4,k,l,2) = cols end do end do end do end do + print *,"Rowsmax=",rowsmax," Colsmax=",colsmax END_PROVIDER - BEGIN_PROVIDER [ real*8, AIJpqContainer, (0:NSOMOMax,0:NSOMOMax,4,NSOMOMax,NSOMOMax,NBFMax,NBFMax)] + BEGIN_PROVIDER [ real*8, AIJpqContainer, (NBFMax,NBFmax,NSOMOMax+1,NSOMOMax+1,4,NSOMOMin:NSOMOMax)] use cfunctions implicit none BEGIN_DOC @@ -364,69 +382,78 @@ end subroutine get_phase_qp_to_cfg cols = -1 integer*8 MS MS = 0 - touch AIJpqMatrixDimsList real*8,dimension(:,:),allocatable :: meMatrix integer maxdim - !maxdim = max(rowsmax,colsmax) - ! allocate matrix - !allocate(AIJpqMatrixDimsList(NSOMOMax,NSOMOMax,4,NSOMOMax,NSOMOMax,2)) + ! Type ! 1. SOMO -> SOMO - do i = 2, NSOMOMax, 2 + AIJpqContainer = 0.d0 + AIJpqContainer(1,1,1,1,1,NSOMOMin) = 1.0d0 + integer :: rows_old, cols_old + rows_old = -1 + cols_old = -1 + allocate(meMatrix(1,1)) + do i = NSOMOMin+2, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 - do j = i-2,i-2, 2 - if(j .GT. NSOMOMax .OR. j .LT. 0) cycle - do k = 1,i - do l = 1,i + j=i-2 + if(j .GT. NSOMOMax .OR. j .LT. 0) cycle + nsomoi = i + do k = 1,i + orbp = k + do l = 1,i - ! Define Jsomo - if(k .NE. l) then - Jsomo = IBCLR(Isomo, k-1) - Jsomo = IBCLR(Jsomo, l-1) - nsomoi = i - nsomoj = j - else - Isomo = ISHFT(1_8,i)-1 - Jsomo = ISHFT(1_8,i)-1 - nsomoi = i - nsomoj = i - endif + ! Define Jsomo + if(k .NE. l) then + Jsomo = IBCLR(Isomo, k-1) + Jsomo = IBCLR(Jsomo, l-1) + nsomoj = j + else + Isomo = ISHFT(1_8,i)-1 + Jsomo = ISHFT(1_8,i)-1 + nsomoj = i + endif - AIJpqContainer(nsomoi,nsomoj,1,k,l,:,:) = 0.0d0 - call getApqIJMatrixDims(Isomo, & - Jsomo, & - MS, & - rows, & - cols) + call getApqIJMatrixDims(Isomo, & + Jsomo, & + MS, & + rows, & + cols) - orbp = k - orbq = l - allocate(meMatrix(rows,cols)) - meMatrix = 0.0d0 - ! fill matrix - call getApqIJMatrixDriver(Isomo, & - Jsomo, & - orbp, & - orbq, & - MS, & - NMO, & - meMatrix, & - rows, & - cols) - ! i -> j - do ri = 1,rows - do ci = 1,cols - AIJpqContainer(nsomoi,nsomoj,1,k,l,ri,ci) = meMatrix(ri, ci) - end do + orbq = l + if ((rows /= rows_old).or.(cols /= cols_old)) then + deallocate(meMatrix) + allocate(meMatrix(rows,cols)) + rows_old = rows + cols_old = cols + endif + meMatrix = 0.0d0 + ! fill matrix + call getApqIJMatrixDriver(Isomo, & + Jsomo, & + orbp, & + orbq, & + MS, & + NMO, & + meMatrix, & + rows, & + cols) + ! i -> j + do ri = 1,rows + do ci = 1,cols + AIJpqContainer(ri,ci,k,l,1,nsomoi) = meMatrix(ri, ci) end do - deallocate(meMatrix) end do end do end do end do + deallocate(meMatrix) + ! Type ! 2. DOMO -> VMO - do i = 0, NSOMOMax, 2 + !print *,"Doing DOMO -> VMO" + !AIJpqContainer(NSOMOMin,2,1,1,1,1) = 1.0d0 + AIJpqContainer(1,1,1,1,2,NSOMOMin) = 1.0d0 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 tmpsomo = ISHFT(1_8,i+2)-1 do j = i+2,i+2, 2 @@ -451,7 +478,12 @@ end subroutine get_phase_qp_to_cfg nsomoj = j endif - AIJpqContainer(nsomoi,nsomoj,2,k,l,:,:) = 0.0d0 + !print *,"k,l=",k,l + !call debug_spindet(Jsomo,1) + !call debug_spindet(Isomo,1) + + !AIJpqContainer(nsomoi,2,k,l,:,:) = 0.0d0 + AIJpqContainer(:,:,k,l,2,nsomoi) = 0.0d0 call getApqIJMatrixDims(Isomo, & Jsomo, & MS, & @@ -472,10 +504,13 @@ end subroutine get_phase_qp_to_cfg meMatrix, & rows, & cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax + !call printMatrix(meMatrix,rows,cols) ! i -> j do ri = 1,rows do ci = 1,cols - AIJpqContainer(nsomoi,nsomoj,2,k,l,ri,ci) = meMatrix(ri, ci) + !AIJpqContainer(nsomoi,2,k,l,ri,ci) = meMatrix(ri, ci) + AIJpqContainer(ri,ci,k,l,2,nsomoi) = meMatrix(ri, ci) end do end do deallocate(meMatrix) @@ -485,13 +520,16 @@ end subroutine get_phase_qp_to_cfg end do ! Type ! 3. SOMO -> VMO - do i = 2, NSOMOMax, 2 + !print *,"Doing SOMO -> VMO" + !AIJpqContainer(NSOMOMin,3,1,1,1,1) = 1.0d0 + AIJpqContainer(1,1,1,1,3,NSOMOMin) = 1.0d0 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i,i, 2 Jsomo = ISHFT(1_8,j)-1 if(j .GT. NSOMOMax .OR. j .LE. 0) cycle - do k = 1,i - do l = 1,i + do k = 1,i+1 + do l = 1,i+1 if(k .NE. l) then Isomo = ISHFT(1_8,i+1)-1 Isomo = IBCLR(Isomo,l-1) @@ -502,7 +540,12 @@ end subroutine get_phase_qp_to_cfg Jsomo = ISHFT(1_8,j)-1 endif - AIJpqContainer(i,j,3,k,l,:,:) = 0.0d0 + !print *,"k,l=",k,l + !call debug_spindet(Jsomo,1) + !call debug_spindet(Isomo,1) + + !AIJpqContainer(i,3,k,l,:,:) = 0.0d0 + AIJpqContainer(:,:,k,l,3,i) = 0.0d0 call getApqIJMatrixDims(Isomo, & Jsomo, & MS, & @@ -523,10 +566,13 @@ end subroutine get_phase_qp_to_cfg meMatrix, & rows, & cols) + !call printMatrix(meMatrix,rows,cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax ! i -> j do ri = 1,rows do ci = 1,cols - AIJpqContainer(i,j,3,k,l,ri,ci) = meMatrix(ri, ci) + !AIJpqContainer(i,3,k,l,ri,ci) = meMatrix(ri, ci) + AIJpqContainer(ri,ci,k,l,3,i) = meMatrix(ri, ci) end do end do deallocate(meMatrix) @@ -536,24 +582,28 @@ end subroutine get_phase_qp_to_cfg end do ! Type ! 4. DOMO -> SOMO - do i = 2, NSOMOMax, 2 + !print *,"Doing DOMO -> SOMO" + !AIJpqContainer(NSOMOMin,4,1,1,1,1) = 1.0d0 + AIJpqContainer(1,1,1,1,4,NSOMOMin) = 1.0d0 + do i = NSOMOMin+2, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i,i, 2 Jsomo = ISHFT(1_8,i)-1 if(j .GT. NSOMOMax .OR. j .LE. 0) cycle - do k = 1,i - do l = 1,i + do k = 1,i+1 + do l = 1,i+1 if(k .NE. l) then Isomo = ISHFT(1_8,i+1)-1 Isomo = IBCLR(Isomo,k-1) Jsomo = ISHFT(1_8,j+1)-1 - Jsomo = IBCLR(Jsomo,l+1-1) + Jsomo = IBCLR(Jsomo,l-1) else Isomo = ISHFT(1_8,i)-1 Jsomo = ISHFT(1_8,j)-1 endif - AIJpqContainer(i,j,4,k,l,:,:) = 0.0d0 + !AIJpqContainer(i,4,k,l,:,:) = 0.0d0 + AIJpqContainer(:,:,k,l,4,i) = 0.0d0 call getApqIJMatrixDims(Isomo, & Jsomo, & MS, & @@ -575,10 +625,13 @@ end subroutine get_phase_qp_to_cfg meMatrix, & rows, & cols) + !call printMatrix(meMatrix,rows,cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax ! i -> j do ri = 1,rows do ci = 1,cols - AIJpqContainer(i,j,4,k,l,ri,ci) = meMatrix(ri, ci) + !AIJpqContainer(i,4,k,l,ri,ci) = meMatrix(ri, ci) + AIJpqContainer(ri,ci,k,l,4,i) = meMatrix(ri, ci) end do end do deallocate(meMatrix) @@ -588,31 +641,178 @@ end subroutine get_phase_qp_to_cfg end do END_PROVIDER +subroutine calculate_preconditioner_cfg(diag_energies) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for calculate_preconditioner + ! + ! Calculates the diagonal energies of + ! the configurations in psi_configuration + ! returns : diag_energies : + END_DOC + integer :: i,j,k,l,p,q,noccp,noccq, ii, jj + real*8,intent(out) :: diag_energies(n_CSF) + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer :: NSOMOI + integer :: NSOMOJ + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: starti, endi, cnti, cntj, rows,cols + integer :: extype,pmodel,qmodel + integer(bit_kind) :: Icfg(N_INT,2) + integer(bit_kind) :: Jcfg(N_INT,2) + integer,external :: getNSOMO + real*8, external :: mo_two_e_integral + real*8 :: hpp + real*8 :: meCC + real*8 :: ecore -!!!!!! + PROVIDE h_core_ri + ! initialize energies + diag_energies = 0.d0 + + ! calculate core energy + !call get_core_energy(ecore) + !diag_energies = ecore + + ! calculate the core energy + !print *,"Core energy=",ref_bitmask_energy + + do i=1,N_configuration + + Isomo = psi_configuration(1,1,i) + Idomo = psi_configuration(1,2,i) + Icfg(1,1) = psi_configuration(1,1,i) + Icfg(1,2) = psi_configuration(1,2,i) + NSOMOI = getNSOMO(psi_configuration(:,:,i)) + + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + !do k = n_core_orb+1,n_core_orb + n_act_orb + do k = 1,mo_num + if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 1 + endif + enddo + ! holes in DOMO + !do k = n_core_orb+1,n_core_orb + n_act_orb + !do k = 1+n_core_inact_orb,n_core_orb+n_core_inact_act_orb + do k = 1,mo_num + if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 2 + endif + enddo + + ! find vmos + listvmos = -1 + vmotype = -1 + nvmos = 0 + !do k = n_core_orb+1,n_core_orb + n_act_orb + do k = 1,mo_num + !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) + if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 0 + else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 1 + end if + enddo + !print *,"I=",i + !call debug_spindet(psi_configuration(1,1,i),N_int) + !call debug_spindet(psi_configuration(1,2,i),N_int) + + do k=1,nholes + p = listholes(k) + noccp = holetype(k) + + ! Calculate one-electron + ! and two-electron coulomb terms + do l=1,nholes + q = listholes(l) + noccq = holetype(l) + !print *,"--------------- K=",p," L=",q + + ! one-electron term + if(p.EQ.q) then + hpp = noccq * h_core_ri(p,q)!mo_one_e_integrals(q,q) + else + hpp = 0.d0 + endif + + + do j=starti,endi + ! coulomb term + ! (pp,qq) = + if(p.EQ.q) then + diag_energies(j) += hpp !+ 0.5d0 * (noccp * noccq * mo_two_e_integral(p,q,p,q)) + !print *,"hpp=",hpp,"diga= ",diag_energies(j) +! else +! diag_energies(j) += ! 0.5d0 * noccp * noccq * mo_two_e_integral(p,q,p,q) +! print *,"diga= ",diag_energies(j) + endif + enddo + enddo + + enddo + enddo + +end subroutine calculate_preconditioner_cfg BEGIN_PROVIDER [ real*8, DetToCSFTransformationMatrix, (0:NSOMOMax,NBFMax,maxDetDimPerBF)] - &BEGIN_PROVIDER [ real*8, psi_coef_config, (n_CSF)] + &BEGIN_PROVIDER [ real*8, psi_coef_config, (n_CSF,1)] &BEGIN_PROVIDER [ integer, psi_config_data, (N_configuration,2)] + &BEGIN_PROVIDER [ integer, psi_csf_to_config_data, (n_CSF)] use cfunctions - use bitmasks implicit none BEGIN_DOC ! Documentation for DetToCSFTransformationMatrix ! Provides the matrix of transformatons for the ! conversion between determinant to CSF basis (in BFs) END_DOC - integer(bit_kind) :: mask(N_int), Ialpha(N_int),Ibeta(N_int) + integer*8 :: Isomo, Idomo + integer(bit_kind) :: Ialpha(N_int),Ibeta(N_int) integer :: rows, cols, i, j, k - integer :: startdet, enddet - integer*8 MS, Isomo, Idomo + integer :: startdet, enddet, idx + integer*8 MS integer ndetI integer :: getNSOMO real*8,dimension(:,:),allocatable :: tempBuffer real*8,dimension(:),allocatable :: tempCoeff real*8 :: norm_det1, phasedet + + integer :: nt + + norm_det1 = 0.d0 MS = elec_alpha_num - elec_beta_num + print *,"Maxbfdim=",NBFMax + print *,"Maxdetdim=",maxDetDimPerBF + print *,"n_CSF=",n_CSF + print *,"N_configurations=",N_configuration + print *,"n_core_orb=",n_core_orb ! initialization psi_coef_config = 0.d0 DetToCSFTransformationMatrix(0,:,:) = 1.d0 @@ -634,10 +834,13 @@ end subroutine get_phase_qp_to_cfg countcsf = 0 integer countdet countdet = 0 - integer idx integer istate istate = 1 + psi_csf_to_config_data(1) = 1 phasedet = 1.0d0 + call omp_set_max_active_levels(1) + !$OMP PARALLEL + !$OMP MASTER do i = 1,N_configuration startdet = psi_configuration_to_psi_det(1,i) enddet = psi_configuration_to_psi_det(2,i) @@ -655,6 +858,9 @@ end subroutine get_phase_qp_to_cfg countdet += 1 enddo + !print *,"dimcoef=",bfIcfg,norm_det1 + !call printMatrix(tempCoeff,ndetI,1) + s = 0 do k=1,N_int if (psi_configuration(k,1,i) == 0_bit_kind) cycle @@ -667,14 +873,1023 @@ end subroutine get_phase_qp_to_cfg allocate(tempBuffer(bfIcfg,ndetI)) tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI) - call dgemm('N','N', bfIcfg, 1, ndetI, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, size(tempCoeff,1), 0.d0, psi_coef_config(countcsf+1), size(psi_coef_config,1)) + call dgemm('N','N', bfIcfg, 1, ndetI, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, size(tempCoeff,1), 0.d0, psi_coef_config(countcsf+1,1), size(psi_coef_config,1)) !call dgemv('N', NBFMax, maxDetDimPerBF, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, 1, 0.d0, psi_coef_config(countcsf), 1) deallocate(tempCoeff) deallocate(tempBuffer) psi_config_data(i,1) = countcsf + 1 + do k=1,bfIcfg + psi_csf_to_config_data(countcsf+k) = i + enddo countcsf += bfIcfg psi_config_data(i,2) = countcsf enddo + print *,"Norm det=",norm_det1, size(psi_coef_config,1), " Dim csf=", countcsf + !$OMP END MASTER + !$OMP END PARALLEL + call omp_set_max_active_levels(4) END_PROVIDER + +subroutine obtain_connected_I_foralpha_fromfilterdlist(idxI, nconnectedJ, idslistconnectedJ, listconnectedJ, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for obtain_connected_I_foralpha + ! This function returns all those selected configurations + ! which are connected to the input configuration + ! Ialpha by a single excitation. + ! + ! The type of excitations are ordered as follows: + ! Type 1 - SOMO -> SOMO + ! Type 2 - DOMO -> VMO + ! Type 3 - SOMO -> VMO + ! Type 4 - DOMO -> SOMO + ! + ! Order of operators + ! \alpha> = a^\dag_p a_q |I> = E_pq |I> + END_DOC + integer ,intent(in) :: idxI + integer ,intent(in) :: nconnectedJ + integer(bit_kind),intent(in) :: listconnectedJ(N_int,2,*) + integer(bit_kind),intent(in) :: Ialpha(N_int,2) + integer(bit_kind),intent(out) :: connectedI(N_int,2,*) + integer ,intent(in) :: idslistconnectedJ(*) + integer ,intent(out) :: idxs_connectedI(*) + integer,intent(out) :: nconnectedI + integer,intent(out) :: excitationIds(2,*) + integer,intent(out) :: excitationTypes(*) + real*8 ,intent(out) :: diagfactors(*) + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: IJsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: ii,i,j,k,kk,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes, idxJ + integer :: listholes(mo_num) + integer :: holetype(mo_num) + integer :: end_index + integer :: Nsomo_alpha + logical :: isOKlistJ + + PROVIDE DetToCSFTransformationMatrix + + isOKlistJ = .False. + + nconnectedI = 0 + end_index = N_configuration + + ! Since CFGs are sorted wrt to seniority + ! we don't have to search the full CFG list + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Nsomo_alpha = POPCNT(Isomo) + end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1) + if(end_index .LT. 0) end_index= N_configuration + !end_index = N_configuration + + + p = 0 + q = 0 + do i=1,nconnectedJ + idxJ = idslistconnectedJ(i) + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Jsomo = listconnectedJ(1,1,i) + Jdomo = listconnectedJ(1,2,i) + diffSOMO = IEOR(Isomo,Jsomo) + ndiffSOMO = POPCNT(diffSOMO) + diffDOMO = IEOR(Idomo,Jdomo) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + select case(ndiffDOMO) + case (0) + ! SOMO -> VMO + !print *,"obt SOMO -> VMO" + extyp = 3 + IJsomo = IEOR(Isomo, Jsomo) +IRP_IF WITHOUT_TRAILZ + p = (popcnt(ieor( IAND(Isomo,IJsomo), IAND(Isomo,IJsomo)-1)) -1) + 1 +IRP_ELSE + p = TRAILZ(IAND(Isomo,IJsomo)) + 1 +IRP_ENDIF + IJsomo = IBCLR(IJsomo,p-1) +IRP_IF WITHOUT_TRAILZ + q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1 +IRP_ELSE + q = TRAILZ(IJsomo) + 1 +IRP_ENDIF + case (1) + ! DOMO -> VMO + ! or + ! SOMO -> SOMO + nsomoJ = POPCNT(Jsomo) + nsomoalpha = POPCNT(Isomo) + if(nsomoJ .GT. nsomoalpha) then + ! DOMO -> VMO + !print *,"obt DOMO -> VMO" + extyp = 2 +IRP_IF WITHOUT_TRAILZ + p = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1 +IRP_ELSE + p = TRAILZ(IEOR(Idomo,Jdomo)) + 1 +IRP_ENDIF + Isomo = IEOR(Isomo, Jsomo) + Isomo = IBCLR(Isomo,p-1) +IRP_IF WITHOUT_TRAILZ + q = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 +IRP_ELSE + q = TRAILZ(Isomo) + 1 +IRP_ENDIF + else + ! SOMO -> SOMO + !print *,"obt SOMO -> SOMO" + extyp = 1 +IRP_IF WITHOUT_TRAILZ + q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1 +IRP_ELSE + q = TRAILZ(IEOR(Idomo,Jdomo)) + 1 +IRP_ENDIF + Isomo = IEOR(Isomo, Jsomo) + Isomo = IBCLR(Isomo,q-1) +IRP_IF WITHOUT_TRAILZ + p = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 +IRP_ELSE + p = TRAILZ(Isomo) + 1 +IRP_ENDIF + end if + case (2) + ! DOMO -> SOMO + !print *,"obt DOMO -> SOMO" + extyp = 4 + IJsomo = IEOR(Isomo, Jsomo) +IRP_IF WITHOUT_TRAILZ + p = (popcnt(ieor(IAND(Jsomo,IJsomo) ,IAND(Jsomo,IJsomo) -1))-1) + 1 +IRP_ELSE + p = TRAILZ(IAND(Jsomo,IJsomo)) + 1 +IRP_ENDIF + IJsomo = IBCLR(IJsomo,p-1) +IRP_IF WITHOUT_TRAILZ + q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1 +IRP_ELSE + q = TRAILZ(IJsomo) + 1 +IRP_ENDIF + case default + print *,"something went wront in get connectedI" + end select + starti = psi_config_data(idxJ,1) + endi = psi_config_data(idxJ,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 1.0d0 + else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + Isomo = listconnectedJ(1,1,i) + Idomo = listconnectedJ(1,2,i) + do ii = 1,mo_num + if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do ii = 1,mo_num + if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 2 + endif + end do + + do k=1,nholes + p = listholes(k) + q = p + extyp = 1 + if(holetype(k) .EQ. 1) then + starti = psi_config_data(idxJ,1) + endi = psi_config_data(idxJ,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 1.0d0 + else + starti = psi_config_data(idxJ,1) + endi = psi_config_data(idxJ,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 2.0d0 + endif + enddo + endif + end do + +end subroutine obtain_connected_I_foralpha_fromfilterdlist + + +subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmodel) + implicit none + BEGIN_DOC + ! This function converts the orbital ids + ! in real space to those used in model space + ! in order to identify the matrices required + ! for the calculation of MEs. + ! + ! The type of excitations are ordered as follows: + ! Type 1 - SOMO -> SOMO + ! Type 2 - DOMO -> VMO + ! Type 3 - SOMO -> VMO + ! Type 4 - DOMO -> SOMO + END_DOC + integer(bit_kind),intent(in) :: Ialpha(N_int,2) + integer(bit_kind),intent(in) :: Jcfg(N_int,2) + integer,intent(in) :: p,q + integer,intent(in) :: extype + integer,intent(out) :: pmodel,qmodel + !integer(bit_kind) :: Isomo(N_int) + !integer(bit_kind) :: Idomo(N_int) + !integer(bit_kind) :: Jsomo(N_int) + !integer(bit_kind) :: Jdomo(N_int) + integer*8 :: Isomo + integer*8 :: Idomo + integer*8 :: Jsomo + integer*8 :: Jdomo + integer*8 :: mask + integer :: iint, ipos + !integer(bit_kind) :: Isomotmp(N_int) + !integer(bit_kind) :: Jsomotmp(N_int) + integer*8 :: Isomotmp + integer*8 :: Jsomotmp + integer :: pos0,pos0prev + + ! TODO Flag (print) when model space indices is > 64 + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Jsomo = Jcfg(1,1) + Jdomo = Jcfg(1,2) + pos0prev = 0 + pmodel = p + qmodel = q + + if(p .EQ. q) then + pmodel = 1 + qmodel = 1 + else + select case(extype) + case (1) + ! SOMO -> SOMO + ! remove all domos + !print *,"type -> SOMO -> SOMO" + mask = ISHFT(1_8,p) - 1 + Isomotmp = IAND(Isomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + mask = ISHFT(1_8,q) - 1 + Isomotmp = IAND(Isomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + case (2) + ! DOMO -> VMO + ! remove all domos except one at p + !print *,"type -> DOMO -> VMO" + mask = ISHFT(1_8,p) - 1 + Jsomotmp = IAND(Jsomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + mask = ISHFT(1_8,q) - 1 + Jsomotmp = IAND(Jsomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + case (3) + ! SOMO -> VMO + !print *,"type -> SOMO -> VMO" + !Isomo = IEOR(Isomo,Jsomo) + if(p.LT.q) then + mask = ISHFT(1_8,p) - 1 + Isomo = IAND(Isomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + mask = ISHFT(1_8,q) - 1 + Jsomo = IAND(Jsomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1 + else + mask = ISHFT(1_8,p) - 1 + Isomo = IAND(Isomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1 + mask = ISHFT(1_8,q) - 1 + Jsomo = IAND(Jsomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + endif + case (4) + ! DOMO -> SOMO + ! remove all domos except one at p + !print *,"type -> DOMO -> SOMO" + !Isomo = IEOR(Isomo,Jsomo) + if(p.LT.q) then + mask = ISHFT(1_8,p) - 1 + Jsomo = IAND(Jsomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + mask = ISHFT(1_8,q) - 1 + Isomo = IAND(Isomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1 + else + mask = ISHFT(1_8,p) - 1 + Jsomo = IAND(Jsomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1 + mask = ISHFT(1_8,q) - 1 + Isomo = IAND(Isomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + endif + case default + print *,"something is wrong in convertOrbIdsToModelSpaceIds" + end select + endif + !print *,p,q,"model ids=",pmodel,qmodel +end subroutine convertOrbIdsToModelSpaceIds + +subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep) + implicit none + use bitmasks + use omp_lib + BEGIN_DOC + ! Documentation for sigma-vector calculation + ! + ! Calculates the result of the + ! application of the hamiltonian to the + ! wavefunction in CFG basis once + ! TODO : Things prepare outside this routine + ! 1. Touch the providers for + ! a. ApqIJ containers + ! b. DET to CSF transformation matrices + ! 2. DET to CSF transcormation + ! 2. CSF to DET back transcormation + ! returns : psi_coef_out_det : + END_DOC + integer,intent(in) :: sze, istart,iend, istep, ishift, n_st + real*8,intent(in) :: psi_in(n_st,sze) + real*8,intent(out) :: psi_out(n_st,sze) + integer(bit_kind) :: Icfg(N_INT,2) + integer :: i,j,k,l,p,q,noccp,noccq, ii, jj, m, n, idxI, kk, nocck,orbk + integer(bit_kind),dimension(:,:,:),allocatable :: listconnectedJ + integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg + integer(bit_kind),dimension(:,:,:),allocatable :: singlesI + integer(bit_kind),dimension(:,:,:),allocatable :: connectedI_alpha + integer,dimension(:),allocatable :: idxs_singlesI + integer,dimension(:),allocatable :: idxs_connectedI_alpha + integer,dimension(:,:),allocatable :: excitationIds_single + integer,dimension(:),allocatable :: excitationTypes_single + integer,dimension(:,:),allocatable :: excitationIds + integer,dimension(:),allocatable :: excitationTypes + integer,dimension(:),allocatable :: idslistconnectedJ + real*8,dimension(:),allocatable :: diagfactors + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer :: Nalphas_Icfg, nconnectedI, rowsikpq, colsikpq, nsinglesI + integer :: extype,NSOMOalpha,NSOMOI,NSOMOJ,pmodel,qmodel + integer :: getNSOMO + integer :: totcolsTKI + integer :: rowsTKI + integer :: noccpp + integer :: istart_cfg, iend_cfg, num_threads_max + integer :: nconnectedJ,nconnectedtotalmax,nconnectedmaxJ,maxnalphas,ntotJ + integer*8 :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta + integer :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk + real*8 :: norm_coef_cfg, fac2eints + real*8 :: norm_coef_det + real*8 :: meCC1, meCC2, diagfac + real*8,dimension(:,:,:),allocatable :: TKI + real*8,dimension(:,:),allocatable :: GIJpqrs + real*8,dimension(:,:,:),allocatable :: TKIGIJ + real*8,dimension(:),allocatable :: psi_out_tmp + real*8,dimension(:,:),allocatable :: CCmattmp + real*8, external :: mo_two_e_integral + real*8, external :: get_two_e_integral + real*8,dimension(:),allocatable:: diag_energies + real*8 :: tmpvar, tmptot + + integer(omp_lock_kind), allocatable :: lock(:) + call omp_set_max_active_levels(1) + + allocate(lock(sze)) + do i=1,sze + call omp_init_lock(lock(i)) + enddo + + !print *," sze = ",sze + allocate(diag_energies(n_CSF)) + call calculate_preconditioner_cfg(diag_energies) + + MS = 0 + norm_coef_cfg=0.d0 + + psi_out=0.d0 + + istart_cfg = psi_csf_to_config_data(istart) + iend_cfg = psi_csf_to_config_data(iend) + + !nconnectedtotalmax = 1000 + !nconnectedmaxJ = 1000 + maxnalphas = elec_num*mo_num + Icfg(1,1) = psi_configuration(1,1,1) + Icfg(1,2) = psi_configuration(1,2,1) + allocate(listconnectedJ(N_INT,2,max(sze,100))) + allocate(idslistconnectedJ(max(sze,100))) + call obtain_connected_J_givenI(1, Icfg, listconnectedJ, idslistconnectedJ, nconnectedmaxJ, nconnectedtotalmax) + deallocate(listconnectedJ) + deallocate(idslistconnectedJ) + + integer*8, allocatable :: bit_tmp(:) + integer*8, external :: configuration_search_key + double precision :: diagfactors_0 + allocate( bit_tmp(0:N_configuration+1)) + do j=1,N_configuration + bit_tmp(j) = configuration_search_key(psi_configuration(1,1,j),N_int) + enddo + + call omp_set_max_active_levels(1) + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP private(i,icfg, isomo, idomo, NSOMOI, NSOMOJ, nholes, k, listholes,& + !$OMP holetype, vmotype, nvmos, listvmos, starti, endi, & + !$OMP nsinglesI, singlesI,idxs_singlesI,excitationIds_single,& + !$OMP excitationTypes_single, idxI, p, q, extype, pmodel, qmodel,& + !$OMP Jsomo, Jdomo, startj, endj, kk, jj, ii, cnti, cntj, meCC1,& + !$OMP nconnectedJ,listconnectedJ,idslistconnectedJ,ntotJ, & + !$OMP Nalphas_Icfg,alphas_Icfg,connectedI_alpha, & + !$OMP idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors,& + !$OMP totcolsTKI,rowsTKI,NSOMOalpha,rowsikpq, & + !$OMP colsikpq, GIJpqrs,TKIGIJ,j,l,m,TKI,CCmattmp, moi, moj, mok, mol,& + !$OMP diagfac, tmpvar, diagfactors_0) & + !$OMP shared(istart_cfg, iend_cfg, psi_configuration, mo_num, psi_config_data,& + !$OMP N_int, N_st, psi_out, psi_in, h_core_ri, AIJpqContainer,& + !$OMP sze, NalphaIcfg_list,alphasIcfg_list, bit_tmp, & + !$OMP AIJpqMatrixDimsList, diag_energies, n_CSF, lock, NBFmax,nconnectedtotalmax, nconnectedmaxJ,maxnalphas,& + !$OMP num_threads_max) + + allocate(singlesI(N_INT,2,max(sze,100))) + allocate(idxs_singlesI(max(sze,100))) + allocate(excitationIds_single(2,max(sze,100))) + allocate(excitationTypes_single(max(sze,100))) +! + + !!! Single Excitations !!! + + !$OMP DO SCHEDULE(dynamic,16) + do i=istart_cfg,iend_cfg + + ! if Seniority_range > 8 then + ! continue + ! else + ! cycle + + Icfg(1,1) = psi_configuration(1,1,i) + Icfg(1,2) = psi_configuration(1,2,i) + Isomo = Icfg(1,1) + Idomo = Icfg(1,2) + NSOMOI = getNSOMO(Icfg) + + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + ! list_act + ! list_core + ! list_core_inact + ! bitmasks + !do k = n_core_orb+1,n_core_orb + n_act_orb + do k = 1,mo_num + if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 1 + endif + enddo + ! holes in DOMO + !do k = n_core_orb+1,n_core_orb + n_act_orb + do k = 1,mo_num + if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 2 + endif + enddo + + ! find vmos + listvmos = -1 + vmotype = -1 + nvmos = 0 + !do k = n_core_orb+1,n_core_orb + n_act_orb + do k = 1,mo_num + !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) + if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 0 + else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 1 + end if + enddo + + + ! Icsf ids + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + NSOMOI = getNSOMO(Icfg) + + call generate_all_singles_cfg_with_type(bit_tmp,Icfg,singlesI,idxs_singlesI,excitationIds_single,& + excitationTypes_single,nsinglesI,N_int) + + do j = 1,nsinglesI + idxI = idxs_singlesI(j) + NSOMOJ = getNSOMO(singlesI(1,1,j)) + p = excitationIds_single(1,j) + q = excitationIds_single(2,j) + extype = excitationTypes_single(j) + ! Off diagonal terms + call convertOrbIdsToModelSpaceIds(Icfg, singlesI(1,1,j), p, q, extype, pmodel, qmodel) + Jsomo = singlesI(1,1,j) + Jdomo = singlesI(1,2,j) + + ! Add the hole on J + if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes += 1 + listholes(nholes) = q + holetype(nholes) = 1 + endif + if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes += 1 + listholes(nholes) = q + holetype(nholes) = 2 + endif + + startj = psi_config_data(idxI,1) + endj = psi_config_data(idxI,2) + + !!! One-electron contribution !!! + do ii = starti, endi + cnti = ii-starti+1 + do jj = startj, endj + cntj = jj-startj+1 + meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* h_core_ri(p,q) + call omp_set_lock(lock(jj)) + do kk = 1,n_st + psi_out(kk,jj) = psi_out(kk,jj) + meCC1 * psi_in(kk,ii) + enddo + call omp_unset_lock(lock(jj)) + enddo + enddo + + ! Undo setting in listholes + if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes -= 1 + endif + if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes -= 1 + endif + enddo + enddo + !$OMP END DO + deallocate(singlesI) + deallocate(idxs_singlesI) + deallocate(excitationIds_single) + deallocate(excitationTypes_single) + + allocate(listconnectedJ(N_INT,2,max(sze,100))) + allocate(alphas_Icfg(N_INT,2,max(sze,100))) + allocate(connectedI_alpha(N_INT,2,max(sze,100))) + allocate(idxs_connectedI_alpha(max(sze,100))) + allocate(excitationIds(2,max(sze,100))) + allocate(excitationTypes(max(sze,100))) + allocate(diagfactors(max(sze,100))) + allocate(idslistconnectedJ(max(sze,100))) + allocate(CCmattmp(n_st,NBFmax)) + + ! Loop over all selected configurations + !$OMP DO SCHEDULE(dynamic,16) + do i = istart_cfg,iend_cfg +! TKI = 0.d0 +! GIJpqrs = 0.d0 +! TKIGIJ = 0.d0 + + ! if Seniority_range > 8 then + ! continue + ! else + ! cycle + + Icfg(1,1) = psi_configuration(1,1,i) + Icfg(1,2) = psi_configuration(1,2,i) + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + + ! Returns all unique (checking the past) singly excited cfgs connected to I + Nalphas_Icfg = 0 + ! TODO: + ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate + + Nalphas_Icfg = NalphaIcfg_list(i) + alphas_Icfg(1:n_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg) + if(Nalphas_Icfg .GT. maxnalphas) then + print *,"Nalpha > maxnalpha" + endif + + call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ) + + ! TODO : remove doubly excited for return + ! Here we do 2x the loop. One to count for the size of the matrix, then we compute. + do k = 1,Nalphas_Icfg + ! Now generate all singly excited with respect to a given alpha CFG + + call obtain_connected_I_foralpha_fromfilterdlist(i,nconnectedJ, idslistconnectedJ, & + listconnectedJ, alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI, & + excitationIds,excitationTypes,diagfactors) + + if(nconnectedI .EQ. 0) then + cycle + endif + + totcolsTKI = 0 + rowsTKI = -1 + NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) + do j = 1,nconnectedI + NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + ! for E_pp E_rs and E_ppE_rr case + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + totcolsTKI += colsikpq + rowsTKI = rowsikpq + enddo + + allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF + ! Initialize the integral container + ! dims : (totcolsTKI, nconnectedI) + allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs + allocate(TKIGIJ(n_st,rowsTKI,nconnectedI)) ! TKI * gpqrs + !print *,"\t---rowsTKI=",rowsTKI," totCols=",totcolsTKI + + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + rowsTKI = rowsikpq + do m = 1,colsikpq + do l = 1,rowsTKI + do kk = 1,n_st + TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) & + * psi_in(kk,idxs_connectedI_alpha(j)+m-1) + enddo + enddo + enddo + diagfactors_0 = diagfactors(j)*0.5d0 + moi = excitationIds(1,j) ! p + mok = excitationIds(2,j) ! q + do l=1,nconnectedI + moj = excitationIds(2,l) ! s + mol = excitationIds(1,l) ! r + diagfac = diagfactors_0 * diagfactors(l)* mo_two_e_integral(mok,mol,moi,moj)! g(pq,sr) = + do m = 1,colsikpq + ! = (ik|jl) + GIJpqrs(totcolsTKI+m,l) = diagfac + enddo + enddo + totcolsTKI += colsikpq + enddo + + + ! Do big BLAS + call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0, & + TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0, & + TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) ) + + + ! Collect the result + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + rowsTKI = rowsikpq + CCmattmp = 0.d0 + + call dgemm('N','N', n_st, colsikpq, rowsTKI, 1.d0, & + TKIGIJ(1,1,j), size(TKIGIJ,1), & + AIJpqContainer(1,1,pmodel,qmodel,extype,NSOMOalpha), & + size(AIJpqContainer,1), 0.d0, & + CCmattmp, size(CCmattmp,1) ) + + do m = 1,colsikpq + call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1)) + do kk = 1,n_st + psi_out(kk,idxs_connectedI_alpha(j)+m-1) += CCmattmp(kk,m) + enddo + call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1)) + enddo + totcolsTKI += colsikpq + enddo + + deallocate(TKI) ! coefficients of CSF + deallocate(GIJpqrs) ! gpqrs + deallocate(TKIGIJ) ! gpqrs + + enddo ! loop over alphas + enddo ! loop over I + !$OMP END DO + call omp_set_max_active_levels(4) + deallocate(CCmattmp) + deallocate(connectedI_alpha) + deallocate(idxs_connectedI_alpha) + deallocate(excitationIds) + deallocate(excitationTypes) + deallocate(diagfactors) + + + ! Add the diagonal contribution + !$OMP DO + do i = 1,n_CSF + do kk=1,n_st + psi_out(kk,i) += diag_energies(i)*psi_in(kk,i) + enddo + enddo + !$OMP END DO + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + deallocate(diag_energies) + deallocate(bit_tmp) + +end subroutine calculate_sigma_vector_cfg_nst_naive_store + + + + +subroutine calculate_sigma_vector_cfg_nst(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for sigma-vector calculation + ! + ! Calculates the result of the + ! application of the hamiltonian to the + ! wavefunction in CFG basis once + ! TODO : Things prepare outside this routine + ! 1. Touch the providers for + ! a. ApqIJ containers + ! b. DET to CSF transformation matrices + ! 2. DET to CSF transcormation + ! 2. CSF to DET back transcormation + ! returns : psi_coef_out_det : + END_DOC + integer,intent(in) :: sze, istart,iend, istep, ishift, n_st + real*8,intent(in) :: psi_in(sze,n_st) + real*8,intent(out) :: psi_out(sze,n_st) + integer(bit_kind) :: Icfg(N_INT,2) + integer :: i,j,k,l,p,q,noccp,noccq, ii, jj, m, n, idxI, kk, nocck,orbk + integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg + integer(bit_kind),dimension(:,:,:),allocatable :: singlesI + integer(bit_kind),dimension(:,:,:),allocatable :: connectedI_alpha + integer,dimension(:),allocatable :: idxs_singlesI + integer,dimension(:),allocatable :: idxs_connectedI_alpha + integer,dimension(:,:),allocatable :: excitationIds_single + integer,dimension(:),allocatable :: excitationTypes_single + integer,dimension(:,:),allocatable :: excitationIds + integer,dimension(:),allocatable :: excitationTypes + real*8,dimension(:),allocatable :: diagfactors + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer :: Nalphas_Icfg, nconnectedI, rowsikpq, colsikpq, nsinglesI + integer :: extype,NSOMOalpha,NSOMOI,NSOMOJ,pmodel,qmodel + integer :: getNSOMO + integer :: totcolsTKI + integer :: rowsTKI + integer :: noccpp + integer :: istart_cfg, iend_cfg + integer*8 :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta + integer :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk + real*8 :: norm_coef_cfg, fac2eints + real*8 :: norm_coef_det + real*8 :: meCC1, meCC2, diagfac + real*8,dimension(:,:,:),allocatable :: TKI + real*8,dimension(:,:),allocatable :: GIJpqrs + real*8,dimension(:,:,:),allocatable :: TKIGIJ + real*8, external :: mo_two_e_integral + real*8, external :: get_two_e_integral + real*8 :: diag_energies(n_CSF) + + ! allocate + allocate(alphas_Icfg(N_INT,2,max(sze/2,100))) + allocate(singlesI(N_INT,2,max(sze/2,100))) + allocate(connectedI_alpha(N_INT,2,max(sze/2,100))) + allocate(idxs_singlesI(max(sze/2,100))) + allocate(idxs_connectedI_alpha(max(sze/2,100))) + allocate(excitationIds_single(2,max(sze/2,100))) + allocate(excitationTypes_single(max(sze/2,100))) + allocate(excitationIds(2,max(sze/2,100))) + allocate(excitationTypes(max(sze/2,100))) + allocate(diagfactors(max(sze/2,100))) + + + !print *," sze = ",sze + call calculate_preconditioner_cfg(diag_energies) + + MS = 0 + norm_coef_cfg=0.d0 + + psi_out=0.d0 + + istart_cfg = psi_csf_to_config_data(istart) + iend_cfg = psi_csf_to_config_data(iend) + + + !!! Single Excitations !!! + do i=istart_cfg,iend_cfg + print *,"I=",i + + ! if Seniority_range > 8 then + ! continue + ! else + ! cycle + + Icfg(1,1) = psi_configuration(1,1,i) + Icfg(1,2) = psi_configuration(1,2,i) + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + + ! Returns all unique (checking the past) singly excited cfgs connected to I + Nalphas_Icfg = 0 + ! TODO: + ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate + !call obtain_associated_alphaI(i, Icfg, alphas_Icfg, Nalphas_Icfg) + Nalphas_Icfg = NalphaIcfg_list(i) + alphas_Icfg(1:N_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg) + + ! TODO : remove doubly excited for return + ! Here we do 2x the loop. One to count for the size of the matrix, then we compute. + do k = 1,Nalphas_Icfg + ! Now generate all singly excited with respect to a given alpha CFG + call obtain_connected_I_foralpha(i,alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors) + + totcolsTKI = 0 + rowsTKI = -1 + do j = 1,nconnectedI + NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k)) + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + ! for E_pp E_rs and E_ppE_rr case + if(p.EQ.q) then + NSOMOalpha = NSOMOI + endif + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + totcolsTKI += colsikpq +! if(rowsTKI .LT. rowsikpq .AND. rowsTKI .NE. -1) then +! print *,">",j,"Something is wrong in sigma-vector", rowsTKI, rowsikpq, "(p,q)=",pmodel,qmodel,"ex=",extype,"na=",NSOMOalpha," nI=",NSOMOI +! !rowsTKI = rowsikpq +! else + rowsTKI = rowsikpq +! endif + enddo + + allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF + ! Initialize the inegral container + ! dims : (totcolsTKI, nconnectedI) + allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs + allocate(TKIGIJ(n_st,rowsTKI,nconnectedI)) ! TKI * gpqrs + + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k)) + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + do m = 1,colsikpq + do l = 1,rowsTKI + do kk = 1,n_st + TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * psi_in(kk,idxs_connectedI_alpha(j)+m-1) + enddo + enddo + enddo + do m = 1,colsikpq + do l = 1,nconnectedI + ! = (ik|jl) + moi = excitationIds(1,j) ! p + mok = excitationIds(2,j) ! q + moj = excitationIds(2,l) ! s + mol = excitationIds(1,l) ! r + if(moi.EQ.mok .AND. moj.EQ.mol)then + diagfac = diagfactors(j) + diagfac *= diagfactors(l) + !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac + GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = + else + diagfac = diagfactors(j)*diagfactors(l) + !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac + GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = + !endif + endif + enddo + enddo + totcolsTKI += colsikpq + enddo + + + + ! Do big BLAS + ! TODO TKI, size(TKI,1)*size(TKI,2) + call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0,& + TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0,& + TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) ) + + + ! Collect the result + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k)) + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + do m = 1,colsikpq + do l = 1,rowsTKI + do kk = 1,n_st + psi_out(kk,idxs_connectedI_alpha(j)+m-1) = psi_out(kk,idxs_connectedI_alpha(j)+m-1) + & + AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * TKIGIJ(kk,l,j) + enddo + enddo + enddo + totcolsTKI += colsikpq + enddo + + deallocate(TKI) ! coefficients of CSF + ! Initialize the inegral container + ! dims : (totcolsTKI, nconnectedI) + deallocate(GIJpqrs) ! gpqrs + deallocate(TKIGIJ) ! gpqrs + + enddo ! loop over alphas + enddo ! loop over I + deallocate(connectedI_alpha) + deallocate(idxs_connectedI_alpha) + deallocate(excitationIds) + deallocate(excitationTypes) + deallocate(diagfactors) + + + ! Add the diagonal contribution + do i = 1,n_CSF + do kk=1,n_st + psi_out(kk,i) += diag_energies(i)*psi_in(kk,i) + enddo + enddo + call omp_set_max_active_levels(4) + +end subroutine calculate_sigma_vector_cfg_nst_naive_store diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index 0c3c6f92..b6d5a3f8 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -88,7 +88,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N double precision, intent(out) :: energies(N_st_diag_in) integer :: iter, N_st_diag - integer :: i,j,k,l,m + integer :: i,j,k,l,m,kk,ii logical, intent(inout) :: converged double precision, external :: u_dot_v, u_dot_u @@ -110,6 +110,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N integer :: order(N_st_diag_in) double precision :: cmax double precision, allocatable :: U(:,:), U_csf(:,:), overlap(:,:) + double precision, allocatable :: tmpU(:,:), tmpW(:,:) double precision, pointer :: W(:,:), W_csf(:,:) logical :: disk_based double precision :: energy_shift(N_st_diag_in*davidson_sze_max) @@ -303,11 +304,42 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N ! Compute |W_k> = \sum_i |i> ! ----------------------------------- - call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift+1),U) + !call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift+1),U) + PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals if ((sze > 100000).and.distributed_davidson) then - call H_u_0_nstates_zmq (W,U,N_st_diag,sze) + !call H_u_0_nstates_zmq (W,U,N_st_diag,sze) + allocate(tmpW(N_st_diag,sze_csf)) + allocate(tmpU(N_st_diag,sze_csf)) + do kk=1,N_st_diag + do ii=1,sze_csf + tmpU(kk,ii) = U_csf(ii,shift+kk) + enddo + enddo + call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) + do kk=1,N_st_diag + do ii=1,sze_csf + W_csf(ii,shift+kk)=tmpW(kk,ii) + enddo + enddo + deallocate(tmpW) + deallocate(tmpU) else - call H_u_0_nstates_openmp(W,U,N_st_diag,sze) + !call H_u_0_nstates_openmp(W,U,N_st_diag,sze) + allocate(tmpW(N_st_diag,sze_csf)) + allocate(tmpU(N_st_diag,sze_csf)) + do kk=1,N_st_diag + do ii=1,sze_csf + tmpU(kk,ii) = U_csf(ii,shift+kk) + enddo + enddo + call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) + do kk=1,N_st_diag + do ii=1,sze_csf + W_csf(ii,shift+kk)=tmpW(kk,ii) + enddo + enddo + deallocate(tmpW) + deallocate(tmpU) endif ! else ! ! Already computed in update below @@ -351,7 +383,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N endif endif - call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1)) + !call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1)) ! Compute h_kl = = ! ------------------------------------------- From abe662757ecf255cc26e0dcf56a8d7df369a44f3 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Mon, 6 Jun 2022 17:27:57 +0200 Subject: [PATCH 02/70] Added bug fixes for open-shell. --- src/csf/cfgCI_utils.c | 46 ++++++++++++++++++++++++++++++------------- src/csf/tree_utils.c | 26 +++++++++++++----------- 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index 23b984a0..c11b38dc 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -68,10 +68,16 @@ void getBFIndexList(int NSOMO, int *BF1, int *IdxListBF1){ break; } } - BFcopy[Iidx] = -1; - BFcopy[Jidx] = -1; - IdxListBF1[Jidx] = Iidx; - IdxListBF1[Iidx] = Jidx; + if(countN1 <= 0){ + BFcopy[Iidx] = -1; + IdxListBF1[Iidx] = Iidx; + } + else{ + BFcopy[Iidx] = -1; + BFcopy[Jidx] = -1; + IdxListBF1[Jidx] = Iidx; + IdxListBF1[Iidx] = Jidx; + } } } @@ -328,10 +334,21 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl Get Overlap ************************************/ // Fill matrix + + int rowsbftodetI, colsbftodetI; + + /*********************************** + Get BFtoDeterminant Matrix + ************************************/ + + printf("In convertcsftodet\n"); + convertBFtoDetBasis(Isomo, MS, &bftodetmatrixI, &rowsbftodetI, &colsbftodetI); + int rowsI = 0; int colsI = 0; - getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); + //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); + getOverlapMatrix_withDet(bftodetmatrixI, rowsbftodetI, colsbftodetI, Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); /*********************************** @@ -342,14 +359,6 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl gramSchmidt(overlapMatrixI, rowsI, colsI, orthoMatrixI); - /*********************************** - Get BFtoDeterminant Matrix - ************************************/ - - int rowsbftodetI, colsbftodetI; - - convertBFtoDetBasis(Isomo, MS, &bftodetmatrixI, &rowsbftodetI, &colsbftodetI); - /*********************************** Get Final CSF to Det Matrix ************************************/ @@ -1297,12 +1306,16 @@ void getbftodetfunction(Tree *dettree, int NSOMO, int MS, int *BF1, double *rowv double fac = 1.0; for(int i = 0; i < NSOMO; i++) donepq[i] = 0.0; + for(int i=0;i 0.0 || donepq[idxq] > 0.0) continue; + if(donepq[idxp] > 0.0 || donepq[idxq] > 0.0 || idxp == idxq) continue; fac *= 2.0; donepq[idxp] = 1.0; donepq[idxq] = 1.0; @@ -1425,6 +1438,11 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int * for(int k=0;k NSOMOMax || icpl < 0 || izeros > zeromax ) return; // If we find a valid BF assign its address - if(isomo == NSOMOMax){ + if(isomo == NSOMOMax && icpl == MSmax){ (*inode)->addr = bftree->rootNode->addr; bftree->rootNode->addr += 1; return; } // Call 0 branch - if(((*inode)->C0) == NULL && izeros+1 <= zeromax){ - ((*inode)->C0) = malloc(sizeof(Node)); - (*(*inode)->C0) = (Node){ .C0 = NULL, .C1 = NULL, .PREV = *inode, .addr = -1, .cpl = 0, .iSOMO = isomo }; - buildTree(bftree, &(*inode)->C0, isomo+1, izeros+1, icpl+1, NSOMOMax, MSmax); + if(izeros+1 <= zeromax){ + if(((*inode)->C0) == NULL){ + ((*inode)->C0) = malloc(sizeof(Node)); + (*(*inode)->C0) = (Node){ .C0 = NULL, .C1 = NULL, .PREV = *inode, .addr = -1, .cpl = 0, .iSOMO = isomo }; + buildTree(bftree, &(*inode)->C0, isomo+1, izeros+1, icpl+1, NSOMOMax, MSmax); + } + else buildTree(bftree, &(*inode)->C0, isomo+1, izeros+1, icpl+1, NSOMOMax, MSmax); } - else buildTree(bftree, &(*inode)->C0, isomo+1, izeros+1, icpl+1, NSOMOMax, MSmax); // Call 1 branch - if(((*inode)->C1) == NULL && icpl-1 >= 0){ - ((*inode)->C1) = malloc(sizeof(Node)); - (*(*inode)->C1) = (Node){ .C0 = NULL, .C1 = NULL, .PREV = *inode, .addr = -1, .cpl = 1, .iSOMO = isomo }; - buildTree(bftree, &(*inode)->C1, isomo+1, izeros+0, icpl-1, NSOMOMax, MSmax); + if(icpl-1 >=0){ + if(((*inode)->C1) == NULL){ + ((*inode)->C1) = malloc(sizeof(Node)); + (*(*inode)->C1) = (Node){ .C0 = NULL, .C1 = NULL, .PREV = *inode, .addr = -1, .cpl = 1, .iSOMO = isomo }; + buildTree(bftree, &(*inode)->C1, isomo+1, izeros+0, icpl-1, NSOMOMax, MSmax); + } + else buildTree(bftree, &(*inode)->C1, isomo+1, izeros+0, icpl-1, NSOMOMax, MSmax); } - else buildTree(bftree, &(*inode)->C1, isomo+1, izeros+0, icpl-1, NSOMOMax, MSmax); return; } From 48e48fbf16eb1e627dc50a720ed351c6b466df30 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Mon, 6 Jun 2022 17:28:48 +0200 Subject: [PATCH 03/70] Updated binary search and added cfg_somo_min. --- src/csf/configurations.irp.f | 119 ++++++++++++++++++++++++++++------- 1 file changed, 98 insertions(+), 21 deletions(-) diff --git a/src/csf/configurations.irp.f b/src/csf/configurations.irp.f index 8e2a513c..ad7f5294 100644 --- a/src/csf/configurations.irp.f +++ b/src/csf/configurations.irp.f @@ -460,6 +460,7 @@ END_PROVIDER BEGIN_PROVIDER [ integer, cfg_seniority_index, (0:elec_num) ] &BEGIN_PROVIDER [ integer, cfg_nsomo_max ] +&BEGIN_PROVIDER [ integer, cfg_nsomo_min ] implicit none BEGIN_DOC ! Returns the index in psi_configuration of the first cfg with @@ -467,9 +468,10 @@ END_PROVIDER ! ! cfg_nsomo_max : Max number of SOMO in the current wave function END_DOC - integer :: i, k, s, sold + integer :: i, k, s, sold, soldmin cfg_seniority_index(:) = -1 sold = -1 + soldmin = 2000 cfg_nsomo_max = 0 do i=1,N_configuration s = 0 @@ -482,6 +484,10 @@ END_PROVIDER cfg_seniority_index(s) = i cfg_nsomo_max = s endif + if (soldmin .GT. s ) then + soldmin = s + cfg_nsomo_min = s + endif enddo END_PROVIDER @@ -743,41 +749,112 @@ BEGIN_PROVIDER [ integer(bit_kind), dominant_dets_of_cfgs, (N_int,2,N_dominant_d enddo END_PROVIDER -subroutine binary_search_cfg(cfgInp,addcfg) +subroutine binary_search_cfg(cfgInp,addcfg,bit_tmp) use bitmasks implicit none BEGIN_DOC ! Documentation for binary_search - ! - ! Does a binary search to find + ! + ! Does a binary search to find ! the address of a configuration in a list of ! configurations. END_DOC integer(bit_kind), intent(in) :: cfgInp(N_int,2) integer , intent(out) :: addcfg - integer :: i,j,k,r,l - integer*8 :: key, key2 - logical :: found - !integer*8, allocatable :: bit_tmp(:) - !integer*8, external :: configuration_search_key + integer*8, intent(in) :: bit_tmp(0:N_configuration+1) - !allocate(bit_tmp(0:N_configuration)) - !bit_tmp(0) = 0 - do i=1,N_configuration - !bit_tmp(i) = configuration_search_key(psi_configuration(1,1,i),N_int) - found = .True. - do k=1,N_int - found = found .and. (psi_configuration(k,1,i) == cfgInp(k,1)) & - .and. (psi_configuration(k,2,i) == cfgInp(k,2)) - enddo - if (found) then - addcfg = i - exit + logical :: found + integer :: l, r, j, k + integer*8 :: key + + integer*8, external :: configuration_search_key + + key = configuration_search_key(cfgInp,N_int) + + ! Binary search + l = 0 + r = N_configuration+1 +IRP_IF WITHOUT_SHIFTRL + j = ishft(r-l,-1) +IRP_ELSE + j = shiftr(r-l,1) +IRP_ENDIF + do while (j>=1) + j = j+l + if (bit_tmp(j) == key) then + ! Find 1st element which matches the key + if (j > 1) then + do while (j>1 .and. bit_tmp(j-1) == key) + j = j-1 + enddo + endif + ! Find correct element matching the key + do while (bit_tmp(j) == key) + found = .True. + do k=1,N_int + found = found .and. (psi_configuration(k,1,j) == cfgInp(k,1))& + .and. (psi_configuration(k,2,j) == cfgInp(k,2)) + enddo + if (found) then + addcfg = j + return + endif + j = j+1 + enddo + addcfg = -1 + return + else if (bit_tmp(j) > key) then + r = j + else + l = j endif +IRP_IF WITHOUT_SHIFTRL + j = ishft(r-l,-1) +IRP_ELSE + j = shiftr(r-l,1) +IRP_ENDIF enddo + addcfg = -1 + return + end subroutine +!subroutine binary_search_cfg(cfgInp,addcfg) +! use bitmasks +! implicit none +! BEGIN_DOC +! ! Documentation for binary_search +! ! +! ! Does a binary search to find +! ! the address of a configuration in a list of +! ! configurations. +! END_DOC +! integer(bit_kind), intent(in) :: cfgInp(N_int,2) +! integer , intent(out) :: addcfg +! integer :: i,j,k,r,l +! integer*8 :: key, key2 +! logical :: found +! !integer*8, allocatable :: bit_tmp(:) +! !integer*8, external :: configuration_search_key +! +! !allocate(bit_tmp(0:N_configuration)) +! !bit_tmp(0) = 0 +! do i=1,N_configuration +! !bit_tmp(i) = configuration_search_key(psi_configuration(1,1,i),N_int) +! found = .True. +! do k=1,N_int +! found = found .and. (psi_configuration(k,1,i) == cfgInp(k,1)) & +! .and. (psi_configuration(k,2,i) == cfgInp(k,2)) +! enddo +! if (found) then +! addcfg = i +! exit +! endif +! enddo +! +!end subroutine +! BEGIN_PROVIDER [ integer, psi_configuration_to_psi_det, (2,N_configuration) ] &BEGIN_PROVIDER [ integer, psi_configuration_to_psi_det_data, (N_det) ] From 1c41bfdd17504c1ce5d5647686fe1ccf11bbe50f Mon Sep 17 00:00:00 2001 From: v1j4y Date: Mon, 6 Jun 2022 17:29:18 +0200 Subject: [PATCH 04/70] Sigma-vector seems to be working. --- src/csf/create_excitations.irp.f | 6 +- src/csf/sigma_vector.irp.f | 240 +++++++++++++++---------------- 2 files changed, 124 insertions(+), 122 deletions(-) diff --git a/src/csf/create_excitations.irp.f b/src/csf/create_excitations.irp.f index c1560148..5e94e355 100644 --- a/src/csf/create_excitations.irp.f +++ b/src/csf/create_excitations.irp.f @@ -226,7 +226,7 @@ subroutine generate_all_singles_cfg(cfg,singles,n_singles,Nint) enddo end -subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint) +subroutine generate_all_singles_cfg_with_type(bit_tmp,cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint) implicit none use bitmasks BEGIN_DOC @@ -238,6 +238,7 @@ subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_sin ! ex_type_singles : on output contains type of excitations : ! END_DOC + integer*8, intent(in) :: bit_tmp(0:N_configuration+1) integer, intent(in) :: Nint integer, intent(inout) :: n_singles integer, intent(out) :: idxs_singles(*) @@ -251,6 +252,7 @@ subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_sin integer(bit_kind) :: single(Nint,2) logical :: i_ok + n_singles = 0 !TODO !Make list of Somo and Domo for holes @@ -261,7 +263,7 @@ subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_sin addcfg = -1 call do_single_excitation_cfg_with_type(cfgInp,single,i_hole,i_particle,ex_type,i_ok) if (i_ok) then - call binary_search_cfg(single,addcfg) + call binary_search_cfg(single,addcfg,bit_tmp) if(addcfg .EQ. -1) cycle n_singles = n_singles + 1 do k=1,Nint diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 680cdc39..7a84cfc2 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -21,7 +21,7 @@ ! required for the calculation of prototype arrays. END_DOC NSOMOMax = min(elec_num, cfg_nsomo_max + 2) - NSOMOMin = 0 + NSOMOMin = max(0,cfg_nsomo_min-2) ! Note that here we need NSOMOMax + 2 sizes NCSFMax = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)-binom(NSOMOMax,((NSOMOMax+1)/2)+1)))) ! TODO: NCSFs for MS=0 NBFMax = NCSFMax @@ -143,6 +143,117 @@ end subroutine get_phase_qp_to_cfg + BEGIN_PROVIDER [ real*8, DetToCSFTransformationMatrix, (0:NSOMOMax,NBFMax,maxDetDimPerBF)] + &BEGIN_PROVIDER [ real*8, psi_coef_config, (n_CSF,1)] + &BEGIN_PROVIDER [ integer, psi_config_data, (N_configuration,2)] + &BEGIN_PROVIDER [ integer, psi_csf_to_config_data, (n_CSF)] + use cfunctions + implicit none + BEGIN_DOC + ! Documentation for DetToCSFTransformationMatrix + ! Provides the matrix of transformatons for the + ! conversion between determinant to CSF basis (in BFs) + END_DOC + integer*8 :: Isomo, Idomo + integer(bit_kind) :: Ialpha(N_int),Ibeta(N_int) + integer :: rows, cols, i, j, k + integer :: startdet, enddet, idx + integer*8 MS + integer ndetI + integer :: getNSOMO + real*8,dimension(:,:),allocatable :: tempBuffer + real*8,dimension(:),allocatable :: tempCoeff + real*8 :: norm_det1, phasedet + + integer :: nt + + + norm_det1 = 0.d0 + MS = elec_alpha_num - elec_beta_num + print *,"Maxbfdim=",NBFMax + print *,"Maxdetdim=",maxDetDimPerBF + print *,"n_CSF=",n_CSF + print *,"N_configurations=",N_configuration + print *,"n_core_orb=",n_core_orb + ! initialization + psi_coef_config = 0.d0 + DetToCSFTransformationMatrix(0,:,:) = 1.d0 + do i = 2-iand(elec_alpha_num-elec_beta_num,1), NSOMOMax,2 + Isomo = IBSET(0_8, i) - 1_8 + ! rows = Ncsfs + ! cols = Ndets + bfIcfg = max(1,nint((binom(i,(i+1)/2)-binom(i,((i+1)/2)+1)))) + ndetI = max(1,nint((binom(i,(i+1)/2)))) + + allocate(tempBuffer(bfIcfg,ndetI)) + call getCSFtoDETTransformationMatrix(Isomo, MS, NBFMax, maxDetDimPerBF, tempBuffer) + DetToCSFTransformationMatrix(i,1:bfIcfg,1:ndetI) = tempBuffer(1:bfIcfg,1:ndetI) + deallocate(tempBuffer) + enddo + + integer s, bfIcfg + integer countcsf + countcsf = 0 + integer countdet + countdet = 0 + integer istate + istate = 1 + psi_csf_to_config_data(1) = 1 + phasedet = 1.0d0 + call omp_set_max_active_levels(1) + !$OMP PARALLEL + !$OMP MASTER + do i = 1,N_configuration + startdet = psi_configuration_to_psi_det(1,i) + enddet = psi_configuration_to_psi_det(2,i) + ndetI = enddet-startdet+1 + + allocate(tempCoeff(ndetI)) + countdet = 1 + do j = startdet, enddet + idx = psi_configuration_to_psi_det_data(j) + Ialpha(:) = psi_det(:,1,idx) + Ibeta(:) = psi_det(:,2,idx) + call get_phase_qp_to_cfg(Ialpha, Ibeta, phasedet) + tempCoeff(countdet) = psi_coef(idx, istate)*phasedet + norm_det1 += tempCoeff(countdet)*tempCoeff(countdet) + countdet += 1 + enddo + + !print *,"dimcoef=",bfIcfg,norm_det1 + !call printMatrix(tempCoeff,ndetI,1) + + s = 0 + do k=1,N_int + if (psi_configuration(k,1,i) == 0_bit_kind) cycle + s = s + popcnt(psi_configuration(k,1,i)) + enddo + bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) + + ! perhaps blocking with CFGs of same seniority + ! can be more efficient + allocate(tempBuffer(bfIcfg,ndetI)) + tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI) + + call dgemm('N','N', bfIcfg, 1, ndetI, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, size(tempCoeff,1), 0.d0, psi_coef_config(countcsf+1,1), size(psi_coef_config,1)) + !call dgemv('N', NBFMax, maxDetDimPerBF, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, 1, 0.d0, psi_coef_config(countcsf), 1) + + deallocate(tempCoeff) + deallocate(tempBuffer) + psi_config_data(i,1) = countcsf + 1 + do k=1,bfIcfg + psi_csf_to_config_data(countcsf+k) = i + enddo + countcsf += bfIcfg + psi_config_data(i,2) = countcsf + enddo + print *,"Norm det=",norm_det1, size(psi_coef_config,1), " Dim csf=", countcsf + !$OMP END MASTER + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + END_PROVIDER + BEGIN_PROVIDER [ integer, AIJpqMatrixDimsList, (NSOMOMin:NSOMOMax,4,NSOMOMax+1,NSOMOMax+1,2)] &BEGIN_PROVIDER [ integer, rowsmax] &BEGIN_PROVIDER [ integer, colsmax] @@ -781,117 +892,6 @@ subroutine calculate_preconditioner_cfg(diag_energies) end subroutine calculate_preconditioner_cfg - BEGIN_PROVIDER [ real*8, DetToCSFTransformationMatrix, (0:NSOMOMax,NBFMax,maxDetDimPerBF)] - &BEGIN_PROVIDER [ real*8, psi_coef_config, (n_CSF,1)] - &BEGIN_PROVIDER [ integer, psi_config_data, (N_configuration,2)] - &BEGIN_PROVIDER [ integer, psi_csf_to_config_data, (n_CSF)] - use cfunctions - implicit none - BEGIN_DOC - ! Documentation for DetToCSFTransformationMatrix - ! Provides the matrix of transformatons for the - ! conversion between determinant to CSF basis (in BFs) - END_DOC - integer*8 :: Isomo, Idomo - integer(bit_kind) :: Ialpha(N_int),Ibeta(N_int) - integer :: rows, cols, i, j, k - integer :: startdet, enddet, idx - integer*8 MS - integer ndetI - integer :: getNSOMO - real*8,dimension(:,:),allocatable :: tempBuffer - real*8,dimension(:),allocatable :: tempCoeff - real*8 :: norm_det1, phasedet - - integer :: nt - - - norm_det1 = 0.d0 - MS = elec_alpha_num - elec_beta_num - print *,"Maxbfdim=",NBFMax - print *,"Maxdetdim=",maxDetDimPerBF - print *,"n_CSF=",n_CSF - print *,"N_configurations=",N_configuration - print *,"n_core_orb=",n_core_orb - ! initialization - psi_coef_config = 0.d0 - DetToCSFTransformationMatrix(0,:,:) = 1.d0 - do i = 2-iand(elec_alpha_num-elec_beta_num,1), NSOMOMax,2 - Isomo = IBSET(0_8, i) - 1_8 - ! rows = Ncsfs - ! cols = Ndets - bfIcfg = max(1,nint((binom(i,(i+1)/2)-binom(i,((i+1)/2)+1)))) - ndetI = max(1,nint((binom(i,(i+1)/2)))) - - allocate(tempBuffer(bfIcfg,ndetI)) - call getCSFtoDETTransformationMatrix(Isomo, MS, NBFMax, maxDetDimPerBF, tempBuffer) - DetToCSFTransformationMatrix(i,1:bfIcfg,1:ndetI) = tempBuffer(1:bfIcfg,1:ndetI) - deallocate(tempBuffer) - enddo - - integer s, bfIcfg - integer countcsf - countcsf = 0 - integer countdet - countdet = 0 - integer istate - istate = 1 - psi_csf_to_config_data(1) = 1 - phasedet = 1.0d0 - call omp_set_max_active_levels(1) - !$OMP PARALLEL - !$OMP MASTER - do i = 1,N_configuration - startdet = psi_configuration_to_psi_det(1,i) - enddet = psi_configuration_to_psi_det(2,i) - ndetI = enddet-startdet+1 - - allocate(tempCoeff(ndetI)) - countdet = 1 - do j = startdet, enddet - idx = psi_configuration_to_psi_det_data(j) - Ialpha(:) = psi_det(:,1,idx) - Ibeta(:) = psi_det(:,2,idx) - call get_phase_qp_to_cfg(Ialpha, Ibeta, phasedet) - tempCoeff(countdet) = psi_coef(idx, istate)*phasedet - norm_det1 += tempCoeff(countdet)*tempCoeff(countdet) - countdet += 1 - enddo - - !print *,"dimcoef=",bfIcfg,norm_det1 - !call printMatrix(tempCoeff,ndetI,1) - - s = 0 - do k=1,N_int - if (psi_configuration(k,1,i) == 0_bit_kind) cycle - s = s + popcnt(psi_configuration(k,1,i)) - enddo - bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) - - ! perhaps blocking with CFGs of same seniority - ! can be more efficient - allocate(tempBuffer(bfIcfg,ndetI)) - tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI) - - call dgemm('N','N', bfIcfg, 1, ndetI, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, size(tempCoeff,1), 0.d0, psi_coef_config(countcsf+1,1), size(psi_coef_config,1)) - !call dgemv('N', NBFMax, maxDetDimPerBF, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, 1, 0.d0, psi_coef_config(countcsf), 1) - - deallocate(tempCoeff) - deallocate(tempBuffer) - psi_config_data(i,1) = countcsf + 1 - do k=1,bfIcfg - psi_csf_to_config_data(countcsf+k) = i - enddo - countcsf += bfIcfg - psi_config_data(i,2) = countcsf - enddo - print *,"Norm det=",norm_det1, size(psi_coef_config,1), " Dim csf=", countcsf - !$OMP END MASTER - !$OMP END PARALLEL - call omp_set_max_active_levels(4) - - END_PROVIDER - subroutine obtain_connected_I_foralpha_fromfilterdlist(idxI, nconnectedJ, idslistconnectedJ, listconnectedJ, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors) implicit none use bitmasks @@ -1348,10 +1348,10 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !$OMP AIJpqMatrixDimsList, diag_energies, n_CSF, lock, NBFmax,nconnectedtotalmax, nconnectedmaxJ,maxnalphas,& !$OMP num_threads_max) - allocate(singlesI(N_INT,2,max(sze,100))) - allocate(idxs_singlesI(max(sze,100))) - allocate(excitationIds_single(2,max(sze,100))) - allocate(excitationTypes_single(max(sze,100))) + allocate(singlesI(N_INT,2,max(sze,1000))) + allocate(idxs_singlesI(max(sze,1000))) + allocate(excitationIds_single(2,max(sze,1000))) + allocate(excitationTypes_single(max(sze,1000))) ! !!! Single Excitations !!! @@ -1488,11 +1488,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze allocate(CCmattmp(n_st,NBFmax)) ! Loop over all selected configurations - !$OMP DO SCHEDULE(dynamic,16) + !$OMP DO SCHEDULE(static) do i = istart_cfg,iend_cfg -! TKI = 0.d0 -! GIJpqrs = 0.d0 -! TKIGIJ = 0.d0 ! if Seniority_range > 8 then ! continue @@ -1552,6 +1549,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs allocate(TKIGIJ(n_st,rowsTKI,nconnectedI)) ! TKI * gpqrs !print *,"\t---rowsTKI=",rowsTKI," totCols=",totcolsTKI + TKI = 0.d0 + GIJpqrs = 0.d0 + TKIGIJ = 0.d0 totcolsTKI = 0 do j = 1,nconnectedI From a3200652cec0894b9c7a7ed81a43220e2ad98ede Mon Sep 17 00:00:00 2001 From: v1j4y Date: Mon, 6 Jun 2022 21:40:53 +0200 Subject: [PATCH 05/70] Debugging sigma-vector. --- src/csf/cfgCI_utils.c | 1 - src/csf/sigma_vector.irp.f | 121 ++++++++++-------- .../diagonalization_hcsf_dressed.irp.f | 55 +++++--- 3 files changed, 105 insertions(+), 72 deletions(-) diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index c11b38dc..a90fbc1f 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -341,7 +341,6 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl Get BFtoDeterminant Matrix ************************************/ - printf("In convertcsftodet\n"); convertBFtoDetBasis(Isomo, MS, &bftodetmatrixI, &rowsbftodetI, &colsbftodetI); int rowsI = 0; diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 7a84cfc2..80e11cfc 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -40,49 +40,68 @@ detDimperBF = 0 MS = elec_alpha_num-elec_beta_num ! number of cfgs = number of dets for 0 somos - n_CSF = 0 - ncfgprev = cfg_seniority_index(0) - ncfgpersomo = ncfgprev - do i = iand(MS,1), NSOMOMax-2,2 - if(cfg_seniority_index(i) .EQ. -1) then - cycle - endif - if(cfg_seniority_index(i+2) .EQ. -1) then - ncfgpersomo = N_configuration + 1 - else - if(cfg_seniority_index(i+2) > ncfgpersomo) then - ncfgpersomo = cfg_seniority_index(i+2) - else - k = 0 - do while(cfg_seniority_index(i+2+k) < ncfgpersomo) - k = k + 2 - ncfgpersomo = cfg_seniority_index(i+2+k) - enddo - endif - endif - ncfg = ncfgpersomo - ncfgprev - if(i .EQ. 0 .OR. i .EQ. 1) then - dimcsfpercfg = 1 - elseif( i .EQ. 3) then - dimcsfpercfg = 2 - else - if(iand(MS,1) .EQ. 0) then - dimcsfpercfg = max(1,nint((binom(i,i/2)-binom(i,i/2+1)))) - else - dimcsfpercfg = max(1,nint((binom(i,(i+1)/2)-binom(i,(i+3)/2)))) - endif - endif - n_CSF += ncfg * dimcsfpercfg - if(cfg_seniority_index(i+2) > ncfgprev) then - ncfgprev = cfg_seniority_index(i+2) - else - k = 0 - do while(cfg_seniority_index(i+2+k) < ncfgprev) - k = k + 2 - ncfgprev = cfg_seniority_index(i+2+k) - enddo - endif + n_CSF = cfg_seniority_index(NSOMOMin)-1 + print *,"start=",n_CSF + ncfgprev = cfg_seniority_index(NSOMOMin) + !do i = 0-iand(MS,1)+2, NSOMOMax,2 + do i = NSOMOMin+2, NSOMOMax,2 + if(cfg_seniority_index(i) .EQ. -1)then + ncfgpersomo = N_configuration + 1 + else + ncfgpersomo = cfg_seniority_index(i) + endif + ncfg = ncfgpersomo - ncfgprev + !detDimperBF = max(1,nint((binom(i,(i+1)/2)))) + dimcsfpercfg = max(1,nint((binom(i-2,(i-2+1)/2)-binom(i-2,((i-2+1)/2)+1)))) + n_CSF += ncfg * dimcsfpercfg + print *,i,">(",ncfg,ncfgprev,ncfgpersomo,")",",",detDimperBF,">",dimcsfpercfg, " | dimbas= ", n_CSF + !if(cfg_seniority_index(i+2) == -1) EXIT + !if(detDimperBF > maxDetDimPerBF) maxDetDimPerBF = detDimperBF + ncfgprev = cfg_seniority_index(i) enddo + !n_CSF = 0 + !ncfgprev = cfg_seniority_index(0) + !ncfgpersomo = ncfgprev + !do i = iand(MS,1), NSOMOMax-2,2 + ! if(cfg_seniority_index(i) .EQ. -1) then + ! cycle + ! endif + ! if(cfg_seniority_index(i+2) .EQ. -1) then + ! ncfgpersomo = N_configuration + 1 + ! else + ! if(cfg_seniority_index(i+2) > ncfgpersomo) then + ! ncfgpersomo = cfg_seniority_index(i+2) + ! else + ! k = 0 + ! do while(cfg_seniority_index(i+2+k) < ncfgpersomo) + ! k = k + 2 + ! ncfgpersomo = cfg_seniority_index(i+2+k) + ! enddo + ! endif + ! endif + ! ncfg = ncfgpersomo - ncfgprev + ! if(i .EQ. 0 .OR. i .EQ. 1) then + ! dimcsfpercfg = 1 + ! elseif( i .EQ. 3) then + ! dimcsfpercfg = 2 + ! else + ! if(iand(MS,1) .EQ. 0) then + ! dimcsfpercfg = max(1,nint((binom(i,i/2)-binom(i,i/2+1)))) + ! else + ! dimcsfpercfg = max(1,nint((binom(i,(i+1)/2)-binom(i,(i+3)/2)))) + ! endif + ! endif + ! n_CSF += ncfg * dimcsfpercfg + ! if(cfg_seniority_index(i+2) > ncfgprev) then + ! ncfgprev = cfg_seniority_index(i+2) + ! else + ! k = 0 + ! do while(cfg_seniority_index(i+2+k) < ncfgprev) + ! k = k + 2 + ! ncfgprev = cfg_seniority_index(i+2+k) + ! enddo + ! endif + !enddo END_PROVIDER @@ -1477,14 +1496,14 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze deallocate(excitationIds_single) deallocate(excitationTypes_single) - allocate(listconnectedJ(N_INT,2,max(sze,100))) - allocate(alphas_Icfg(N_INT,2,max(sze,100))) - allocate(connectedI_alpha(N_INT,2,max(sze,100))) - allocate(idxs_connectedI_alpha(max(sze,100))) - allocate(excitationIds(2,max(sze,100))) - allocate(excitationTypes(max(sze,100))) - allocate(diagfactors(max(sze,100))) - allocate(idslistconnectedJ(max(sze,100))) + allocate(listconnectedJ(N_INT,2,max(sze,1000))) + allocate(alphas_Icfg(N_INT,2,max(sze,1000))) + allocate(connectedI_alpha(N_INT,2,max(sze,1000))) + allocate(idxs_connectedI_alpha(max(sze,1000))) + allocate(excitationIds(2,max(sze,1000))) + allocate(excitationTypes(max(sze,1000))) + allocate(diagfactors(max(sze,1000))) + allocate(idslistconnectedJ(max(sze,1000))) allocate(CCmattmp(n_st,NBFmax)) ! Loop over all selected configurations @@ -1643,11 +1662,13 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze do i = 1,n_CSF do kk=1,n_st psi_out(kk,i) += diag_energies(i)*psi_in(kk,i) + print *,psi_in(kk,i)," | ",psi_out(kk,i) enddo enddo !$OMP END DO !$OMP END PARALLEL + stop call omp_set_max_active_levels(4) deallocate(diag_energies) diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index b6d5a3f8..2d3ed8b1 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -267,8 +267,10 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N do k=N_st+1,N_st_diag do i=1,sze - call random_number(r1) - call random_number(r2) + !call random_number(r1) + !call random_number(r2) + r1 = 0.5 + r2 = 0.5 r1 = dsqrt(-2.d0*dlog(r1)) r2 = dtwo_pi*r2 u_in(i,k) = r1*dcos(r2) * u_in(i,k-N_st) @@ -286,8 +288,8 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N enddo ! Make random verctors eigenstates of S2 - call convertWFfromDETtoCSF(N_st_diag,U(1,1),U_csf(1,1)) - call convertWFfromCSFtoDET(N_st_diag,U_csf(1,1),U(1,1)) + call convertWFfromDETtoCSF(N_st_diag,U,U_csf) + !call convertWFfromCSFtoDET(N_st_diag,U_csf,U) do while (.not.converged) itertot = itertot+1 @@ -330,8 +332,10 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N do kk=1,N_st_diag do ii=1,sze_csf tmpU(kk,ii) = U_csf(ii,shift+kk) + tmpU(kk,ii) = 0.0d0 enddo enddo + tmpU(1,1) = 1.0d0 call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) do kk=1,N_st_diag do ii=1,sze_csf @@ -479,24 +483,24 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N ! Compute residual vector and davidson step ! ----------------------------------------- - if (without_diagonal) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) - do k=1,N_st_diag - do i=1,sze - U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) & - /max(H_jj(i) - lambda (k),1.d-2) - enddo + !if (without_diagonal) then + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k=1,N_st_diag + do i=1,sze + U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) & + /max(H_jj(i) - lambda (k),1.d-2) enddo - !$OMP END PARALLEL DO - else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) - do k=1,N_st_diag - do i=1,sze - U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) - enddo - enddo - !$OMP END PARALLEL DO - endif + enddo + !$OMP END PARALLEL DO + !else + ! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + ! do k=1,N_st_diag + ! do i=1,sze + ! U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) + ! enddo + ! enddo + ! !$OMP END PARALLEL DO + !endif do k=1,N_st residual_norm(k) = u_dot_u(U(1,k),sze) @@ -543,6 +547,15 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N ! Re-contract U ! ------------- + call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0, & + W_csf, size(W_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + do k=1,N_st_diag + do i=1,sze_csf + W_csf(i,k) = u_in(i,k) + enddo + enddo + call convertWFfromCSFtoDET(N_st_diag,W_csf,W) + call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0, & U_csf, size(U_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) do k=1,N_st_diag From 6d48611edfe8d08a8abb9f8ab9530f0fcc918377 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Wed, 8 Jun 2022 12:08:44 +0200 Subject: [PATCH 06/70] Cleaning of sigma-vector file. --- src/csf/sigma_vector.irp.f | 26 ++++++++----------- .../diagonalization_hcsf_dressed.irp.f | 2 -- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 80e11cfc..4c5118dc 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -41,7 +41,6 @@ MS = elec_alpha_num-elec_beta_num ! number of cfgs = number of dets for 0 somos n_CSF = cfg_seniority_index(NSOMOMin)-1 - print *,"start=",n_CSF ncfgprev = cfg_seniority_index(NSOMOMin) !do i = 0-iand(MS,1)+2, NSOMOMax,2 do i = NSOMOMin+2, NSOMOMax,2 @@ -54,7 +53,6 @@ !detDimperBF = max(1,nint((binom(i,(i+1)/2)))) dimcsfpercfg = max(1,nint((binom(i-2,(i-2+1)/2)-binom(i-2,((i-2+1)/2)+1)))) n_CSF += ncfg * dimcsfpercfg - print *,i,">(",ncfg,ncfgprev,ncfgpersomo,")",",",detDimperBF,">",dimcsfpercfg, " | dimbas= ", n_CSF !if(cfg_seniority_index(i+2) == -1) EXIT !if(detDimperBF > maxDetDimPerBF) maxDetDimPerBF = detDimperBF ncfgprev = cfg_seniority_index(i) @@ -189,11 +187,6 @@ end subroutine get_phase_qp_to_cfg norm_det1 = 0.d0 MS = elec_alpha_num - elec_beta_num - print *,"Maxbfdim=",NBFMax - print *,"Maxdetdim=",maxDetDimPerBF - print *,"n_CSF=",n_CSF - print *,"N_configurations=",N_configuration - print *,"n_core_orb=",n_core_orb ! initialization psi_coef_config = 0.d0 DetToCSFTransformationMatrix(0,:,:) = 1.d0 @@ -266,7 +259,6 @@ end subroutine get_phase_qp_to_cfg countcsf += bfIcfg psi_config_data(i,2) = countcsf enddo - print *,"Norm det=",norm_det1, size(psi_coef_config,1), " Dim csf=", countcsf !$OMP END MASTER !$OMP END PARALLEL call omp_set_max_active_levels(4) @@ -1373,7 +1365,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze allocate(excitationTypes_single(max(sze,1000))) ! + !!!====================!!! !!! Single Excitations !!! + !!!====================!!! !$OMP DO SCHEDULE(dynamic,16) do i=istart_cfg,iend_cfg @@ -1473,11 +1467,11 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze do jj = startj, endj cntj = jj-startj+1 meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* h_core_ri(p,q) - call omp_set_lock(lock(jj)) + !call omp_set_lock(lock(jj)) do kk = 1,n_st psi_out(kk,jj) = psi_out(kk,jj) + meCC1 * psi_in(kk,ii) enddo - call omp_unset_lock(lock(jj)) + !call omp_unset_lock(lock(jj)) enddo enddo @@ -1506,6 +1500,10 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze allocate(idslistconnectedJ(max(sze,1000))) allocate(CCmattmp(n_st,NBFmax)) + !!!====================!!! + !!! Double Excitations !!! + !!!====================!!! + ! Loop over all selected configurations !$OMP DO SCHEDULE(static) do i = istart_cfg,iend_cfg @@ -1534,7 +1532,6 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ) ! TODO : remove doubly excited for return - ! Here we do 2x the loop. One to count for the size of the matrix, then we compute. do k = 1,Nalphas_Icfg ! Now generate all singly excited with respect to a given alpha CFG @@ -1546,6 +1543,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze cycle endif + ! Here we do 2x the loop. One to count for the size of the matrix, then we compute. totcolsTKI = 0 rowsTKI = -1 NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) @@ -1632,11 +1630,11 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze CCmattmp, size(CCmattmp,1) ) do m = 1,colsikpq - call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1)) + !call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1)) do kk = 1,n_st psi_out(kk,idxs_connectedI_alpha(j)+m-1) += CCmattmp(kk,m) enddo - call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1)) + !call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1)) enddo totcolsTKI += colsikpq enddo @@ -1662,13 +1660,11 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze do i = 1,n_CSF do kk=1,n_st psi_out(kk,i) += diag_energies(i)*psi_in(kk,i) - print *,psi_in(kk,i)," | ",psi_out(kk,i) enddo enddo !$OMP END DO !$OMP END PARALLEL - stop call omp_set_max_active_levels(4) deallocate(diag_energies) diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index 2d3ed8b1..fa00e682 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -332,10 +332,8 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N do kk=1,N_st_diag do ii=1,sze_csf tmpU(kk,ii) = U_csf(ii,shift+kk) - tmpU(kk,ii) = 0.0d0 enddo enddo - tmpU(1,1) = 1.0d0 call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) do kk=1,N_st_diag do ii=1,sze_csf From 394a107e275724f3defd3b34b788faaf62752b22 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Wed, 8 Jun 2022 18:06:41 +0200 Subject: [PATCH 07/70] Rewrite counting N_CSFs. --- src/csf/configurations.irp.f | 2 +- src/csf/sigma_vector.irp.f | 107 ++++++++++++++++++++++++----------- 2 files changed, 75 insertions(+), 34 deletions(-) diff --git a/src/csf/configurations.irp.f b/src/csf/configurations.irp.f index ad7f5294..c11a49a4 100644 --- a/src/csf/configurations.irp.f +++ b/src/csf/configurations.irp.f @@ -458,7 +458,7 @@ end END_PROVIDER - BEGIN_PROVIDER [ integer, cfg_seniority_index, (0:elec_num) ] + BEGIN_PROVIDER [ integer, cfg_seniority_index, (0:elec_num+2) ] &BEGIN_PROVIDER [ integer, cfg_nsomo_max ] &BEGIN_PROVIDER [ integer, cfg_nsomo_min ] implicit none diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 4c5118dc..8e5a7154 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -43,21 +43,24 @@ n_CSF = cfg_seniority_index(NSOMOMin)-1 ncfgprev = cfg_seniority_index(NSOMOMin) !do i = 0-iand(MS,1)+2, NSOMOMax,2 - do i = NSOMOMin+2, NSOMOMax,2 - if(cfg_seniority_index(i) .EQ. -1)then - ncfgpersomo = N_configuration + 1 - else - ncfgpersomo = cfg_seniority_index(i) - endif - ncfg = ncfgpersomo - ncfgprev - !detDimperBF = max(1,nint((binom(i,(i+1)/2)))) - dimcsfpercfg = max(1,nint((binom(i-2,(i-2+1)/2)-binom(i-2,((i-2+1)/2)+1)))) - n_CSF += ncfg * dimcsfpercfg - !if(cfg_seniority_index(i+2) == -1) EXIT - !if(detDimperBF > maxDetDimPerBF) maxDetDimPerBF = detDimperBF - ncfgprev = cfg_seniority_index(i) - enddo - !n_CSF = 0 + !!print *," i=",0," dimcsf=",1," ncfg=",ncfgprev, " senor=",cfg_seniority_index(0) + !!do i = NSOMOMin+2, NSOMOMax,2 + !! if(cfg_seniority_index(i) .EQ. -1)then + !! ncfgpersomo = N_configuration + 1 + !! else + !! ncfgpersomo = cfg_seniority_index(i) + !! endif + !!ncfg = ncfgpersomo - ncfgprev + !!!detDimperBF = max(1,nint((binom(i,(i+1)/2)))) + !!!dimcsfpercfg = max(1,nint((binom(i-2,(i-2+1)/2)-binom(i-2,((i-2+1)/2)+1)))) + !!n_CSF += ncfg * dimcsfpercfg + !!!if(cfg_seniority_index(i+2) == -1) EXIT + !!!if(detDimperBF > maxDetDimPerBF) maxDetDimPerBF = detDimperBF + !!ncfgprev = cfg_seniority_index(i) + !!print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " senor=",cfg_seniority_index(i) + !!enddo + !!print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration + n_CSF = 0 !ncfgprev = cfg_seniority_index(0) !ncfgpersomo = ncfgprev !do i = iand(MS,1), NSOMOMax-2,2 @@ -90,6 +93,7 @@ ! endif ! endif ! n_CSF += ncfg * dimcsfpercfg + ! print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " senor=",cfg_seniority_index(i) ! if(cfg_seniority_index(i+2) > ncfgprev) then ! ncfgprev = cfg_seniority_index(i+2) ! else @@ -100,6 +104,38 @@ ! enddo ! endif !enddo + n_CSF = 0 + ncfgprev = cfg_seniority_index(0) ! should be 1 + do i=NSOMOMin,NSOMOMax+2,2 + !k=0 + !do while((cfg_seniority_index(i+2+k) .eq. -1) .and. (k.le.NSOMOMax)) + ! k=k+2 + !end do + if(cfg_seniority_index(i).eq.-1)cycle + if(cfg_seniority_index(i+2).eq.-1)then + ncfg = N_configuration - ncfgprev + 1 + if(ncfg .eq. 0)then + ncfg=1 + endif + else + ncfg = cfg_seniority_index(i+2) - ncfgprev + endif + if(i .EQ. 0 .OR. i .EQ. 1) then + dimcsfpercfg = 1 + elseif( i .EQ. 3) then + dimcsfpercfg = 2 + else + if(iand(MS,1) .EQ. 0) then + dimcsfpercfg = max(1,nint((binom(i,i/2)-binom(i,i/2+1)))) + else + dimcsfpercfg = max(1,nint((binom(i,(i+1)/2)-binom(i,(i+3)/2)))) + endif + endif + n_CSF += ncfg*dimcsfpercfg + print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " ncfgprev=",ncfgprev, " senor=",cfg_seniority_index(i) + ncfgprev = cfg_seniority_index(i+2) + end do + print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration END_PROVIDER @@ -1307,6 +1343,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze do i=1,sze call omp_init_lock(lock(i)) enddo + !do i=1,size(psi_config_data,1) + ! print *,"i=",i," psi_cfg_data_1=",psi_config_data(i,1)," psi_cfg_data_2=",psi_config_data(i,2) + !end do !print *," sze = ",sze allocate(diag_energies(n_CSF)) @@ -1325,8 +1364,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze maxnalphas = elec_num*mo_num Icfg(1,1) = psi_configuration(1,1,1) Icfg(1,2) = psi_configuration(1,2,1) - allocate(listconnectedJ(N_INT,2,max(sze,100))) - allocate(idslistconnectedJ(max(sze,100))) + allocate(listconnectedJ(N_INT,2,max(sze,10000))) + allocate(idslistconnectedJ(max(sze,10000))) call obtain_connected_J_givenI(1, Icfg, listconnectedJ, idslistconnectedJ, nconnectedmaxJ, nconnectedtotalmax) deallocate(listconnectedJ) deallocate(idslistconnectedJ) @@ -1359,10 +1398,10 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !$OMP AIJpqMatrixDimsList, diag_energies, n_CSF, lock, NBFmax,nconnectedtotalmax, nconnectedmaxJ,maxnalphas,& !$OMP num_threads_max) - allocate(singlesI(N_INT,2,max(sze,1000))) - allocate(idxs_singlesI(max(sze,1000))) - allocate(excitationIds_single(2,max(sze,1000))) - allocate(excitationTypes_single(max(sze,1000))) + allocate(singlesI(N_INT,2,max(sze,10000))) + allocate(idxs_singlesI(max(sze,10000))) + allocate(excitationIds_single(2,max(sze,10000))) + allocate(excitationTypes_single(max(sze,10000))) ! !!!====================!!! @@ -1460,6 +1499,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze startj = psi_config_data(idxI,1) endj = psi_config_data(idxI,2) + !print *,"i=",i," idxI=",idxI," startj=",startj," endj=",endj," sze=",sze !!! One-electron contribution !!! do ii = starti, endi @@ -1467,11 +1507,12 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze do jj = startj, endj cntj = jj-startj+1 meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* h_core_ri(p,q) - !call omp_set_lock(lock(jj)) + !print *,"jj = ",jj + call omp_set_lock(lock(jj)) do kk = 1,n_st psi_out(kk,jj) = psi_out(kk,jj) + meCC1 * psi_in(kk,ii) enddo - !call omp_unset_lock(lock(jj)) + call omp_unset_lock(lock(jj)) enddo enddo @@ -1490,14 +1531,14 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze deallocate(excitationIds_single) deallocate(excitationTypes_single) - allocate(listconnectedJ(N_INT,2,max(sze,1000))) - allocate(alphas_Icfg(N_INT,2,max(sze,1000))) - allocate(connectedI_alpha(N_INT,2,max(sze,1000))) - allocate(idxs_connectedI_alpha(max(sze,1000))) - allocate(excitationIds(2,max(sze,1000))) - allocate(excitationTypes(max(sze,1000))) - allocate(diagfactors(max(sze,1000))) - allocate(idslistconnectedJ(max(sze,1000))) + allocate(listconnectedJ(N_INT,2,max(sze,10000))) + allocate(alphas_Icfg(N_INT,2,max(sze,10000))) + allocate(connectedI_alpha(N_INT,2,max(sze,10000))) + allocate(idxs_connectedI_alpha(max(sze,10000))) + allocate(excitationIds(2,max(sze,10000))) + allocate(excitationTypes(max(sze,10000))) + allocate(diagfactors(max(sze,10000))) + allocate(idslistconnectedJ(max(sze,10000))) allocate(CCmattmp(n_st,NBFmax)) !!!====================!!! @@ -1630,11 +1671,11 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze CCmattmp, size(CCmattmp,1) ) do m = 1,colsikpq - !call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1)) + call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1)) do kk = 1,n_st psi_out(kk,idxs_connectedI_alpha(j)+m-1) += CCmattmp(kk,m) enddo - !call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1)) + call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1)) enddo totcolsTKI += colsikpq enddo From 9b8a1f5a9edc545c3eb78aba17cdf0b93dd4d115 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Mon, 13 Jun 2022 16:21:08 +0200 Subject: [PATCH 08/70] Fixed bug in obtain_I_connected. Now energy is correct for NH cc-pvdz. --- src/csf/cfgCI_utils.c | 1 - src/csf/sigma_vector.irp.f | 9 ++++++--- src/davidson/diagonalization_hcsf_dressed.irp.f | 4 +++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index a90fbc1f..0e4dd91f 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -1671,7 +1671,6 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in int rowsI = 0; int colsI = 0; - //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); getOverlapMatrix_withDet(bftodetmatrixI, rowsbftodetI, colsbftodetI, Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 8e5a7154..6748b312 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -1576,9 +1576,12 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze do k = 1,Nalphas_Icfg ! Now generate all singly excited with respect to a given alpha CFG - call obtain_connected_I_foralpha_fromfilterdlist(i,nconnectedJ, idslistconnectedJ, & - listconnectedJ, alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI, & - excitationIds,excitationTypes,diagfactors) + !call obtain_connected_I_foralpha_fromfilterdlist(i,nconnectedJ, idslistconnectedJ, & + ! listconnectedJ, alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI, & + ! excitationIds,excitationTypes,diagfactors) + + call obtain_connected_I_foralpha(i, alphas_Icfg(1,1,k), connectedI_alpha, idxs_connectedI_alpha, & + nconnectedI, excitationIds, excitationTypes, diagfactors) if(nconnectedI .EQ. 0) then cycle diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index fa00e682..dd12fcd5 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -88,7 +88,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N double precision, intent(out) :: energies(N_st_diag_in) integer :: iter, N_st_diag - integer :: i,j,k,l,m,kk,ii + integer :: i,j,k,l,m,kk,ii,ll logical, intent(inout) :: converged double precision, external :: u_dot_v, u_dot_u @@ -248,6 +248,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N residual_norm(N_st_diag), & lambda(N_st_diag*itermax)) + h = 0.d0 U = 0.d0 y = 0.d0 @@ -340,6 +341,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N W_csf(ii,shift+kk)=tmpW(kk,ii) enddo enddo + deallocate(tmpW) deallocate(tmpU) endif From fddf6b7957cb46626c90bdd95d6a9c3f494b3d06 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Tue, 14 Jun 2022 00:09:49 +0200 Subject: [PATCH 09/70] Sigma-vector works for multiple roots. --- src/davidson/diagonalization_hcsf_dressed.irp.f | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index dd12fcd5..63d105e9 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -265,7 +265,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N ! =================== converged = .False. - + kk=1 do k=N_st+1,N_st_diag do i=1,sze !call random_number(r1) @@ -276,7 +276,11 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N r2 = dtwo_pi*r2 u_in(i,k) = r1*dcos(r2) * u_in(i,k-N_st) enddo - u_in(k,k) = u_in(k,k) + 10.d0 + do while(POPCNT(AND(psi_det(1,1,kk),psi_det(1,2,kk))) .ne. 3) + kk=kk+1 + end do + u_in(kk,k) = u_in(kk,k) + 10.d0 + kk=kk+1 enddo do k=1,N_st_diag call normalize(u_in(1,k),sze) From 566a56d22fe4eae024c4658f06314cb5652cd665 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Tue, 14 Jun 2022 11:29:02 +0200 Subject: [PATCH 10/70] Improved guess for csf davidson. --- .../diagonalization_hcsf_dressed.irp.f | 98 +++++-------------- 1 file changed, 27 insertions(+), 71 deletions(-) diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index 63d105e9..4e06c675 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -88,7 +88,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N double precision, intent(out) :: energies(N_st_diag_in) integer :: iter, N_st_diag - integer :: i,j,k,l,m,kk,ii,ll + integer :: i,j,k,l,m,kk logical, intent(inout) :: converged double precision, external :: u_dot_v, u_dot_u @@ -110,7 +110,6 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N integer :: order(N_st_diag_in) double precision :: cmax double precision, allocatable :: U(:,:), U_csf(:,:), overlap(:,:) - double precision, allocatable :: tmpU(:,:), tmpW(:,:) double precision, pointer :: W(:,:), W_csf(:,:) logical :: disk_based double precision :: energy_shift(N_st_diag_in*davidson_sze_max) @@ -248,7 +247,6 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N residual_norm(N_st_diag), & lambda(N_st_diag*itermax)) - h = 0.d0 U = 0.d0 y = 0.d0 @@ -265,13 +263,12 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N ! =================== converged = .False. + kk=1 do k=N_st+1,N_st_diag do i=1,sze - !call random_number(r1) - !call random_number(r2) - r1 = 0.5 - r2 = 0.5 + call random_number(r1) + call random_number(r2) r1 = dsqrt(-2.d0*dlog(r1)) r2 = dtwo_pi*r2 u_in(i,k) = r1*dcos(r2) * u_in(i,k-N_st) @@ -293,8 +290,8 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N enddo ! Make random verctors eigenstates of S2 - call convertWFfromDETtoCSF(N_st_diag,U,U_csf) - !call convertWFfromCSFtoDET(N_st_diag,U_csf,U) + call convertWFfromDETtoCSF(N_st_diag,U(1,1),U_csf(1,1)) + call convertWFfromCSFtoDET(N_st_diag,U_csf(1,1),U(1,1)) do while (.not.converged) itertot = itertot+1 @@ -311,43 +308,11 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N ! Compute |W_k> = \sum_i |i> ! ----------------------------------- - !call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift+1),U) - PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals + call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift+1),U) if ((sze > 100000).and.distributed_davidson) then - !call H_u_0_nstates_zmq (W,U,N_st_diag,sze) - allocate(tmpW(N_st_diag,sze_csf)) - allocate(tmpU(N_st_diag,sze_csf)) - do kk=1,N_st_diag - do ii=1,sze_csf - tmpU(kk,ii) = U_csf(ii,shift+kk) - enddo - enddo - call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) - do kk=1,N_st_diag - do ii=1,sze_csf - W_csf(ii,shift+kk)=tmpW(kk,ii) - enddo - enddo - deallocate(tmpW) - deallocate(tmpU) + call H_u_0_nstates_zmq (W,U,N_st_diag,sze) else - !call H_u_0_nstates_openmp(W,U,N_st_diag,sze) - allocate(tmpW(N_st_diag,sze_csf)) - allocate(tmpU(N_st_diag,sze_csf)) - do kk=1,N_st_diag - do ii=1,sze_csf - tmpU(kk,ii) = U_csf(ii,shift+kk) - enddo - enddo - call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) - do kk=1,N_st_diag - do ii=1,sze_csf - W_csf(ii,shift+kk)=tmpW(kk,ii) - enddo - enddo - - deallocate(tmpW) - deallocate(tmpU) + call H_u_0_nstates_openmp(W,U,N_st_diag,sze) endif ! else ! ! Already computed in update below @@ -391,7 +356,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N endif endif - !call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1)) + call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1)) ! Compute h_kl = = ! ------------------------------------------- @@ -487,24 +452,24 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N ! Compute residual vector and davidson step ! ----------------------------------------- - !if (without_diagonal) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) - do k=1,N_st_diag - do i=1,sze - U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) & - /max(H_jj(i) - lambda (k),1.d-2) + if (without_diagonal) then + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k=1,N_st_diag + do i=1,sze + U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) & + /max(H_jj(i) - lambda (k),1.d-2) + enddo enddo - enddo - !$OMP END PARALLEL DO - !else - ! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) - ! do k=1,N_st_diag - ! do i=1,sze - ! U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) - ! enddo - ! enddo - ! !$OMP END PARALLEL DO - !endif + !$OMP END PARALLEL DO + else + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k=1,N_st_diag + do i=1,sze + U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) + enddo + enddo + !$OMP END PARALLEL DO + endif do k=1,N_st residual_norm(k) = u_dot_u(U(1,k),sze) @@ -551,15 +516,6 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N ! Re-contract U ! ------------- - call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0, & - W_csf, size(W_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) - do k=1,N_st_diag - do i=1,sze_csf - W_csf(i,k) = u_in(i,k) - enddo - enddo - call convertWFfromCSFtoDET(N_st_diag,W_csf,W) - call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0, & U_csf, size(U_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) do k=1,N_st_diag From 8a141ef25c9a6f52c5fd5acd407ae1e29deae03b Mon Sep 17 00:00:00 2001 From: v1j4y Date: Tue, 14 Jun 2022 11:29:16 +0200 Subject: [PATCH 11/70] Removed debug print. --- src/csf/cfgCI_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index 5190785a..fdc09325 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -1344,7 +1344,7 @@ void getbftodetfunction(Tree *dettree, int NSOMO, int MS, int *BF1, double *rowv } printf("\n"); findAddofDetDriver(dettree, NSOMO, inpdet, &addr); - printf("(%d) - addr = %d\n",i,addr); + //printf("(%d) - addr = %d\n",i,addr); // Calculate the phase for cfg to QP2 conversion //get_phase_cfg_to_qp_inpList(inpdet, NSOMO, &phase_cfg_to_qp); //rowvec[addr] = 1.0 * phaselist[i]*phase_cfg_to_qp/sqrt(fac); From 6592f35dafc46c1025ae75f27587f2441f979fd1 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Tue, 14 Jun 2022 11:29:41 +0200 Subject: [PATCH 12/70] Activated cfg based davidson. --- src/davidson/diagonalize_ci.irp.f | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f index 6930cc07..78478c57 100644 --- a/src/davidson/diagonalize_ci.irp.f +++ b/src/davidson/diagonalize_ci.irp.f @@ -5,7 +5,8 @@ BEGIN_PROVIDER [ character*(3), sigma_vector_algorithm ] ! ! If 'cfg', use in Davidson END_DOC - sigma_vector_algorithm = 'det' + !sigma_vector_algorithm = 'det' + sigma_vector_algorithm = 'cfg' END_PROVIDER BEGIN_PROVIDER [ double precision, CI_energy, (N_states_diag) ] @@ -75,13 +76,13 @@ END_PROVIDER call davidson_diag_H_csf(psi_det,CI_eigenvectors, & size(CI_eigenvectors,1),CI_electronic_energy, & N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged) -! else if (sigma_vector_algorithm == 'cfg') then -! call davidson_diag_H_csf(psi_det,CI_eigenvectors, & -! size(CI_eigenvectors,1),CI_electronic_energy, & -! N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged) -! else -! print *, irp_here -! stop 'bug' + else if (sigma_vector_algorithm == 'cfg') then + call davidson_diag_H_cfg(psi_det,CI_eigenvectors, & + size(CI_eigenvectors,1),CI_electronic_energy, & + N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged) + else + print *, irp_here + stop 'bug' endif else call davidson_diag_HS2(psi_det,CI_eigenvectors, CI_s2, & From 63e617221cb1e6b6bdb30438dc1954a7658b89ca Mon Sep 17 00:00:00 2001 From: v1j4y Date: Tue, 14 Jun 2022 11:30:07 +0200 Subject: [PATCH 13/70] Added new file with CFG based sigma-vector. --- src/davidson/diagonalization_hcfg.irp.f | 617 ++++++++++++++++++++++++ 1 file changed, 617 insertions(+) create mode 100644 src/davidson/diagonalization_hcfg.irp.f diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f new file mode 100644 index 00000000..140c3b69 --- /dev/null +++ b/src/davidson/diagonalization_hcfg.irp.f @@ -0,0 +1,617 @@ +subroutine davidson_diag_h_cfg(dets_in,u_in,dim_in,energies,sze,sze_csf,N_st,N_st_diag,Nint,dressing_state,converged) + use bitmasks + implicit none + BEGIN_DOC + ! Davidson diagonalization. + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten + ! on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + END_DOC + integer, intent(in) :: dim_in, sze, sze_csf, N_st, N_st_diag, Nint + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(inout) :: u_in(dim_in,N_st_diag) + double precision, intent(out) :: energies(N_st_diag) + integer, intent(in) :: dressing_state + logical, intent(out) :: converged + double precision, allocatable :: H_jj(:) + + double precision, external :: diag_H_mat_elem, diag_S_mat_elem + integer :: i,k + ASSERT (N_st > 0) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + PROVIDE mo_two_e_integrals_in_map + allocate(H_jj(sze)) + + H_jj(1) = diag_h_mat_elem(dets_in(1,1,1),Nint) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(sze,H_jj, dets_in,Nint) & + !$OMP PRIVATE(i) + !$OMP DO SCHEDULE(static) + do i=2,sze + H_jj(i) = diag_H_mat_elem(dets_in(1,1,i),Nint) + enddo + !$OMP END DO + !$OMP END PARALLEL + + if (dressing_state > 0) then + do k=1,N_st + do i=1,sze + H_jj(i) += u_in(i,k) * dressing_column_h(i,k) + enddo + enddo + endif + + call davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag,Nint,dressing_state,converged) + deallocate(H_jj) +end + + +subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag_in,Nint,dressing_state,converged) + use bitmasks + use mmap_module + implicit none + BEGIN_DOC + ! Davidson diagonalization with specific diagonal elements of the H matrix + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten + ! on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > sze + ! + END_DOC + integer, intent(in) :: dim_in, sze, sze_csf, N_st, N_st_diag_in, Nint + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(in) :: H_jj(sze) + integer, intent(in) :: dressing_state + double precision, intent(inout) :: u_in(dim_in,N_st_diag_in) + double precision, intent(out) :: energies(N_st_diag_in) + + integer :: iter, N_st_diag + integer :: i,j,k,l,m,kk,ii,ll + logical, intent(inout) :: converged + + double precision, external :: u_dot_v, u_dot_u + + integer :: k_pairs, kl + + integer :: iter2, itertot + double precision, allocatable :: y(:,:), h(:,:), lambda(:) + double precision, allocatable :: s_tmp(:,:) + double precision :: diag_h_mat_elem + double precision, allocatable :: residual_norm(:) + character*(16384) :: write_buffer + double precision :: to_print(2,N_st) + double precision :: cpu, wall + integer :: shift, shift2, itermax, istate + double precision :: r1, r2, alpha + logical :: state_ok(N_st_diag_in*davidson_sze_max) + integer :: nproc_target + integer :: order(N_st_diag_in) + double precision :: cmax + double precision, allocatable :: U(:,:), U_csf(:,:), overlap(:,:) + double precision, allocatable :: tmpU(:,:), tmpW(:,:) + double precision, pointer :: W(:,:), W_csf(:,:) + logical :: disk_based + double precision :: energy_shift(N_st_diag_in*davidson_sze_max) + + include 'constants.include.F' + + N_st_diag = N_st_diag_in + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda + if (N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_full to ', N_st_diag*3 + stop -1 + endif + + itermax = max(2,min(davidson_sze_max, sze/N_st_diag))+1 + itertot = 0 + + if (state_following) then + allocate(overlap(N_st_diag*itermax, N_st_diag*itermax)) + else + allocate(overlap(1,1)) ! avoid 'if' for deallocate + endif + overlap = 0.d0 + + PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse threshold_davidson_pt2 threshold_davidson_from_pt2 + + call write_time(6) + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + ! Find max number of cores to fit in memory + ! ----------------------------------------- + + nproc_target = nproc + double precision :: rss + integer :: maxab + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 + + m=1 + disk_based = .False. + call resident_memory(rss) + do + r1 = 8.d0 * &! bytes + ( dble(sze)*(N_st_diag) &! U + + dble(sze_csf)*(N_st_diag*itermax) &! U_csf + + dble(sze)*(N_st_diag) &! W + + dble(sze_csf)*(N_st_diag*itermax) &! W_csf + + 3.0d0*(N_st_diag*itermax)**2 &! h,y,s_tmp + + 1.d0*(N_st_diag*itermax) &! lambda + + 1.d0*(N_st_diag) &! residual_norm + ! In H_u_0_nstates_zmq + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on collector + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on slave + + 0.5d0*maxab &! idx0 in H_u_0_nstates_openmp_work_* + + nproc_target * &! In OMP section + ( 1.d0*(N_int*maxab) &! buffer + + 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx + ) / 1024.d0**3 + + if (nproc_target == 0) then + call check_mem(r1,irp_here) + nproc_target = 1 + exit + endif + + if (r1+rss < qp_max_mem) then + exit + endif + + if (itermax > 4) then + itermax = itermax - 1 + else if (m==1.and.disk_based_davidson) then + m=0 + disk_based = .True. + itermax = 6 + else + nproc_target = nproc_target - 1 + endif + + enddo + nthreads_davidson = nproc_target + TOUCH nthreads_davidson + call write_int(6,N_st,'Number of states') + call write_int(6,N_st_diag,'Number of states in diagonalization') + call write_int(6,sze,'Number of determinants') + call write_int(6,sze_csf,'Number of CSFs') + call write_int(6,nproc_target,'Number of threads for diagonalization') + call write_double(6, r1, 'Memory(Gb)') + if (disk_based) then + print *, 'Using swap space to reduce RAM' + endif + + !--------------- + + write(6,'(A)') '' + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = 'Iter' + do i=1,N_st + write_buffer = trim(write_buffer)//' Energy Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + + + if (disk_based) then + ! Create memory-mapped files for W and S + type(c_ptr) :: ptr_w, ptr_s + integer :: fd_s, fd_w + call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& + 8, fd_w, .False., ptr_w) + call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/)) + else + allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax)) + endif + + allocate( & + ! Large + U(sze,N_st_diag), & + U_csf(sze_csf,N_st_diag*itermax), & + + ! Small + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + residual_norm(N_st_diag), & + lambda(N_st_diag*itermax)) + + + h = 0.d0 + U = 0.d0 + y = 0.d0 + s_tmp = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + + ! Davidson iterations + ! =================== + + converged = .False. + kk=1 + do k=N_st+1,N_st_diag + do i=1,sze + !call random_number(r1) + !call random_number(r2) + r1 = 0.5 + r2 = 0.5 + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) * u_in(i,k-N_st) + enddo + do while(POPCNT(AND(psi_det(1,1,kk),psi_det(1,2,kk))) .ne. 3) + kk=kk+1 + end do + u_in(kk,k) = u_in(kk,k) + 10.d0 + kk=kk+1 + enddo + do k=1,N_st_diag + call normalize(u_in(1,k),sze) + enddo + + do k=1,N_st_diag + do i=1,sze + U(i,k) = u_in(i,k) + enddo + enddo + + ! Make random verctors eigenstates of S2 + call convertWFfromDETtoCSF(N_st_diag,U,U_csf) + !call convertWFfromCSFtoDET(N_st_diag,U_csf,U) + + do while (.not.converged) + itertot = itertot+1 + if (itertot == 8) then + exit + endif + + do iter=1,itermax-1 + + shift = N_st_diag*(iter-1) + shift2 = N_st_diag*iter + +! if ((iter > 1).or.(itertot == 1)) then + ! Compute |W_k> = \sum_i |i> + ! ----------------------------------- + + !call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift+1),U) + PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals + if ((sze > 100000).and.distributed_davidson) then + !call H_u_0_nstates_zmq (W,U,N_st_diag,sze) + allocate(tmpW(N_st_diag,sze_csf)) + allocate(tmpU(N_st_diag,sze_csf)) + do kk=1,N_st_diag + do ii=1,sze_csf + tmpU(kk,ii) = U_csf(ii,shift+kk) + enddo + enddo + call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) + do kk=1,N_st_diag + do ii=1,sze_csf + W_csf(ii,shift+kk)=tmpW(kk,ii) + enddo + enddo + deallocate(tmpW) + deallocate(tmpU) + else + !call H_u_0_nstates_openmp(W,U,N_st_diag,sze) + allocate(tmpW(N_st_diag,sze_csf)) + allocate(tmpU(N_st_diag,sze_csf)) + do kk=1,N_st_diag + do ii=1,sze_csf + tmpU(kk,ii) = U_csf(ii,shift+kk) + enddo + enddo + call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) + do kk=1,N_st_diag + do ii=1,sze_csf + W_csf(ii,shift+kk)=tmpW(kk,ii) + enddo + enddo + + deallocate(tmpW) + deallocate(tmpU) + endif +! else +! ! Already computed in update below +! continue +! endif + + if (dressing_state > 0) then + + if (N_st == 1) then + + l = dressed_column_idx(1) + double precision :: f + f = 1.0d0/psi_coef(l,1) + do istate=1,N_st_diag + do i=1,sze + W(i,istate) += dressing_column_h(i,1) *f * U(l,istate) + W(l,istate) += dressing_column_h(i,1) *f * U(i,istate) + enddo + + enddo + + else + + call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & + psi_coef, size(psi_coef,1), & + U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & + dressing_column_h, size(dressing_column_h,1), s_tmp, size(s_tmp,1), & + 1.d0, W(1,1), size(W,1)) + + + call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & + dressing_column_h, size(dressing_column_h,1), & + U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & + psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), & + 1.d0, W(1,1), size(W,1)) + + endif + endif + + !call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1)) + + ! Compute h_kl = = + ! ------------------------------------------- + + call dgemm('T','N', shift2, shift2, sze_csf, & + 1.d0, U_csf, size(U_csf,1), W_csf, size(W_csf,1), & + 0.d0, h, size(h,1)) + call dgemm('T','N', shift2, shift2, sze_csf, & + 1.d0, U_csf, size(U_csf,1), U_csf, size(U_csf,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + ! Diagonalize h + ! --------------- + + integer :: lwork, info + double precision, allocatable :: work(:) + + y = h + lwork = -1 + allocate(work(1)) + call dsygv(1,'V','U',shift2,y,size(y,1), & + s_tmp,size(s_tmp,1), lambda, work,lwork,info) + lwork = int(work(1)) + deallocate(work) + allocate(work(lwork)) + call dsygv(1,'V','U',shift2,y,size(y,1), & + s_tmp,size(s_tmp,1), lambda, work,lwork,info) + deallocate(work) + if (info /= 0) then + stop 'DSYGV Diagonalization failed' + endif + + ! Compute Energy for each eigenvector + ! ----------------------------------- + + call dgemm('N','N',shift2,shift2,shift2, & + 1.d0, h, size(h,1), y, size(y,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('T','N',shift2,shift2,shift2, & + 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & + 0.d0, h, size(h,1)) + + do k=1,shift2 + lambda(k) = h(k,k) + enddo + + if (state_following) then + + overlap = -1.d0 + do i=1,shift2 + do k=1,shift2 + overlap(k,i) = dabs(y(k,i)) + enddo + enddo + do k=1,N_st + cmax = -1.d0 + do i=1,N_st + if (overlap(i,k) > cmax) then + cmax = overlap(i,k) + order(k) = i + endif + enddo + do i=1,N_st_diag + overlap(order(k),i) = -1.d0 + enddo + enddo + overlap = y + do k=1,N_st + l = order(k) + if (k /= l) then + y(1:shift2,k) = overlap(1:shift2,l) + endif + enddo + do k=1,N_st + overlap(k,1) = lambda(k) + enddo + + endif + + + ! Express eigenvectors of h in the csf basis + ! ------------------------------------------ + + call dgemm('N','N', sze_csf, N_st_diag, shift2, & + 1.d0, U_csf, size(U_csf,1), y, size(y,1), 0.d0, U_csf(1,shift2+1), size(U_csf,1)) + call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift2+1),U) + + call dgemm('N','N', sze_csf, N_st_diag, shift2, & + 1.d0, W_csf, size(W_csf,1), y, size(y,1), 0.d0, W_csf(1,shift2+1), size(W_csf,1)) + call convertWFfromCSFtoDET(N_st_diag,W_csf(1,shift2+1),W) + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + !if (without_diagonal) then + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k=1,N_st_diag + do i=1,sze + U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) & + /max(H_jj(i) - lambda (k),1.d-2) + enddo + enddo + !$OMP END PARALLEL DO + !else + ! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + ! do k=1,N_st_diag + ! do i=1,sze + ! U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) + ! enddo + ! enddo + ! !$OMP END PARALLEL DO + !endif + + do k=1,N_st + residual_norm(k) = u_dot_u(U(1,k),sze) + to_print(1,k) = lambda(k) + nuclear_repulsion + to_print(2,k) = residual_norm(k) + enddo + call convertWFfromDETtoCSF(N_st_diag,U,U_csf(1,shift2+1)) + + if ((itertot>1).and.(iter == 1)) then + !don't print + continue + else + write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + endif + + ! Check convergence + if (iter > 1) then + if (threshold_davidson_from_pt2) then + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson_pt2 + else + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson + endif + endif + + do k=1,N_st + if (residual_norm(k) > 1.d8) then + print *, 'Davidson failed' + stop -1 + endif + enddo + if (converged) then + exit + endif + + logical, external :: qp_stop + if (qp_stop()) then + converged = .True. + exit + endif + + + enddo + + ! Re-contract U + ! ------------- + + call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0, & + W_csf, size(W_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + do k=1,N_st_diag + do i=1,sze_csf + W_csf(i,k) = u_in(i,k) + enddo + enddo + call convertWFfromCSFtoDET(N_st_diag,W_csf,W) + + call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0, & + U_csf, size(U_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + do k=1,N_st_diag + do i=1,sze_csf + U_csf(i,k) = u_in(i,k) + enddo + enddo + call convertWFfromCSFtoDET(N_st_diag,U_csf,U) + + enddo + + + call nullify_small_elements(sze,N_st_diag,U,size(U,1),threshold_davidson_pt2) + do k=1,N_st_diag + do i=1,sze + u_in(i,k) = U(i,k) + enddo + enddo + + do k=1,N_st_diag + energies(k) = lambda(k) + enddo + write_buffer = '======' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') trim(write_buffer) + write(6,'(A)') '' + call write_time(6) + + if (disk_based)then + ! Remove temp files + integer, external :: getUnitAndOpen + call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 8, fd_w, ptr_w ) + fd_w = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_w','r') + close(fd_w,status='delete') + else + deallocate(W, W_csf) + endif + + deallocate ( & + residual_norm, & + U, U_csf, overlap, & + h, y, s_tmp, & + lambda & + ) + FREE nthreads_davidson +end + + + + + + + From 7e394e4a0129cdccf47969473ec4460438da188f Mon Sep 17 00:00:00 2001 From: v1j4y Date: Tue, 14 Jun 2022 11:32:26 +0200 Subject: [PATCH 14/70] Removed debug print. --- src/csf/cfgCI_utils.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index fdc09325..4a4280ee 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -1338,11 +1338,11 @@ void getbftodetfunction(Tree *dettree, int NSOMO, int MS, int *BF1, double *rowv int phase_cfg_to_qp=1; int addr = -1; for(int i = 0; i < npairs; i++){ - for(int j = 0; j < NSOMO; j++) { - inpdet[j] = detslist[i*NSOMO + j]; - printf(" %d ",inpdet[j]); - } - printf("\n"); + //for(int j = 0; j < NSOMO; j++) { + // inpdet[j] = detslist[i*NSOMO + j]; + // printf(" %d ",inpdet[j]); + //} + //printf("\n"); findAddofDetDriver(dettree, NSOMO, inpdet, &addr); //printf("(%d) - addr = %d\n",i,addr); // Calculate the phase for cfg to QP2 conversion From da9f676513b38dc2d48b0bf97cdb10763fda1299 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Tue, 14 Jun 2022 11:34:37 +0200 Subject: [PATCH 15/70] Fixed typo. --- src/csf/cfgCI_utils.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index 4a4280ee..be4653a3 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -1338,10 +1338,10 @@ void getbftodetfunction(Tree *dettree, int NSOMO, int MS, int *BF1, double *rowv int phase_cfg_to_qp=1; int addr = -1; for(int i = 0; i < npairs; i++){ - //for(int j = 0; j < NSOMO; j++) { - // inpdet[j] = detslist[i*NSOMO + j]; - // printf(" %d ",inpdet[j]); - //} + for(int j = 0; j < NSOMO; j++) { + inpdet[j] = detslist[i*NSOMO + j]; + //printf(" %d ",inpdet[j]); + } //printf("\n"); findAddofDetDriver(dettree, NSOMO, inpdet, &addr); //printf("(%d) - addr = %d\n",i,addr); From 14a0ff375ae4426be2127e8341ad7f483160ca47 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Tue, 14 Jun 2022 12:51:35 +0200 Subject: [PATCH 16/70] Added missing file. --- src/csf/obtain_I_foralpha.irp.f | 383 ++++++++++++++++++++++++++++++++ 1 file changed, 383 insertions(+) create mode 100644 src/csf/obtain_I_foralpha.irp.f diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f new file mode 100644 index 00000000..d6fbe386 --- /dev/null +++ b/src/csf/obtain_I_foralpha.irp.f @@ -0,0 +1,383 @@ +subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, nconnectedI,ntotalconnectedI) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for obtain_connected_I_foralpha + ! This function returns all those selected configurations + ! which are connected to the input configuration + ! givenI by a single excitation. + ! + ! The type of excitations are ordered as follows: + ! Type 1 - SOMO -> SOMO + ! Type 2 - DOMO -> VMO + ! Type 3 - SOMO -> VMO + ! Type 4 - DOMO -> SOMO + ! + ! Order of operators + ! \alpha> = a^\dag_p a_q |I> = E_pq |I> + END_DOC + integer ,intent(in) :: idxI + integer(bit_kind),intent(in) :: givenI(N_int,2) + integer(bit_kind),intent(out) :: connectedI(N_int,2,*) + integer ,intent(out) :: idxs_connectedI(*) + integer,intent(out) :: nconnectedI + integer,intent(out) :: ntotalconnectedI + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: IJsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes + integer :: listholes(mo_num) + integer :: holetype(mo_num) + integer :: end_index + integer :: Nsomo_I + + ! + ! 2 2 1 1 0 0 : 1 1 0 0 0 0 + ! 0 0 1 1 0 0 + ! + ! 2 1 1 1 1 0 : 1 0 0 0 0 0 + ! 0 1 1 1 1 0 + !xorS 0 1 0 0 1 0 : 2 + !xorD 0 1 0 0 0 0 : 1 + !xorSD 0 0 0 0 1 0 : 1 + ! ----- + ! 4 + ! 1 1 1 1 1 1 : 0 0 0 0 0 0 + ! 1 1 1 1 1 1 + ! 1 1 0 0 1 1 : 4 + ! 1 1 0 0 0 0 : 2 + ! 0 0 0 0 1 1 : 2 + ! ----- + ! 8 + ! + + nconnectedI = 0 + ntotalconnectedI = 0 + end_index = N_configuration + + ! Since CFGs are sorted wrt to seniority + ! we don't have to search the full CFG list + Isomo = givenI(1,1) + Idomo = givenI(1,2) + Nsomo_I = POPCNT(Isomo) + end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_I+6,elec_num))-1) + if(end_index .LT. 0) end_index= N_configuration + !end_index = N_configuration + !print *,"Start and End = ",idxI, end_index + + + p = 0 + q = 0 + do i=idxI,end_index + !if(.True.) then + ! nconnectedI += 1 + ! connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + ! idxs_connectedI(nconnectedI)=i + ! cycle + !endif + Isomo = givenI(1,1) + Idomo = givenI(1,2) + Jsomo = psi_configuration(1,1,i) + Jdomo = psi_configuration(1,2,i) + diffSOMO = IEOR(Isomo,Jsomo) + ndiffSOMO = POPCNT(diffSOMO) + diffDOMO = IEOR(Idomo,Jdomo) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + !------- + ! MONO | + !------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((nxordiffSOMODOMO .EQ. 8) .AND. ndiffSOMO .EQ. 4) then + !---------------------------- + ! DOMO -> VMO + DOMO -> VMO | + !---------------------------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((nxordiffSOMODOMO .EQ. 6) .AND. ndiffSOMO .EQ. 2) then + !---------------------------- + ! DOUBLE + !---------------------------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((nxordiffSOMODOMO .EQ. 2) .AND. ndiffSOMO .EQ. 3) then + !----------------- + ! DOUBLE + !----------------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 0) then + !----------------- + ! DOUBLE + !----------------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then + !-------- + ! I = I | + !-------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)= i + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + Isomo = psi_configuration(1,1,i) + Idomo = psi_configuration(1,2,i) + do ii = 1,mo_num + if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do ii = 1,mo_num + if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 2 + endif + end do + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)*nholes) + endif + end do + +end subroutine obtain_connected_J_givenI + +subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for obtain_connected_I_foralpha + ! This function returns all those selected configurations + ! which are connected to the input configuration + ! Ialpha by a single excitation. + ! + ! The type of excitations are ordered as follows: + ! Type 1 - SOMO -> SOMO + ! Type 2 - DOMO -> VMO + ! Type 3 - SOMO -> VMO + ! Type 4 - DOMO -> SOMO + ! + ! Order of operators + ! \alpha> = a^\dag_p a_q |I> = E_pq |I> + END_DOC + integer ,intent(in) :: idxI + integer(bit_kind),intent(in) :: Ialpha(N_int,2) + integer(bit_kind),intent(out) :: connectedI(N_int,2,*) + integer ,intent(out) :: idxs_connectedI(*) + integer,intent(out) :: nconnectedI + integer,intent(out) :: excitationIds(2,*) + integer,intent(out) :: excitationTypes(*) + real*8 ,intent(out) :: diagfactors(*) + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: IJsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes + integer :: listholes(mo_num) + integer :: holetype(mo_num) + integer :: end_index + integer :: Nsomo_alpha + + nconnectedI = 0 + end_index = N_configuration + + ! Since CFGs are sorted wrt to seniority + ! we don't have to search the full CFG list + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Nsomo_alpha = POPCNT(Isomo) + end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1) + if(end_index .LT. 0) end_index= N_configuration + end_index = N_configuration + + + p = 0 + q = 0 + if (N_int > 1) stop 'obtain_connected_i_foralpha : N_int > 1' + do i=idxI,end_index + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Jsomo = psi_configuration(1,1,i) + Jdomo = psi_configuration(1,2,i) + diffSOMO = IEOR(Isomo,Jsomo) + ndiffSOMO = POPCNT(diffSOMO) + !if(idxI.eq.1)then + ! print *," \t idxI=",i," diffS=",ndiffSOMO," popJs=", POPCNT(Jsomo)," popIs=",POPCNT(Isomo) + !endif + diffDOMO = IEOR(Idomo,Jdomo) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + select case(ndiffDOMO) + case (0) + ! SOMO -> VMO + !print *,"obt SOMO -> VMO" + extyp = 3 + IJsomo = IEOR(Isomo, Jsomo) +!IRP_IF WITHOUT_TRAILZ +! p = (popcnt(ieor( IAND(Isomo,IJsomo) , IAND(Isomo,IJsomo) -1))-1) + 1 +!IRP_ELSE + p = TRAILZ(IAND(Isomo,IJsomo)) + 1 +!IRP_ENDIF + IJsomo = IBCLR(IJsomo,p-1) +!IRP_IF WITHOUT_TRAILZ +! q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1 +!IRP_ELSE + q = TRAILZ(IJsomo) + 1 +!IRP_ENDIF + case (1) + ! DOMO -> VMO + ! or + ! SOMO -> SOMO + nsomoJ = POPCNT(Jsomo) + nsomoalpha = POPCNT(Isomo) + if(nsomoJ .GT. nsomoalpha) then + ! DOMO -> VMO + !print *,"obt DOMO -> VMO" + extyp = 2 +!IRP_IF WITHOUT_TRAILZ +! p = (popcnt(ieor( IEOR(Idomo,Jdomo),IEOR(Idomo,Jdomo) -1))-1) + 1 +!IRP_ELSE + p = TRAILZ(IEOR(Idomo,Jdomo)) + 1 +!IRP_ENDIF + Isomo = IEOR(Isomo, Jsomo) + Isomo = IBCLR(Isomo,p-1) +!IRP_IF WITHOUT_TRAILZ +! q = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 +!IRP_ELSE + q = TRAILZ(Isomo) + 1 +!IRP_ENDIF + else + ! SOMO -> SOMO + !print *,"obt SOMO -> SOMO" + extyp = 1 +!IRP_IF WITHOUT_TRAILZ +! q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1 +!IRP_ELSE + q = TRAILZ(IEOR(Idomo,Jdomo)) + 1 +!IRP_ENDIF + Isomo = IEOR(Isomo, Jsomo) + Isomo = IBCLR(Isomo,q-1) +!IRP_IF WITHOUT_TRAILZ +! p = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 +!IRP_ELSE + p = TRAILZ(Isomo) + 1 +!IRP_ENDIF + end if + case (2) + ! DOMO -> SOMO + !print *,"obt DOMO -> SOMO" + extyp = 4 + IJsomo = IEOR(Isomo, Jsomo) +!IRP_IF WITHOUT_TRAILZ +! p = (popcnt(ieor( IAND(Jsomo,IJsomo), IAND(Jsomo,IJsomo)-1))-1) + 1 +!IRP_ELSE + p = TRAILZ(IAND(Jsomo,IJsomo)) + 1 +!IRP_ENDIF + IJsomo = IBCLR(IJsomo,p-1) +!IRP_IF WITHOUT_TRAILZ +! q = (popcnt(ieor( IJsomo , IJsomo -1))-1) + 1 +!IRP_ELSE + q = TRAILZ(IJsomo) + 1 +!IRP_ENDIF + case default + print *,"something went wront in get connectedI" + end select + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + nconnectedI += 1 + do k=1,N_int + connectedI(k,1,nconnectedI) = psi_configuration(k,1,i) + connectedI(k,2,nconnectedI) = psi_configuration(k,2,i) + enddo + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 1.0d0 + else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + Isomo = psi_configuration(1,1,i) + Idomo = psi_configuration(1,2,i) + do ii = 1,mo_num + if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do ii = 1,mo_num + if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 2 + endif + end do + + do k=1,nholes + p = listholes(k) + q = p + extyp = 1 + if(holetype(k) .EQ. 1) then + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 1.0d0 + else + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 2.0d0 + endif + enddo + endif + end do + +end subroutine obtain_connected_I_foralpha From e3940ae2942a2c6ed93f7722e5a66c856a629b54 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Tue, 14 Jun 2022 18:39:03 +0200 Subject: [PATCH 17/70] Update qp2-dependencies. --- external/qp2-dependencies | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index 90ee61f5..242151e0 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit 90ee61f5041c7c94a0c605625a264860292813a0 +Subproject commit 242151e03d1d6bf042387226431d82d35845686a From 4064243da312068b3ea476d7cf103388a3141938 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Tue, 14 Jun 2022 21:42:08 +0200 Subject: [PATCH 18/70] Update davidson guess for cfg. --- src/davidson/diagonalization_hcfg.irp.f | 31 +++++++------------------ 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f index 140c3b69..bde2bdd9 100644 --- a/src/davidson/diagonalization_hcfg.irp.f +++ b/src/davidson/diagonalization_hcfg.irp.f @@ -265,36 +265,21 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N ! =================== converged = .False. - kk=1 + call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),U_csf(1,1)) do k=N_st+1,N_st_diag - do i=1,sze - !call random_number(r1) - !call random_number(r2) - r1 = 0.5 - r2 = 0.5 + do i=1,sze_csf + call random_number(r1) + call random_number(r2) r1 = dsqrt(-2.d0*dlog(r1)) r2 = dtwo_pi*r2 - u_in(i,k) = r1*dcos(r2) * u_in(i,k-N_st) + U_csf(i,k) = r1*dcos(r2) * u_csf(i,k-N_st) enddo - do while(POPCNT(AND(psi_det(1,1,kk),psi_det(1,2,kk))) .ne. 3) - kk=kk+1 - end do - u_in(kk,k) = u_in(kk,k) + 10.d0 - kk=kk+1 + U_csf(k,k) = u_csf(k,k) + 10.d0 enddo do k=1,N_st_diag - call normalize(u_in(1,k),sze) + call normalize(U_csf(1,k),sze_csf) enddo - - do k=1,N_st_diag - do i=1,sze - U(i,k) = u_in(i,k) - enddo - enddo - - ! Make random verctors eigenstates of S2 - call convertWFfromDETtoCSF(N_st_diag,U,U_csf) - !call convertWFfromCSFtoDET(N_st_diag,U_csf,U) + call convertWFfromCSFtoDET(N_st_diag,U_csf(1,1),U(1,1)) do while (.not.converged) itertot = itertot+1 From a6859c072bdefa16f2ce214e8d3c3d125ef50107 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Fri, 17 Jun 2022 11:05:27 +0200 Subject: [PATCH 19/70] Working on frozen core for CFG-CI. --- src/csf/configuration_CI_sigma_helpers.irp.f | 68 ++++++++++++-------- src/csf/create_excitations.irp.f | 9 ++- src/csf/obtain_I_foralpha.irp.f | 20 ++++-- src/csf/sigma_vector.irp.f | 55 ++++++++++------ src/mo_two_e_ints/core_quantities.irp.f | 52 +++++++++++++++ 5 files changed, 149 insertions(+), 55 deletions(-) diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f index a4aa41ff..1a8c1d14 100644 --- a/src/csf/configuration_CI_sigma_helpers.irp.f +++ b/src/csf/configuration_CI_sigma_helpers.irp.f @@ -31,9 +31,9 @@ use bitmasks integer :: ndiffDOMO integer :: nxordiffSOMODOMO integer :: ndiffAll - integer :: i - integer :: j - integer :: k + integer :: i,ii + integer :: j,jj + integer :: k,kk integer :: kstart integer :: kend integer :: Nsomo_I @@ -55,13 +55,15 @@ use bitmasks Icfg = psi_configuration(:,:,idxI) - Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) - Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) ! find out all pq holes possible nholes = 0 ! holes in SOMO - do i = 1,mo_num + !do i = 1,mo_num + do ii = 1,n_act_orb + i = list_act(ii) if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then nholes += 1 listholes(nholes) = i @@ -69,7 +71,9 @@ use bitmasks endif end do ! holes in DOMO - do i = 1,mo_num + !do i = 1,mo_num + do ii = 1,n_act_orb + i = list_act(ii) if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then nholes += 1 listholes(nholes) = i @@ -81,7 +85,9 @@ use bitmasks listvmos = -1 vmotype = -1 nvmos = 0 - do i = 1,mo_num + !do i = 1,mo_num + do ii = 1,n_act_orb + i = list_act(ii) if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then nvmos += 1 @@ -98,8 +104,8 @@ use bitmasks tableUniqueAlphas = .FALSE. ! Now find the allowed (p,q) excitations - Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) - Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) Nsomo_I = POPCNT(Isomo) if(Nsomo_I .EQ. 0) then kstart = 1 @@ -239,10 +245,10 @@ use bitmasks Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-2))) do k = kstart, idxI-1 - diffSOMO = IEOR(Isomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k))) + diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k))) ndiffSOMO = POPCNT(diffSOMO) if (ndiffSOMO /= 2) cycle - diffDOMO = IEOR(Idomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k))) + diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k))) xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) ndiffDOMO = POPCNT(diffDOMO) nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) @@ -298,9 +304,9 @@ END_PROVIDER integer :: ndiffDOMO integer :: nxordiffSOMODOMO integer :: ndiffAll - integer :: i - integer :: j - integer :: k + integer :: i, ii + integer :: j, jj + integer :: k, kk integer :: kstart integer :: kend integer :: Nsomo_I @@ -311,8 +317,8 @@ END_PROVIDER logical :: pqAlreadyGenQ logical :: pqExistsQ logical :: ppExistsQ - Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) - Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) !print*,"Input cfg" !call debug_spindet(Isomo,1) !call debug_spindet(Idomo,1) @@ -322,7 +328,9 @@ END_PROVIDER ! find out all pq holes possible nholes = 0 ! holes in SOMO - do i = 1,mo_num + !do i = 1,mo_num + do ii = 1,n_act_orb + i = list_act(ii) if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then nholes += 1 listholes(nholes) = i @@ -330,7 +338,9 @@ END_PROVIDER endif end do ! holes in DOMO - do i = 1,mo_num + !do i = 1,mo_num + do ii = 1,n_act_orb + i = list_act(ii) if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then nholes += 1 listholes(nholes) = i @@ -342,7 +352,9 @@ END_PROVIDER listvmos = -1 vmotype = -1 nvmos = 0 - do i = 1,mo_num + !do i = 1,mo_num + do ii = 1,n_act_orb + i = list_act(ii) !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0) then nvmos += 1 @@ -363,8 +375,8 @@ END_PROVIDER tableUniqueAlphas = .FALSE. ! Now find the allowed (p,q) excitations - Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) - Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) Nsomo_I = POPCNT(Isomo) if(Nsomo_I .EQ. 0) then kstart = 1 @@ -430,10 +442,10 @@ END_PROVIDER pqAlreadyGenQ = .FALSE. ! First check if it can be generated before do k = kstart, kend - diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k))) + diffSOMO = IEOR(Jsomo,iand(act_bitmask(1,1),psi_configuration(1,1,k))) ndiffSOMO = POPCNT(diffSOMO) if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle - diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k))) + diffDOMO = IEOR(Jdomo,iand(act_bitmask(1,1),psi_configuration(1,2,k))) xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) ndiffDOMO = POPCNT(diffDOMO) nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) @@ -534,11 +546,11 @@ END_PROVIDER ! Check if this Icfg has been previously generated as a mono ppExistsQ = .False. - Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) - Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) do k = 1, idxI-1 - diffSOMO = IEOR(Isomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k))) - diffDOMO = IEOR(Idomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k))) + diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k))) + diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k))) xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) ndiffSOMO = POPCNT(diffSOMO) ndiffDOMO = POPCNT(diffDOMO) diff --git a/src/csf/create_excitations.irp.f b/src/csf/create_excitations.irp.f index 5e94e355..1c59d579 100644 --- a/src/csf/create_excitations.irp.f +++ b/src/csf/create_excitations.irp.f @@ -249,6 +249,7 @@ subroutine generate_all_singles_cfg_with_type(bit_tmp,cfgInp,singles,idxs_single integer(bit_kind) :: Jdet(Nint,2) integer :: i,k, n_singles_ma, i_hole, i_particle, ex_type, addcfg + integer :: ii,kk integer(bit_kind) :: single(Nint,2) logical :: i_ok @@ -257,8 +258,12 @@ subroutine generate_all_singles_cfg_with_type(bit_tmp,cfgInp,singles,idxs_single !TODO !Make list of Somo and Domo for holes !Make list of Unocc and Somo for particles - do i_hole = 1+n_core_orb, n_core_orb + n_act_orb - do i_particle = 1+n_core_orb, n_core_orb + n_act_orb + !do i_hole = 1+n_core_orb, n_core_orb + n_act_orb + do ii = 1, n_act_orb + i_hole = list_act(ii) + !do i_particle = 1+n_core_orb, n_core_orb + n_act_orb + do kk = 1, n_act_orb + i_particle = list_act(kk) if(i_hole .EQ. i_particle) cycle addcfg = -1 call do_single_excitation_cfg_with_type(cfgInp,single,i_hole,i_particle,ex_type,i_ok) diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f index d6fbe386..a13fc5da 100644 --- a/src/csf/obtain_I_foralpha.irp.f +++ b/src/csf/obtain_I_foralpha.irp.f @@ -33,7 +33,7 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, integer :: ndiffSOMO integer :: ndiffDOMO integer :: nxordiffSOMODOMO - integer :: ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes + integer :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes integer :: listholes(mo_num) integer :: holetype(mo_num) integer :: end_index @@ -146,7 +146,9 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, ! holes in SOMO Isomo = psi_configuration(1,1,i) Idomo = psi_configuration(1,2,i) - do ii = 1,mo_num + !do ii = 1,mo_num + do iii = 1,n_act_orb + ii = list_act(iii) if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then nholes += 1 listholes(nholes) = ii @@ -154,7 +156,9 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, endif end do ! holes in DOMO - do ii = 1,mo_num + !do ii = 1,mo_num + do iii = 1,n_act_orb + ii = list_act(iii) if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then nholes += 1 listholes(nholes) = ii @@ -204,7 +208,7 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI integer :: ndiffSOMO integer :: ndiffDOMO integer :: nxordiffSOMODOMO - integer :: ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes + integer :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes integer :: listholes(mo_num) integer :: holetype(mo_num) integer :: end_index @@ -335,7 +339,9 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI ! holes in SOMO Isomo = psi_configuration(1,1,i) Idomo = psi_configuration(1,2,i) - do ii = 1,mo_num + !do ii = 1,mo_num + do iii = 1,n_act_orb + ii = list_act(iii) if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then nholes += 1 listholes(nholes) = ii @@ -343,7 +349,9 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI endif end do ! holes in DOMO - do ii = 1,mo_num + !do ii = 1,mo_num + do iii = 1,n_act_orb + ii = list_act(iii) if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then nholes += 1 listholes(nholes) = ii diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 60865320..4420faec 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -810,7 +810,7 @@ subroutine calculate_preconditioner_cfg(diag_energies) ! the configurations in psi_configuration ! returns : diag_energies : END_DOC - integer :: i,j,k,l,p,q,noccp,noccq, ii, jj + integer :: i,j,k,kk,l,p,q,noccp,noccq, ii, jj real*8,intent(out) :: diag_energies(n_CSF) integer :: nholes integer :: nvmos @@ -838,16 +838,19 @@ subroutine calculate_preconditioner_cfg(diag_energies) real*8 :: meCC real*8 :: ecore - PROVIDE h_core_ri + !PROVIDE h_core_ri + PROVIDE core_fock_operator + PROVIDE h_act_ri ! initialize energies diag_energies = 0.d0 + !print *,"Core energy=",core_energy," nucler rep=",nuclear_repulsion, " n_core_orb=",n_core_orb," n_act_orb=",n_act_orb," mo_num=",mo_num ! calculate core energy !call get_core_energy(ecore) - !diag_energies = ecore + diag_energies = core_energy - nuclear_repulsion ! calculate the core energy - !print *,"Core energy=",ref_bitmask_energy + !print *,"Core 2energy=",ref_bitmask_energy do i=1,N_configuration @@ -863,8 +866,9 @@ subroutine calculate_preconditioner_cfg(diag_energies) ! find out all pq holes possible nholes = 0 ! holes in SOMO - !do k = n_core_orb+1,n_core_orb + n_act_orb - do k = 1,mo_num + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then nholes += 1 listholes(nholes) = k @@ -874,7 +878,9 @@ subroutine calculate_preconditioner_cfg(diag_energies) ! holes in DOMO !do k = n_core_orb+1,n_core_orb + n_act_orb !do k = 1+n_core_inact_orb,n_core_orb+n_core_inact_act_orb - do k = 1,mo_num + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then nholes += 1 listholes(nholes) = k @@ -887,7 +893,9 @@ subroutine calculate_preconditioner_cfg(diag_energies) vmotype = -1 nvmos = 0 !do k = n_core_orb+1,n_core_orb + n_act_orb - do k = 1,mo_num + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then nvmos += 1 @@ -916,7 +924,7 @@ subroutine calculate_preconditioner_cfg(diag_energies) ! one-electron term if(p.EQ.q) then - hpp = noccq * h_core_ri(p,q)!mo_one_e_integrals(q,q) + hpp = noccq * h_act_ri(p,q)!mo_one_e_integrals(q,q) else hpp = 0.d0 endif @@ -1295,7 +1303,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze real*8,intent(in) :: psi_in(n_st,sze) real*8,intent(out) :: psi_out(n_st,sze) integer(bit_kind) :: Icfg(N_INT,2) - integer :: i,j,k,l,p,q,noccp,noccq, ii, jj, m, n, idxI, kk, nocck,orbk + integer :: i,j,k,l,p,q,noccp,noccq, m, n, idxI, nocck,orbk + integer :: ii,jj,kk,ll,pp,qq integer(bit_kind),dimension(:,:,:),allocatable :: listconnectedJ integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg integer(bit_kind),dimension(:,:,:),allocatable :: singlesI @@ -1394,10 +1403,10 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !$OMP colsikpq, GIJpqrs,TKIGIJ,j,l,m,TKI,CCmattmp, moi, moj, mok, mol,& !$OMP diagfac, tmpvar, diagfactors_0) & !$OMP shared(istart_cfg, iend_cfg, psi_configuration, mo_num, psi_config_data,& - !$OMP N_int, N_st, psi_out, psi_in, h_core_ri, AIJpqContainer,& + !$OMP N_int, N_st, psi_out, psi_in, h_core_ri, core_energy, h_act_ri, AIJpqContainer,& !$OMP sze, NalphaIcfg_list,alphasIcfg_list, bit_tmp, & !$OMP AIJpqMatrixDimsList, diag_energies, n_CSF, lock, NBFmax,nconnectedtotalmax, nconnectedmaxJ,maxnalphas,& - !$OMP num_threads_max) + !$OMP n_core_orb, n_act_orb, list_act, num_threads_max) allocate(singlesI(N_INT,2,max(sze,10000))) allocate(idxs_singlesI(max(sze,10000))) @@ -1430,8 +1439,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze ! list_core ! list_core_inact ! bitmasks - !do k = n_core_orb+1,n_core_orb + n_act_orb - do k = 1,mo_num + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then nholes += 1 listholes(nholes) = k @@ -1439,8 +1449,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze endif enddo ! holes in DOMO - !do k = n_core_orb+1,n_core_orb + n_act_orb - do k = 1,mo_num + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then nholes += 1 listholes(nholes) = k @@ -1452,8 +1463,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze listvmos = -1 vmotype = -1 nvmos = 0 - !do k = n_core_orb+1,n_core_orb + n_act_orb - do k = 1,mo_num + do kk = 1,n_act_orb + !do k = 1,mo_num + k = list_act(kk) !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then nvmos += 1 @@ -1507,7 +1519,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze cnti = ii-starti+1 do jj = startj, endj cntj = jj-startj+1 - meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* h_core_ri(p,q) + !meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* h_core_ri(p,q) + meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* h_act_ri(p,q) !print *,"jj = ",jj call omp_set_lock(lock(jj)) do kk = 1,n_st @@ -1532,6 +1545,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze deallocate(excitationIds_single) deallocate(excitationTypes_single) + !print *," psi(60,1)=",psi_out(1,60) + allocate(listconnectedJ(N_INT,2,max(sze,10000))) allocate(alphas_Icfg(N_INT,2,max(sze,10000))) allocate(connectedI_alpha(N_INT,2,max(sze,10000))) @@ -1711,6 +1726,8 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !$OMP END PARALLEL call omp_set_max_active_levels(4) + !print *," psi(60,1)=",psi_out(1,60) + !print *," diag_enregy=",diag_energies(1), " psi_out(1,1)=",psi_out(1,1) deallocate(diag_energies) deallocate(bit_tmp) diff --git a/src/mo_two_e_ints/core_quantities.irp.f b/src/mo_two_e_ints/core_quantities.irp.f index b764a1a6..e048f6a6 100644 --- a/src/mo_two_e_ints/core_quantities.irp.f +++ b/src/mo_two_e_ints/core_quantities.irp.f @@ -13,7 +13,19 @@ BEGIN_PROVIDER [double precision, core_energy] core_energy += 2.d0 * (2.d0 * mo_two_e_integrals_jj(j,l) - mo_two_e_integrals_jj_exchange(j,l)) enddo enddo + !print *," core no nucl=",core_energy + ! core-active + do i = 1, n_core_orb + j = list_core(i) + !!! VJ + !!! TODO: Correct the loop over active electrons + do k = 1, (elec_num - 2*n_core_orb)/2 + l = k + n_core_orb + core_energy += 2.d0 * (2.d0 * mo_two_e_integrals_jj(j,l) - mo_two_e_integrals_jj_exchange(j,l)) + enddo + enddo core_energy += nuclear_repulsion + !print *," core no nucl=",core_energy END_PROVIDER @@ -59,3 +71,43 @@ BEGIN_PROVIDER [ double precision, h_core_ri, (mo_num, mo_num) ] enddo END_PROVIDER + +BEGIN_PROVIDER [ double precision, h_act_ri, (mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Active Hamiltonian with 3-index exchange integrals: + ! + ! $\tilde{h}{pq} = h_{pq} - \frac{1}{2}\sum_{k} g(pk,kq)$ + END_DOC + + integer :: i,j, k + integer :: p,q, r + ! core-core contribution + h_act_ri = core_fock_operator + ! act-act contribution + do p=1,n_act_orb + j=list_act(p) + do q=1,n_act_orb + i=list_act(q) + h_act_ri(i,j) = mo_one_e_integrals(i,j) + enddo + do r=1,n_act_orb + k=list_act(r) + do q=1,n_act_orb + i=list_act(q) + h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j) + enddo + enddo + enddo + ! core-act contribution + do p=1,n_core_orb + j=list_core(p) + do k=1,mo_num + do q=1,n_act_orb + i=list_act(q) + h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j) + enddo + enddo + enddo +END_PROVIDER + From 92be856e50c5fdeee0d058434b7338d97d9bc4b5 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Fri, 17 Jun 2022 22:05:54 +0200 Subject: [PATCH 20/70] Frozen core is working. --- src/csf/sigma_vector.irp.f | 39 +++++++++++++++++++++---- src/davidson/diagonalization_hcfg.irp.f | 21 +++++++++++++ src/davidson/diagonalize_ci.irp.f | 2 ++ src/mo_two_e_ints/core_quantities.irp.f | 32 +++++++------------- 4 files changed, 68 insertions(+), 26 deletions(-) diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 4420faec..8f9f99d3 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -837,6 +837,7 @@ subroutine calculate_preconditioner_cfg(diag_energies) real*8 :: hpp real*8 :: meCC real*8 :: ecore + real*8 :: core_act_contrib !PROVIDE h_core_ri PROVIDE core_fock_operator @@ -863,6 +864,8 @@ subroutine calculate_preconditioner_cfg(diag_energies) starti = psi_config_data(i,1) endi = psi_config_data(i,2) + core_act_contrib = 0.0d0 + ! find out all pq holes possible nholes = 0 ! holes in SOMO @@ -915,6 +918,13 @@ subroutine calculate_preconditioner_cfg(diag_energies) p = listholes(k) noccp = holetype(k) + + ! core-active + do l = 1, n_core_orb + jj = list_core(l) + core_act_contrib += noccp * (2.d0 * mo_two_e_integrals_jj(jj,p) - mo_two_e_integrals_jj_exchange(jj,p)) + enddo + ! Calculate one-electron ! and two-electron coulomb terms do l=1,nholes @@ -944,6 +954,10 @@ subroutine calculate_preconditioner_cfg(diag_energies) enddo enddo + !print *,"I=",i," core_act=",core_act_contrib + do j=starti,endi + diag_energies(j) += core_act_contrib + end do enddo end subroutine calculate_preconditioner_cfg @@ -1345,6 +1359,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze real*8, external :: get_two_e_integral real*8,dimension(:),allocatable:: diag_energies real*8 :: tmpvar, tmptot + real*8 :: core_act_contrib integer(omp_lock_kind), allocatable :: lock(:) call omp_set_max_active_levels(1) @@ -1404,9 +1419,10 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !$OMP diagfac, tmpvar, diagfactors_0) & !$OMP shared(istart_cfg, iend_cfg, psi_configuration, mo_num, psi_config_data,& !$OMP N_int, N_st, psi_out, psi_in, h_core_ri, core_energy, h_act_ri, AIJpqContainer,& - !$OMP sze, NalphaIcfg_list,alphasIcfg_list, bit_tmp, & + !$OMP pp, sze, NalphaIcfg_list,alphasIcfg_list, bit_tmp, & !$OMP AIJpqMatrixDimsList, diag_energies, n_CSF, lock, NBFmax,nconnectedtotalmax, nconnectedmaxJ,maxnalphas,& - !$OMP n_core_orb, n_act_orb, list_act, num_threads_max) + !$OMP n_core_orb, n_act_orb, list_act, n, list_core, list_core_is_built,core_act_contrib, num_threads_max,& + !$OMP n_core_orb_is_built, mo_integrals_map, mo_integrals_map_is_built) allocate(singlesI(N_INT,2,max(sze,10000))) allocate(idxs_singlesI(max(sze,10000))) @@ -1520,8 +1536,18 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze do jj = startj, endj cntj = jj-startj+1 !meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* h_core_ri(p,q) - meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* h_act_ri(p,q) + core_act_contrib = 0.0d0 + if(p.ne.q)then + do pp=1,n_core_orb + n=list_core(pp) + core_act_contrib += 2.d0 * get_two_e_integral(p,n,q,n,mo_integrals_map) - get_two_e_integral(p,n,n,q,mo_integrals_map) + end do + endif + meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* (h_act_ri(p,q) + core_act_contrib) !print *,"jj = ",jj + !if(ii.eq.1 .and. jj.eq.177 )then + ! print *,"p=",p," q=",q," hact=",h_act_ri(p,q), " core_act=",core_act_contrib + !endif call omp_set_lock(lock(jj)) do kk = 1,n_st psi_out(kk,jj) = psi_out(kk,jj) + meCC1 * psi_in(kk,ii) @@ -1545,7 +1571,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze deallocate(excitationIds_single) deallocate(excitationTypes_single) - !print *," psi(60,1)=",psi_out(1,60) + !print *," singles part psi(1,177)=",psi_out(1,177) allocate(listconnectedJ(N_INT,2,max(sze,10000))) allocate(alphas_Icfg(N_INT,2,max(sze,10000))) @@ -1691,6 +1717,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze do m = 1,colsikpq call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1)) + !if((idxs_connectedI_alpha(j)+m-1).eq.177)then + ! print *,"CC=",CCmattmp(1,m) + !endif do kk = 1,n_st psi_out(kk,idxs_connectedI_alpha(j)+m-1) += CCmattmp(kk,m) enddo @@ -1714,6 +1743,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze deallocate(excitationTypes) deallocate(diagfactors) + !print *," psi(1,177)=",psi_out(1,177) ! Add the diagonal contribution !$OMP DO @@ -1726,7 +1756,6 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !$OMP END PARALLEL call omp_set_max_active_levels(4) - !print *," psi(60,1)=",psi_out(1,60) !print *," diag_enregy=",diag_energies(1), " psi_out(1,1)=",psi_out(1,1) deallocate(diag_energies) diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f index bde2bdd9..193d103b 100644 --- a/src/davidson/diagonalization_hcfg.irp.f +++ b/src/davidson/diagonalization_hcfg.irp.f @@ -112,6 +112,7 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N double precision, allocatable :: U(:,:), U_csf(:,:), overlap(:,:) double precision, allocatable :: tmpU(:,:), tmpW(:,:) double precision, pointer :: W(:,:), W_csf(:,:) + double precision, pointer :: W2(:,:), U2(:,:) logical :: disk_based double precision :: energy_shift(N_st_diag_in*davidson_sze_max) @@ -234,11 +235,13 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax)) + allocate(W2(sze,N_st_diag)) endif allocate( & ! Large U(sze,N_st_diag), & + U2(sze,N_st_diag), & U_csf(sze_csf,N_st_diag*itermax), & ! Small @@ -324,6 +327,10 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N tmpU(kk,ii) = U_csf(ii,shift+kk) enddo enddo + !do j=1,1 + ! print *,"====> J=",j + !tmpU=0.0d0 + !tmpU(1,j)=1.0d0 call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) do kk=1,N_st_diag do ii=1,sze_csf @@ -331,6 +338,20 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N enddo enddo + !U_csf=0.0d0 + !U_csf(j,1)=1.0d0 + !call convertWFfromCSFtoDET(N_st_diag,U_csf(1,1),U(1,1)) + !call H_u_0_nstates_openmp(U2,U,N_st_diag,sze) + !call convertWFfromDETtoCSF(N_st_diag,U2(1,1),W2(1,1)) + !print *," w2=",W2(j,1) + !do i=1,sze_csf + ! print *, " i=",i,"qp=",W2(i,1)," my=",W_csf(i,1),dabs(dabs(W2(i,1))-dabs(W_csf(i,1))) + ! if(dabs(dabs(W2(i,1))-dabs(W_csf(i,1))) .ge. 1.0e-10)then + ! print *," somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i) + ! endif + !end do + !end do + !stop deallocate(tmpW) deallocate(tmpU) endif diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f index 78478c57..f5045913 100644 --- a/src/davidson/diagonalize_ci.irp.f +++ b/src/davidson/diagonalize_ci.irp.f @@ -72,7 +72,9 @@ END_PROVIDER if (diag_algorithm == "Davidson") then if (do_csf) then + !if (.true.) then if (sigma_vector_algorithm == 'det') then + !if (.false.) then call davidson_diag_H_csf(psi_det,CI_eigenvectors, & size(CI_eigenvectors,1),CI_electronic_energy, & N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged) diff --git a/src/mo_two_e_ints/core_quantities.irp.f b/src/mo_two_e_ints/core_quantities.irp.f index e048f6a6..ef99bd5a 100644 --- a/src/mo_two_e_ints/core_quantities.irp.f +++ b/src/mo_two_e_ints/core_quantities.irp.f @@ -13,19 +13,7 @@ BEGIN_PROVIDER [double precision, core_energy] core_energy += 2.d0 * (2.d0 * mo_two_e_integrals_jj(j,l) - mo_two_e_integrals_jj_exchange(j,l)) enddo enddo - !print *," core no nucl=",core_energy - ! core-active - do i = 1, n_core_orb - j = list_core(i) - !!! VJ - !!! TODO: Correct the loop over active electrons - do k = 1, (elec_num - 2*n_core_orb)/2 - l = k + n_core_orb - core_energy += 2.d0 * (2.d0 * mo_two_e_integrals_jj(j,l) - mo_two_e_integrals_jj_exchange(j,l)) - enddo - enddo core_energy += nuclear_repulsion - !print *," core no nucl=",core_energy END_PROVIDER @@ -84,6 +72,7 @@ BEGIN_PROVIDER [ double precision, h_act_ri, (mo_num, mo_num) ] integer :: p,q, r ! core-core contribution h_act_ri = core_fock_operator + !print *,' Bef----hact(1,14)=',h_act_ri(4,14) ! act-act contribution do p=1,n_act_orb j=list_act(p) @@ -100,14 +89,15 @@ BEGIN_PROVIDER [ double precision, h_act_ri, (mo_num, mo_num) ] enddo enddo ! core-act contribution - do p=1,n_core_orb - j=list_core(p) - do k=1,mo_num - do q=1,n_act_orb - i=list_act(q) - h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j) - enddo - enddo - enddo + !do p=1,n_act_orb + ! j=list_core(p) + ! do k=1,n_core_orb + ! do q=1,n_act_orb + ! i=list_act(q) + ! h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j) + ! enddo + ! enddo + !enddo + !print *,' Aft----hact(1,14)=',h_act_ri(4,14), mo_one_e_integrals(4,14) END_PROVIDER From 72a762c8e3456d0439e98448062b4195f02c3325 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Fri, 17 Jun 2022 22:41:20 +0200 Subject: [PATCH 21/70] Frozen core is working !. Some cleaning in sigma-vector. --- src/csf/sigma_vector.irp.f | 9 --------- src/davidson/diagonalize_ci.irp.f | 2 -- 2 files changed, 11 deletions(-) diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 8f9f99d3..f631d1a8 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -1480,7 +1480,6 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze vmotype = -1 nvmos = 0 do kk = 1,n_act_orb - !do k = 1,mo_num k = list_act(kk) !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then @@ -1544,10 +1543,6 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze end do endif meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* (h_act_ri(p,q) + core_act_contrib) - !print *,"jj = ",jj - !if(ii.eq.1 .and. jj.eq.177 )then - ! print *,"p=",p," q=",q," hact=",h_act_ri(p,q), " core_act=",core_act_contrib - !endif call omp_set_lock(lock(jj)) do kk = 1,n_st psi_out(kk,jj) = psi_out(kk,jj) + meCC1 * psi_in(kk,ii) @@ -1717,9 +1712,6 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze do m = 1,colsikpq call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1)) - !if((idxs_connectedI_alpha(j)+m-1).eq.177)then - ! print *,"CC=",CCmattmp(1,m) - !endif do kk = 1,n_st psi_out(kk,idxs_connectedI_alpha(j)+m-1) += CCmattmp(kk,m) enddo @@ -1756,7 +1748,6 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !$OMP END PARALLEL call omp_set_max_active_levels(4) - !print *," diag_enregy=",diag_energies(1), " psi_out(1,1)=",psi_out(1,1) deallocate(diag_energies) deallocate(bit_tmp) diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f index f5045913..78478c57 100644 --- a/src/davidson/diagonalize_ci.irp.f +++ b/src/davidson/diagonalize_ci.irp.f @@ -72,9 +72,7 @@ END_PROVIDER if (diag_algorithm == "Davidson") then if (do_csf) then - !if (.true.) then if (sigma_vector_algorithm == 'det') then - !if (.false.) then call davidson_diag_H_csf(psi_det,CI_eigenvectors, & size(CI_eigenvectors,1),CI_electronic_energy, & N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged) From 651b99b4312c83e4c5568d7d40a949d47fefc48c Mon Sep 17 00:00:00 2001 From: v1j4y Date: Fri, 17 Jun 2022 22:43:16 +0200 Subject: [PATCH 22/70] Removed debug allocate. --- src/davidson/diagonalization_hcfg.irp.f | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f index 193d103b..bde2bdd9 100644 --- a/src/davidson/diagonalization_hcfg.irp.f +++ b/src/davidson/diagonalization_hcfg.irp.f @@ -112,7 +112,6 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N double precision, allocatable :: U(:,:), U_csf(:,:), overlap(:,:) double precision, allocatable :: tmpU(:,:), tmpW(:,:) double precision, pointer :: W(:,:), W_csf(:,:) - double precision, pointer :: W2(:,:), U2(:,:) logical :: disk_based double precision :: energy_shift(N_st_diag_in*davidson_sze_max) @@ -235,13 +234,11 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax)) - allocate(W2(sze,N_st_diag)) endif allocate( & ! Large U(sze,N_st_diag), & - U2(sze,N_st_diag), & U_csf(sze_csf,N_st_diag*itermax), & ! Small @@ -327,10 +324,6 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N tmpU(kk,ii) = U_csf(ii,shift+kk) enddo enddo - !do j=1,1 - ! print *,"====> J=",j - !tmpU=0.0d0 - !tmpU(1,j)=1.0d0 call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) do kk=1,N_st_diag do ii=1,sze_csf @@ -338,20 +331,6 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N enddo enddo - !U_csf=0.0d0 - !U_csf(j,1)=1.0d0 - !call convertWFfromCSFtoDET(N_st_diag,U_csf(1,1),U(1,1)) - !call H_u_0_nstates_openmp(U2,U,N_st_diag,sze) - !call convertWFfromDETtoCSF(N_st_diag,U2(1,1),W2(1,1)) - !print *," w2=",W2(j,1) - !do i=1,sze_csf - ! print *, " i=",i,"qp=",W2(i,1)," my=",W_csf(i,1),dabs(dabs(W2(i,1))-dabs(W_csf(i,1))) - ! if(dabs(dabs(W2(i,1))-dabs(W_csf(i,1))) .ge. 1.0e-10)then - ! print *," somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i) - ! endif - !end do - !end do - !stop deallocate(tmpW) deallocate(tmpU) endif From bb901fcada346460164c500a74ebc51cd2885e99 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Fri, 17 Jun 2022 22:44:34 +0200 Subject: [PATCH 23/70] Cleaning. --- src/csf/configuration_CI_sigma_helpers.irp.f | 8 -------- src/csf/obtain_I_foralpha.irp.f | 4 ---- 2 files changed, 12 deletions(-) diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f index 1a8c1d14..a5bc845e 100644 --- a/src/csf/configuration_CI_sigma_helpers.irp.f +++ b/src/csf/configuration_CI_sigma_helpers.irp.f @@ -61,7 +61,6 @@ use bitmasks ! find out all pq holes possible nholes = 0 ! holes in SOMO - !do i = 1,mo_num do ii = 1,n_act_orb i = list_act(ii) if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then @@ -71,7 +70,6 @@ use bitmasks endif end do ! holes in DOMO - !do i = 1,mo_num do ii = 1,n_act_orb i = list_act(ii) if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then @@ -85,7 +83,6 @@ use bitmasks listvmos = -1 vmotype = -1 nvmos = 0 - !do i = 1,mo_num do ii = 1,n_act_orb i = list_act(ii) if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then @@ -323,12 +320,9 @@ END_PROVIDER !call debug_spindet(Isomo,1) !call debug_spindet(Idomo,1) - !print*,n_act_orb, "monum=",mo_num," n_core=",n_core_orb - ! find out all pq holes possible nholes = 0 ! holes in SOMO - !do i = 1,mo_num do ii = 1,n_act_orb i = list_act(ii) if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then @@ -338,7 +332,6 @@ END_PROVIDER endif end do ! holes in DOMO - !do i = 1,mo_num do ii = 1,n_act_orb i = list_act(ii) if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then @@ -352,7 +345,6 @@ END_PROVIDER listvmos = -1 vmotype = -1 nvmos = 0 - !do i = 1,mo_num do ii = 1,n_act_orb i = list_act(ii) !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f index a13fc5da..3926b908 100644 --- a/src/csf/obtain_I_foralpha.irp.f +++ b/src/csf/obtain_I_foralpha.irp.f @@ -146,7 +146,6 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, ! holes in SOMO Isomo = psi_configuration(1,1,i) Idomo = psi_configuration(1,2,i) - !do ii = 1,mo_num do iii = 1,n_act_orb ii = list_act(iii) if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then @@ -156,7 +155,6 @@ subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, endif end do ! holes in DOMO - !do ii = 1,mo_num do iii = 1,n_act_orb ii = list_act(iii) if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then @@ -339,7 +337,6 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI ! holes in SOMO Isomo = psi_configuration(1,1,i) Idomo = psi_configuration(1,2,i) - !do ii = 1,mo_num do iii = 1,n_act_orb ii = list_act(iii) if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then @@ -349,7 +346,6 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI endif end do ! holes in DOMO - !do ii = 1,mo_num do iii = 1,n_act_orb ii = list_act(iii) if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then From e27a2f4f18435ec41dd92d9f95518387a2f84dde Mon Sep 17 00:00:00 2001 From: v1j4y Date: Sat, 18 Jun 2022 12:24:30 +0200 Subject: [PATCH 24/70] Working on S > 0. --- src/csf/cfgCI_utils.c | 11 +++++++++++ src/csf/conversion.irp.f | 15 ++++++++++----- src/csf/sigma_vector.irp.f | 31 +++++++++++++++++++++---------- src/csf/tree_utils.c | 10 ++++++++++ src/davidson/diagonalize_ci.irp.f | 6 ++++-- 5 files changed, 56 insertions(+), 17 deletions(-) diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index be4653a3..d55da188 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -342,7 +342,11 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl ************************************/ + //printf(" --- In convet ----\n"); convertBFtoDetBasis(Isomo, MS, &bftodetmatrixI, &rowsbftodetI, &colsbftodetI); + //printf(" --- done bf det basis ---- row=%d col=%d\n",rowsbftodetI,colsbftodetI); + + //printRealMatrix(bftodetmatrixI,rowsbftodetI,colsbftodetI); int rowsI = 0; int colsI = 0; @@ -350,6 +354,8 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); getOverlapMatrix_withDet(bftodetmatrixI, rowsbftodetI, colsbftodetI, Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); + //printf("Overlap matrix\n"); + //printRealMatrix(overlapMatrixI,rowsI,colsI); /*********************************** Get Orthonormalization Matrix @@ -359,6 +365,9 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl gramSchmidt(overlapMatrixI, rowsI, colsI, orthoMatrixI); + //printf("Ortho matrix\n"); + //printRealMatrix(orthoMatrixI,rowsI,colsI); + /*********************************** Get Final CSF to Det Matrix ************************************/ @@ -1367,7 +1376,9 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int * int ndets = 0; int NBF = 0; double dNSOMO = NSOMO*1.0; + // MS = alpha_num - beta_num double nalpha = (NSOMO + MS)/2.0; + //printf(" in convertbftodet : MS=%d nalpha=%3.2f\n",MS,nalpha); ndets = (int)binom(dNSOMO, nalpha); Tree dettree = (Tree){ .rootNode = NULL, .NBF = -1 }; diff --git a/src/csf/conversion.irp.f b/src/csf/conversion.irp.f index 75f6e539..9d6e5219 100644 --- a/src/csf/conversion.irp.f +++ b/src/csf/conversion.irp.f @@ -24,7 +24,7 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out) double precision, intent(out) :: psi_coef_cfg_out(n_CSF,N_st) integer*8 :: Isomo, Idomo, mask integer(bit_kind) :: Ialpha(N_int) ,Ibeta(N_int) - integer :: rows, cols, i, j, k + integer :: rows, cols, i, j, k, salpha integer :: startdet, enddet integer :: ndetI integer :: getNSOMO @@ -65,13 +65,15 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out) enddo if(iand(s,1) .EQ. 0) then - bfIcfg = max(1,nint((binom(s,s/2)-binom(s,(s/2)+1)))) + salpha = (s + MS)/2 + bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) else - bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) + bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) endif ! perhaps blocking with CFGs of same seniority ! can be more efficient + print *,"bfIcfg=",bfIcfg, " ndetI=",ndetI allocate(tempBuffer(bfIcfg,ndetI)) tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI) @@ -99,7 +101,7 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det) double precision,intent(in) :: psi_coef_cfg_in(n_CSF,N_st) double precision,intent(out) :: psi_coef_det(N_det,N_st) double precision :: tmp_psi_coef_det(maxDetDimPerBF,N_st) - integer :: s, bfIcfg + integer :: s, bfIcfg, salpha integer :: countcsf integer(bit_kind) :: Ialpha(N_int), Ibeta(N_int) integer :: rows, cols, i, j, k @@ -110,6 +112,8 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det) double precision,allocatable :: tempCoeff (:,:) double precision :: phasedet integer :: idx + integer MS + MS = elec_alpha_num-elec_beta_num countcsf = 0 @@ -123,7 +127,8 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det) if (psi_configuration(k,1,i) == 0_bit_kind) cycle s = s + popcnt(psi_configuration(k,1,i)) enddo - bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) + salpha = (s + MS)/2 + bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) allocate(tempCoeff(bfIcfg,N_st)) diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index f631d1a8..60c4631e 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -20,12 +20,15 @@ ! The maximum number of SOMOs for the current calculation. ! required for the calculation of prototype arrays. END_DOC + integer MS, ialpha + MS = elec_alpha_num-elec_beta_num NSOMOMax = min(elec_num, cfg_nsomo_max + 2) NSOMOMin = max(0,cfg_nsomo_min-2) ! Note that here we need NSOMOMax + 2 sizes - NCSFMax = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)-binom(NSOMOMax,((NSOMOMax+1)/2)+1)))) ! TODO: NCSFs for MS=0 + ialpha = (NSOMOMax + MS)/2 + NCSFMax = max(1,nint((binom(NSOMOMax,ialpha)-binom(NSOMOMax,ialpha+1)))) ! TODO: NCSFs for MS=0 (CHECK) NBFMax = NCSFMax - maxDetDimPerBF = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)))) + maxDetDimPerBF = max(1,nint((binom(NSOMOMax,ialpha)))) NMO = n_act_orb integer i,j,k,l integer startdet,enddet @@ -34,11 +37,9 @@ integer dimcsfpercfg integer detDimperBF real*8 :: coeff, binom1, binom2 - integer MS integer ncfgpersomo real*8, external :: logabsgamma detDimperBF = 0 - MS = elec_alpha_num-elec_beta_num ! number of cfgs = number of dets for 0 somos n_CSF = cfg_seniority_index(NSOMOMin)-1 ncfgprev = cfg_seniority_index(NSOMOMin) @@ -105,7 +106,11 @@ ! endif !enddo n_CSF = 0 - ncfgprev = cfg_seniority_index(0) ! should be 1 + print *," -9(((((((((((((( NSOMOMin=",NSOMOMin + ncfgprev = cfg_seniority_index(NSOMOMin) ! can be -1 + if(ncfgprev.eq.-1)then + ncfgprev=1 + endif do i=NSOMOMin,NSOMOMax+2,2 !k=0 !do while((cfg_seniority_index(i+2+k) .eq. -1) .and. (k.le.NSOMOMax)) @@ -126,9 +131,11 @@ dimcsfpercfg = 2 else if(iand(MS,1) .EQ. 0) then - dimcsfpercfg = max(1,nint((binom(i,i/2)-binom(i,i/2+1)))) + ialpha = (i + MS)/2 + dimcsfpercfg = max(1,nint((binom(i,ialpha)-binom(i,ialpha+1)))) else - dimcsfpercfg = max(1,nint((binom(i,(i+1)/2)-binom(i,(i+3)/2)))) + ialpha = (i + MS)/2 + dimcsfpercfg = max(1,nint((binom(i,ialpha)-binom(i,ialpha+1)))) endif endif n_CSF += ncfg*dimcsfpercfg @@ -212,7 +219,7 @@ end subroutine get_phase_qp_to_cfg integer(bit_kind) :: Ialpha(N_int),Ibeta(N_int) integer :: rows, cols, i, j, k integer :: startdet, enddet, idx - integer*8 MS + integer*8 MS, salpha integer ndetI integer :: getNSOMO real*8,dimension(:,:),allocatable :: tempBuffer @@ -231,8 +238,11 @@ end subroutine get_phase_qp_to_cfg Isomo = IBSET(0_8, i) - 1_8 ! rows = Ncsfs ! cols = Ndets - bfIcfg = max(1,nint((binom(i,(i+1)/2)-binom(i,((i+1)/2)+1)))) - ndetI = max(1,nint((binom(i,(i+1)/2)))) + salpha = (i+MS)/2 + bfIcfg = max(1,nint((binom(i,salpha)-binom(i,salpha+1)))) + ndetI = max(1,nint((binom(i,salpha)))) + !bfIcfg = max(1,nint((binom(i,(i+1)/2)-binom(i,((i+1)/2)+1)))) + !ndetI = max(1,nint((binom(i,(i+1)/2)))) allocate(tempBuffer(bfIcfg,ndetI)) call getCSFtoDETTransformationMatrix(Isomo, MS, NBFMax, maxDetDimPerBF, tempBuffer) @@ -1364,6 +1374,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze integer(omp_lock_kind), allocatable :: lock(:) call omp_set_max_active_levels(1) + print *," sze = ",sze allocate(lock(sze)) do i=1,sze call omp_init_lock(lock(i)) diff --git a/src/csf/tree_utils.c b/src/csf/tree_utils.c index 1266b890..26cb15e0 100644 --- a/src/csf/tree_utils.c +++ b/src/csf/tree_utils.c @@ -311,3 +311,13 @@ void callBlasMatxMat(double *A, int rowA, int colA, double *B, int rowB, int col break; } } + +void printRealMatrix(double *orthoMatrix, int rows, int cols){ + int i,j; + for(i=0;i Date: Sat, 18 Jun 2022 17:31:44 +0200 Subject: [PATCH 25/70] S>0 runs. Need to check energy. --- src/csf/cfgCI_utils.c | 4 +- src/csf/configuration_CI_sigma_helpers.irp.f | 40 +++++++++++--------- src/csf/conversion.irp.f | 1 - src/csf/sigma_vector.irp.f | 11 ++++-- src/davidson/diagonalization_hcfg.irp.f | 11 ++++++ src/davidson/diagonalize_ci.irp.f | 6 +-- 6 files changed, 44 insertions(+), 29 deletions(-) diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index d55da188..6094f5bb 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -32,8 +32,8 @@ void getncsfs1(int *inpnsomo, int *inpms, int *outncsfs){ } void getncsfs(int NSOMO, int MS, int *outncsfs){ - int nparcoupl = (NSOMO + MS)/2; - int nparcouplp1 = ((NSOMO + MS)/2)+1; + int nparcoupl = (NSOMO + MS)/2; // n_alpha + int nparcouplp1 = ((NSOMO + MS)/2)+1; // n_alpha + 1 double tmpndets=0.0; if(NSOMO == 0){ (*outncsfs) = 1; diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f index a5bc845e..8d447818 100644 --- a/src/csf/configuration_CI_sigma_helpers.irp.f +++ b/src/csf/configuration_CI_sigma_helpers.irp.f @@ -107,7 +107,7 @@ use bitmasks if(Nsomo_I .EQ. 0) then kstart = 1 else - kstart = cfg_seniority_index(max(0,Nsomo_I-2)) + kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)) endif kend = idxI-1 @@ -121,14 +121,14 @@ use bitmasks Jsomo = IBCLR(Isomo,p-1) Jsomo = IBSET(Jsomo,q-1) Jdomo = Idomo - kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-2))) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) kend = idxI-1 else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then ! SOMO -> SOMO Jsomo = IBCLR(Isomo,p-1) Jsomo = IBCLR(Jsomo,q-1) Jdomo = IBSET(Idomo,q-1) - kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-4))) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) kend = idxI-1 else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then ! DOMO -> VMO @@ -143,7 +143,7 @@ use bitmasks Jsomo = IBCLR(Jsomo,q-1) Jdomo = IBCLR(Idomo,p-1) Jdomo = IBSET(Jdomo,q-1) - kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-2))) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) kend = idxI-1 else print*,"Something went wrong in obtain_associated_alphaI" @@ -227,11 +227,13 @@ use bitmasks endif ! SOMO - NalphaIcfg += 1 - !print *,i,j,"|",NalphaIcfg - alphasIcfg_list(1,1,idxI,NalphaIcfg) = Jsomo - alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) - NalphaIcfg_list(idxI) = NalphaIcfg + !print *,i,j,"|",NalphaIcfg, Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + if(POPCNT(Jsomo) .ge. NSOMOMin) then + NalphaIcfg += 1 + alphasIcfg_list(1,1,idxI,NalphaIcfg) = Jsomo + alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + NalphaIcfg_list(idxI) = NalphaIcfg + endif endif end do end do @@ -240,7 +242,7 @@ use bitmasks ppExistsQ = .False. Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) - kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-2))) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) do k = kstart, idxI-1 diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k))) ndiffSOMO = POPCNT(diffSOMO) @@ -257,10 +259,12 @@ use bitmasks ! Diagonal part (pp,qq) if(nholes > 0 .AND. (.NOT. ppExistsQ))then ! SOMO - NalphaIcfg += 1 - alphasIcfg_list(1,1,idxI,NalphaIcfg) = Icfg(1,1) - alphasIcfg_list(1,2,idxI,NalphaIcfg) = Icfg(1,2) - NalphaIcfg_list(idxI) = NalphaIcfg + if(POPCNT(Jsomo) .ge. NSOMOMin) then + NalphaIcfg += 1 + alphasIcfg_list(1,1,idxI,NalphaIcfg) = Icfg(1,1) + alphasIcfg_list(1,2,idxI,NalphaIcfg) = Icfg(1,2) + NalphaIcfg_list(idxI) = NalphaIcfg + endif endif NalphaIcfg = 0 @@ -373,7 +377,7 @@ END_PROVIDER if(Nsomo_I .EQ. 0) then kstart = 1 else - kstart = cfg_seniority_index(max(0,Nsomo_I-2)) + kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)) endif kend = idxI-1 !print *,"Isomo" @@ -398,14 +402,14 @@ END_PROVIDER Jsomo = IBCLR(Isomo,p-1) Jsomo = IBSET(Jsomo,q-1) Jdomo = Idomo - kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-2))) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) kend = idxI-1 else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then ! SOMO -> SOMO Jsomo = IBCLR(Isomo,p-1) Jsomo = IBCLR(Jsomo,q-1) Jdomo = IBSET(Idomo,q-1) - kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-4))) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) kend = idxI-1 else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then ! DOMO -> VMO @@ -420,7 +424,7 @@ END_PROVIDER Jsomo = IBCLR(Jsomo,q-1) Jdomo = IBCLR(Idomo,p-1) Jdomo = IBSET(Jdomo,q-1) - kstart = max(1,cfg_seniority_index(max(0,Nsomo_I-2))) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) kend = idxI-1 else print*,"Something went wrong in obtain_associated_alphaI" diff --git a/src/csf/conversion.irp.f b/src/csf/conversion.irp.f index 9d6e5219..9ddd4fee 100644 --- a/src/csf/conversion.irp.f +++ b/src/csf/conversion.irp.f @@ -73,7 +73,6 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out) ! perhaps blocking with CFGs of same seniority ! can be more efficient - print *,"bfIcfg=",bfIcfg, " ndetI=",ndetI allocate(tempBuffer(bfIcfg,ndetI)) tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI) diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 60c4631e..c9299cae 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -234,7 +234,7 @@ end subroutine get_phase_qp_to_cfg ! initialization psi_coef_config = 0.d0 DetToCSFTransformationMatrix(0,:,:) = 1.d0 - do i = 2-iand(elec_alpha_num-elec_beta_num,1), NSOMOMax,2 + do i = 2-iand(MS,1), NSOMOMax,2 Isomo = IBSET(0_8, i) - 1_8 ! rows = Ncsfs ! cols = Ndets @@ -287,7 +287,9 @@ end subroutine get_phase_qp_to_cfg if (psi_configuration(k,1,i) == 0_bit_kind) cycle s = s + popcnt(psi_configuration(k,1,i)) enddo - bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) + salpha = (s+MS)/2 + bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) + !bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) ! perhaps blocking with CFGs of same seniority ! can be more efficient @@ -1374,7 +1376,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze integer(omp_lock_kind), allocatable :: lock(:) call omp_set_max_active_levels(1) - print *," sze = ",sze + !print *," sze = ",sze allocate(lock(sze)) do i=1,sze call omp_init_lock(lock(i)) @@ -1383,7 +1385,6 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze ! print *,"i=",i," psi_cfg_data_1=",psi_config_data(i,1)," psi_cfg_data_2=",psi_config_data(i,2) !end do - !print *," sze = ",sze allocate(diag_energies(n_CSF)) call calculate_preconditioner_cfg(diag_energies) @@ -1639,6 +1640,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze totcolsTKI = 0 rowsTKI = -1 NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) + !print *,"alphas_Icfg=",alphas_Icfg(1,1,k) do j = 1,nconnectedI NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) p = excitationIds(1,j) @@ -1648,6 +1650,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze ! for E_pp E_rs and E_ppE_rr case rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + !print *,"j=",j," Nsomo=",NSOMOalpha," rowsikpq=",rowsikpq," colsikpq=",colsikpq, " p=",pmodel," q=",qmodel, " extyp=",extype totcolsTKI += colsikpq rowsTKI = rowsikpq enddo diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f index bde2bdd9..92097a2f 100644 --- a/src/davidson/diagonalization_hcfg.irp.f +++ b/src/davidson/diagonalization_hcfg.irp.f @@ -324,6 +324,7 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N tmpU(kk,ii) = U_csf(ii,shift+kk) enddo enddo + !tmpU(1,1)=1.0d0 call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) do kk=1,N_st_diag do ii=1,sze_csf @@ -331,6 +332,16 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N enddo enddo + !U_csf = 0.0d0 + !U_csf(1,1) = 1.0d0 + !u_in = 0.0d0 + !call convertWFfromCSFtoDET(N_st_diag,tmpU,U2) + !call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze) + !call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1)) + !do i=1,sze_csf + ! print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1), " diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1)) + !end do + !stop deallocate(tmpW) deallocate(tmpU) endif diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f index 42059e98..78478c57 100644 --- a/src/davidson/diagonalize_ci.irp.f +++ b/src/davidson/diagonalize_ci.irp.f @@ -71,10 +71,8 @@ END_PROVIDER if (diag_algorithm == "Davidson") then - !if (do_csf) then - if (.true.) then - !if (sigma_vector_algorithm == 'det') then - if (.false.) then + if (do_csf) then + if (sigma_vector_algorithm == 'det') then call davidson_diag_H_csf(psi_det,CI_eigenvectors, & size(CI_eigenvectors,1),CI_electronic_energy, & N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged) From 2e9f539827ad7aa208f2ce0dd52291a18ed44c98 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Mon, 20 Jun 2022 19:55:44 +0200 Subject: [PATCH 26/70] Fixed some bugs for S>0. --- src/csf/configuration_CI_sigma_helpers.irp.f | 22 ++++++++++++++++++-- src/csf/obtain_I_foralpha.irp.f | 22 ++++++++++++++------ src/csf/sigma_vector.irp.f | 7 ++++--- src/davidson/diagonalization_hcfg.irp.f | 5 ++++- 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f index 8d447818..cea7640c 100644 --- a/src/csf/configuration_CI_sigma_helpers.irp.f +++ b/src/csf/configuration_CI_sigma_helpers.irp.f @@ -44,10 +44,13 @@ use bitmasks logical :: pqAlreadyGenQ logical :: pqExistsQ logical :: ppExistsQ + integer*8 :: MS double precision :: t0, t1 call wall_time(t0) + MS = elec_alpha_num-elec_beta_num + allocate(tableUniqueAlphas(mo_num,mo_num)) NalphaIcfg_list = 0 @@ -128,8 +131,13 @@ use bitmasks Jsomo = IBCLR(Isomo,p-1) Jsomo = IBCLR(Jsomo,q-1) Jdomo = IBSET(Idomo,q-1) - kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) - kend = idxI-1 + ! Check for Minimal alpha electrons (MS) + if(POPCNT(Jsomo).ge.MS)then + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) + kend = idxI-1 + else + cycle + endif else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then ! DOMO -> VMO Jsomo = IBSET(Isomo,p-1) @@ -148,6 +156,10 @@ use bitmasks else print*,"Something went wrong in obtain_associated_alphaI" endif + ! Check for Minimal alpha electrons (MS) + if(POPCNT(Jsomo).lt.MS)then + cycle + endif ! Again, we don't have to search from 1 ! we just use seniority to find the @@ -211,6 +223,12 @@ use bitmasks Jsomo = IBCLR(Isomo,p-1) Jsomo = IBCLR(Jsomo,q-1) Jdomo = IBSET(Idomo,q-1) + if(POPCNT(Jsomo).ge.MS)then + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) + kend = idxI-1 + else + cycle + endif else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then ! DOMO -> VMO Jsomo = IBSET(Isomo,p-1) diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f index 3926b908..7d7ae09b 100644 --- a/src/csf/obtain_I_foralpha.irp.f +++ b/src/csf/obtain_I_foralpha.irp.f @@ -205,12 +205,14 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI integer*8 :: xordiffSOMODOMO integer :: ndiffSOMO integer :: ndiffDOMO - integer :: nxordiffSOMODOMO - integer :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes - integer :: listholes(mo_num) - integer :: holetype(mo_num) - integer :: end_index - integer :: Nsomo_alpha + integer :: nxordiffSOMODOMO + integer :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes + integer :: listholes(mo_num) + integer :: holetype(mo_num) + integer :: end_index + integer :: Nsomo_alpha + integer*8 :: MS + MS = elec_alpha_num-elec_beta_num nconnectedI = 0 end_index = N_configuration @@ -233,6 +235,10 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI Idomo = Ialpha(1,2) Jsomo = psi_configuration(1,1,i) Jdomo = psi_configuration(1,2,i) + ! Check for Minimal alpha electrons (MS) + if(POPCNT(Isomo).lt.MS)then + cycle + endif diffSOMO = IEOR(Isomo,Jsomo) ndiffSOMO = POPCNT(diffSOMO) !if(idxI.eq.1)then @@ -299,6 +305,10 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI !IRP_ELSE p = TRAILZ(Isomo) + 1 !IRP_ENDIF + ! Check for Minimal alpha electrons (MS) + !if(POPCNT(Isomo).lt.MS)then + ! cycle + !endif end if case (2) ! DOMO -> SOMO diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index c9299cae..1c2357b2 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -1578,7 +1578,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze deallocate(excitationIds_single) deallocate(excitationTypes_single) - !print *," singles part psi(1,177)=",psi_out(1,177) + !print *," singles part psi(1,5)=",psi_out(1,5) allocate(listconnectedJ(N_INT,2,max(sze,10000))) allocate(alphas_Icfg(N_INT,2,max(sze,10000))) @@ -1622,6 +1622,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ) ! TODO : remove doubly excited for return + !print *,"I=",i," isomo=",psi_configuration(1,1,i)," idomo=",psi_configuration(1,2,i), " psiout=",psi_out(1,5) do k = 1,Nalphas_Icfg ! Now generate all singly excited with respect to a given alpha CFG @@ -1640,7 +1641,6 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze totcolsTKI = 0 rowsTKI = -1 NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) - !print *,"alphas_Icfg=",alphas_Icfg(1,1,k) do j = 1,nconnectedI NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) p = excitationIds(1,j) @@ -1690,6 +1690,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze moj = excitationIds(2,l) ! s mol = excitationIds(1,l) ! r diagfac = diagfactors_0 * diagfactors(l)* mo_two_e_integral(mok,mol,moi,moj)! g(pq,sr) = + !print *,"p=",mok,"q=",mol,"r=",moi,"s=",moj do m = 1,colsikpq ! = (ik|jl) GIJpqrs(totcolsTKI+m,l) = diagfac @@ -1749,7 +1750,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze deallocate(excitationTypes) deallocate(diagfactors) - !print *," psi(1,177)=",psi_out(1,177) + !print *," psi(1,823)=",psi_out(1,823), " g(1 8, 3 15)=",mo_two_e_integral(1,8,3,15), " ncore=",n_core_orb ! Add the diagonal contribution !$OMP DO diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f index 92097a2f..7bd5dd8d 100644 --- a/src/davidson/diagonalization_hcfg.irp.f +++ b/src/davidson/diagonalization_hcfg.irp.f @@ -339,7 +339,10 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N !call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze) !call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1)) !do i=1,sze_csf - ! print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1), " diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1)) + ! print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1)) + ! if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then + ! print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1)) + ! endif !end do !stop deallocate(tmpW) From 0394865bf693942d34f4d52e5c50083eeadc300c Mon Sep 17 00:00:00 2001 From: v1j4y Date: Tue, 21 Jun 2022 11:36:43 +0200 Subject: [PATCH 27/70] Added timings. --- src/davidson/diagonalization_hcfg.irp.f | 7 +++++++ src/davidson/diagonalization_hs2_dressed.irp.f | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f index 7bd5dd8d..08645b53 100644 --- a/src/davidson/diagonalization_hcfg.irp.f +++ b/src/davidson/diagonalization_hcfg.irp.f @@ -325,7 +325,14 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N enddo enddo !tmpU(1,1)=1.0d0 + double precision :: irp_rdtsc + double precision :: ticks_0, ticks_1 + integer*8 :: irp_imax + irp_imax = 1 + ticks_0 = irp_rdtsc() call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) + ticks_1 = irp_rdtsc() + print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" do kk=1,N_st_diag do ii=1,sze_csf W_csf(ii,shift+kk)=tmpW(kk,ii) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index d37b7386..2a4f53d4 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -356,7 +356,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ if ((sze > 100000).and.distributed_davidson) then call H_S2_u_0_nstates_zmq (W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) else + double precision :: irp_rdtsc + double precision :: ticks_0, ticks_1 + integer*8 :: irp_imax + irp_imax = 1 + ticks_0 = irp_rdtsc() call H_S2_u_0_nstates_openmp(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) + ticks_1 = irp_rdtsc() + print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" endif S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag)) ! else From bda97b39d6b2ee7d1a20b6138ba9886244e72c03 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Sat, 9 Jul 2022 09:00:29 +0200 Subject: [PATCH 28/70] Working on LAPACK gramSchmidt. Not working with naive dgeqrf. --- src/csf/cfgCI_interface.f90 | 18 +++++++ src/csf/cfgCI_utils.c | 20 +++++++ src/csf/tree_utils.h | 1 + src/utils/linear_algebra.irp.f | 97 ++++++++++++++++++++++++++++++++++ 4 files changed, 136 insertions(+) diff --git a/src/csf/cfgCI_interface.f90 b/src/csf/cfgCI_interface.f90 index b701f0ec..73bd600d 100644 --- a/src/csf/cfgCI_interface.f90 +++ b/src/csf/cfgCI_interface.f90 @@ -46,6 +46,24 @@ module cfunctions real (kind=C_DOUBLE ),intent(out) :: csftodetmatrix(rowsmax,colsmax) end subroutine getCSFtoDETTransformationMatrix end interface + interface + subroutine gramSchmidt(A, m, n, B) bind(C, name='gramSchmidt') + import C_INT32_T, C_INT64_T, C_DOUBLE + integer(kind=C_INT32_T),value,intent(in) :: m + integer(kind=C_INT32_T),value,intent(in) :: n + real (kind=C_DOUBLE ),intent(in) :: A(m,n) + real (kind=C_DOUBLE ),intent(out) :: B(m,n) + end subroutine gramSchmidt + end interface + interface + subroutine gramSchmidt_qp(A, m, n, B) bind(C, name='gramSchmidt_qp') + import C_INT32_T, C_INT64_T, C_DOUBLE + integer(kind=C_INT32_T),value,intent(in) :: m + integer(kind=C_INT32_T),value,intent(in) :: n + real (kind=C_DOUBLE ),intent(in) :: A(m,n) + real (kind=C_DOUBLE ),intent(out) :: B(m,n) + end subroutine gramSchmidt_qp + end interface end module cfunctions subroutine f_dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC) & diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index 6094f5bb..64593159 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -252,6 +252,26 @@ void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOM buildTreeDriver(bftree, *NSOMO, MS, NBF); } +void ortho_qr_csf(double *overlapMatrix, int lda, double *orthoMatrix, int rows, int cols); + +void gramSchmidt_qp(double *overlapMatrix, int rows, int cols, double *orthoMatrix){ + int i,j; + //for(j=0;j 2147483648 + LWORK=max(n,int(WORK(1))) + + deallocate(WORK) + allocate(WORK(LWORK)) + call dgeqp3(m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO ) + print *,A + print *,jpvt + deallocate(WORK,TAU) + !stop + + !LWORK=-1 + !call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO ) + !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 + !LWORK=max(n,int(WORK(1))) + + !deallocate(WORK) + !allocate(WORK(LWORK)) + !call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO ) + + !LWORK=-1 + !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO) + !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 + !LWORK=max(n,int(WORK(1))) + + !deallocate(WORK) + !allocate(WORK(LWORK)) + !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO) + ! + !allocate(C(LDA,n)) + !call dgemm('N','N',m,n,n,1.0d0,B,LDA,A,LDA,0.0d0,C,LDA) + !norm = 0.0d0 + !B = 0.0d0 + !!print *,C + !do i=1,m + ! norm = 0.0d0 + ! do j=1,n + ! norm = norm + C(j,i)*C(j,i) + ! end do + ! norm = 1.0d0/dsqrt(norm) + ! do j=1,n + ! B(j,i) = C(j,i) + ! end do + !end do + !print *,B + + + !deallocate(WORK,TAU) +end + +subroutine ortho_qr_csf(A, LDA, B, m, n) bind(C, name="ortho_qr_csf") + use iso_c_binding + integer(c_int32_t), value :: LDA + integer(c_int32_t), value :: m + integer(c_int32_t), value :: n + integer(c_int16_t) :: A(LDA,n) + integer(c_int16_t) :: B(LDA,n) + call ortho_qr_withB(A,LDA,B,m,n) +end subroutine ortho_qr_csf + subroutine ortho_qr(A,LDA,m,n) implicit none BEGIN_DOC From 85db84df23cf98161deb7238facae6b160cdee8a Mon Sep 17 00:00:00 2001 From: v1j4y Date: Wed, 14 Sep 2022 15:07:34 +0200 Subject: [PATCH 29/70] Triplet seems to be working. Doublet needs work. --- src/csf/cfgCI_utils.c | 65 +++++++++++++------ src/csf/conversion.irp.f | 1 + src/csf/sigma_vector.irp.f | 27 +++++--- src/csf/tree_utils.c | 4 ++ src/davidson/diagonalization_hcfg.irp.f | 8 ++- .../diagonalization_hs2_dressed.irp.f | 10 ++- 6 files changed, 82 insertions(+), 33 deletions(-) diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index 64593159..ad3af92e 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -1,5 +1,6 @@ #include #include +#include #include "tree_utils.h" void int_to_bin_digit(int64_t in, int count, int* out) @@ -28,7 +29,7 @@ void getncsfs1(int *inpnsomo, int *inpms, int *outncsfs){ int nsomo = *inpnsomo; int ms = *inpms; int nparcoupl = (nsomo + ms)/2; - *outncsfs = binom(nsomo, nparcoupl); + *outncsfs = binom((double)nsomo, (double)nparcoupl); } void getncsfs(int NSOMO, int MS, int *outncsfs){ @@ -39,8 +40,8 @@ void getncsfs(int NSOMO, int MS, int *outncsfs){ (*outncsfs) = 1; return; } - tmpndets = binom(NSOMO, nparcoupl); - (*outncsfs) = round(tmpndets - binom(NSOMO, nparcouplp1)); + tmpndets = binom((double)NSOMO, (double)nparcoupl); + (*outncsfs) = round(tmpndets - binom((double)NSOMO, (double)nparcouplp1)); } #include @@ -1392,14 +1393,23 @@ void getbftodetfunction(Tree *dettree, int NSOMO, int MS, int *BF1, double *rowv void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int *rows, int *cols){ int NSOMO=0; + //printf("before getSetBits Isomo=%ld, NSOMO=%ld\n",Isomo,NSOMO); getSetBits(Isomo, &NSOMO); + //printf("Isomo=%ld, NSOMO=%ld\n",Isomo,NSOMO); int ndets = 0; int NBF = 0; - double dNSOMO = NSOMO*1.0; + //double dNSOMO = NSOMO*1.0; // MS = alpha_num - beta_num - double nalpha = (NSOMO + MS)/2.0; + int nalpha = (NSOMO + MS)/2; //printf(" in convertbftodet : MS=%d nalpha=%3.2f\n",MS,nalpha); - ndets = (int)binom(dNSOMO, nalpha); + //ndets = (int)binom(dNSOMO, nalpha); + if(NSOMO > 0){ + ndets = (int)binom((double)NSOMO, (double)nalpha); + } + else if(NSOMO == 0){ + ndets = 1; + } + else printf("Something is wrong in calcMEdetpair\n"); Tree dettree = (Tree){ .rootNode = NULL, .NBF = -1 }; dettree.rootNode = malloc(sizeof(Node)); @@ -1420,16 +1430,6 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int * } else{ - //int addr = -1; - //int inpdet[NSOMO]; - //inpdet[0] = 1; - //inpdet[1] = 1; - //inpdet[2] = 1; - //inpdet[3] = 0; - //inpdet[4] = 0; - //inpdet[5] = 0; - - //findAddofDetDriver(&dettree, NSOMO, inpdet, &addr); int detlist[ndets]; getDetlistDriver(&dettree, NSOMO, detlist); @@ -1442,6 +1442,9 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int * generateAllBFs(Isomo, MS, &bftree, &NBF, &NSOMO); // Initialize transformation matrix + //printf("MS=%d NBF=%d ndets=%d NSOMO=%d\n",MS,NBF,ndets,NSOMO); + assert( NBF > 0); + assert( ndets > 0); (*bftodetmatrixptr) = malloc(NBF*ndets*sizeof(double)); (*rows) = NBF; (*cols) = ndets; @@ -1496,9 +1499,10 @@ void convertBFtoDetBasisWithArrayDims(int64_t Isomo, int MS, int rowsmax, int co getSetBits(Isomo, &NSOMO); int ndets = 0; int NBF = 0; - double dNSOMO = NSOMO*1.0; - double nalpha = (NSOMO + MS)/2.0; - ndets = (int)binom(dNSOMO, nalpha); + //double dNSOMO = NSOMO*1.0; + //double nalpha = (NSOMO + MS)/2.0; + int nalpha = (NSOMO + MS)/2; + ndets = (int)binom((double)NSOMO, (double)nalpha); Tree dettree = (Tree){ .rootNode = NULL, .NBF = -1 }; dettree.rootNode = malloc(sizeof(Node)); @@ -1582,6 +1586,7 @@ void getApqIJMatrixDims(int64_t Isomo, int64_t Jsomo, int64_t MS, int32_t *rowso getncsfs(NSOMOJ, MS, &NBFJ); (*rowsout) = NBFI; (*colsout) = NBFJ; + //exit(0); } void getApqIJMatrixDriver(int64_t Isomo, int64_t Jsomo, int orbp, int orbq, int64_t MS, int64_t NMO, double **CSFICSFJApqIJptr, int *rowsout, int *colsout){ @@ -1700,6 +1705,7 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in int rowsbftodetI, colsbftodetI; + //printf(" 1Calling convertBFtoDetBasis Isomo=%ld MS=%ld\n",Isomo,MS); convertBFtoDetBasis(Isomo, MS, &bftodetmatrixI, &rowsbftodetI, &colsbftodetI); // Fill matrix @@ -1707,7 +1713,14 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in int colsI = 0; //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); + //printf("Isomo=%ld\n",Isomo); getOverlapMatrix_withDet(bftodetmatrixI, rowsbftodetI, colsbftodetI, Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); + if(Isomo == 0){ + rowsI = 1; + colsI = 1; + } + + //printf("Isomo=%ld\n",Isomo); orthoMatrixI = malloc(rowsI*colsI*sizeof(double)); @@ -1719,6 +1732,7 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in int rowsbftodetJ, colsbftodetJ; + //printf(" 2Calling convertBFtoDetBasis Jsomo=%ld MS=%ld\n",Jsomo,MS); convertBFtoDetBasis(Jsomo, MS, &bftodetmatrixJ, &rowsbftodetJ, &colsbftodetJ); int rowsJ = 0; @@ -1726,6 +1740,10 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in // Fill matrix //getOverlapMatrix(Jsomo, MS, &overlapMatrixJ, &rowsJ, &colsJ, &NSOMO); getOverlapMatrix_withDet(bftodetmatrixJ, rowsbftodetJ, colsbftodetJ, Jsomo, MS, &overlapMatrixJ, &rowsJ, &colsJ, &NSOMO); + if(Jsomo == 0){ + rowsJ = 1; + colsJ = 1; + } orthoMatrixJ = malloc(rowsJ*colsJ*sizeof(double)); @@ -1743,18 +1761,25 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in int transA=false; int transB=false; + //printf("1Calling blas\n"); + //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsbftodetI,colsbftodetI,rowsA,colsA); callBlasMatxMat(bftodetmatrixI, rowsbftodetI, colsbftodetI, ApqIJ, rowsA, colsA, bfIApqIJ, transA, transB); + //printf("done\n"); // now transform I in csf basis double *CSFIApqIJ = malloc(rowsI*colsA*sizeof(double)); transA = false; transB = false; + //printf("2Calling blas\n"); + //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,colsI,colsI,colsA); callBlasMatxMat(orthoMatrixI, rowsI, colsI, bfIApqIJ, colsI, colsA, CSFIApqIJ, transA, transB); // now transform J in BF basis double *CSFIbfJApqIJ = malloc(rowsI*rowsbftodetJ*sizeof(double)); transA = false; transB = true; + //printf("3Calling blas\n"); + //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,colsA,rowsbftodetJ,colsbftodetJ); callBlasMatxMat(CSFIApqIJ, rowsI, colsA, bftodetmatrixJ, rowsbftodetJ, colsbftodetJ, CSFIbfJApqIJ, transA, transB); // now transform J in CSF basis @@ -1765,6 +1790,8 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in double *tmpCSFICSFJApqIJ = malloc(rowsI*rowsJ*sizeof(double)); transA = false; transB = true; + //printf("4Calling blas\n"); + //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,rowsbftodetJ,rowsJ,colsJ); callBlasMatxMat(CSFIbfJApqIJ, rowsI, rowsbftodetJ, orthoMatrixJ, rowsJ, colsJ, tmpCSFICSFJApqIJ, transA, transB); // Transfer to actual buffer in Fortran order for(int i = 0; i < rowsI; i++) diff --git a/src/csf/conversion.irp.f b/src/csf/conversion.irp.f index 9ddd4fee..494c3bfa 100644 --- a/src/csf/conversion.irp.f +++ b/src/csf/conversion.irp.f @@ -68,6 +68,7 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out) salpha = (s + MS)/2 bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) else + salpha = (s + MS)/2 bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) endif diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 1c2357b2..8f6e27e8 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -23,7 +23,13 @@ integer MS, ialpha MS = elec_alpha_num-elec_beta_num NSOMOMax = min(elec_num, cfg_nsomo_max + 2) - NSOMOMin = max(0,cfg_nsomo_min-2) + print *,'cfg_nsomo_min=',cfg_nsomo_min + print *,'cfg_nsomo_max=',cfg_nsomo_max + if(AND(cfg_nsomo_min , 1) .eq. 0)then + NSOMOMin = max(0,cfg_nsomo_min-2) + else + NSOMOMin = max(1,cfg_nsomo_min-2) + endif ! Note that here we need NSOMOMax + 2 sizes ialpha = (NSOMOMax + MS)/2 NCSFMax = max(1,nint((binom(NSOMOMax,ialpha)-binom(NSOMOMax,ialpha+1)))) ! TODO: NCSFs for MS=0 (CHECK) @@ -552,7 +558,7 @@ end subroutine get_phase_qp_to_cfg rows = -1 cols = -1 integer*8 MS - MS = 0 + MS = elec_alpha_num-elec_beta_num real*8,dimension(:,:),allocatable :: meMatrix integer maxdim @@ -931,7 +937,7 @@ subroutine calculate_preconditioner_cfg(diag_energies) noccp = holetype(k) - ! core-active + ! core-virtual do l = 1, n_core_orb jj = list_core(l) core_act_contrib += noccp * (2.d0 * mo_two_e_integrals_jj(jj,p) - mo_two_e_integrals_jj_exchange(jj,p)) @@ -1387,6 +1393,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze allocate(diag_energies(n_CSF)) call calculate_preconditioner_cfg(diag_energies) + !print *," diag energy =",diag_energies(1) MS = 0 norm_coef_cfg=0.d0 @@ -1549,12 +1556,15 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze !meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* h_core_ri(p,q) core_act_contrib = 0.0d0 if(p.ne.q)then - do pp=1,n_core_orb - n=list_core(pp) - core_act_contrib += 2.d0 * get_two_e_integral(p,n,q,n,mo_integrals_map) - get_two_e_integral(p,n,n,q,mo_integrals_map) - end do + do pp=1,n_core_orb + n=list_core(pp) + core_act_contrib += 2.d0 * get_two_e_integral(p,n,q,n,mo_integrals_map) - get_two_e_integral(p,n,n,q,mo_integrals_map) + end do endif meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* (h_act_ri(p,q) + core_act_contrib) + if(jj.eq.1.and.ii.eq.1)then + print *,"CC=",AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI), " p=",p," q=",q + endif call omp_set_lock(lock(jj)) do kk = 1,n_st psi_out(kk,jj) = psi_out(kk,jj) + meCC1 * psi_in(kk,ii) @@ -1578,7 +1588,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze deallocate(excitationIds_single) deallocate(excitationTypes_single) - !print *," singles part psi(1,5)=",psi_out(1,5) + !print *," singles part psi(1,1)=",psi_out(1,1) allocate(listconnectedJ(N_INT,2,max(sze,10000))) allocate(alphas_Icfg(N_INT,2,max(sze,10000))) @@ -1751,6 +1761,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze deallocate(diagfactors) !print *," psi(1,823)=",psi_out(1,823), " g(1 8, 3 15)=",mo_two_e_integral(1,8,3,15), " ncore=",n_core_orb + !print *," psi(1,1)=",psi_out(1,1) ! Add the diagonal contribution !$OMP DO diff --git a/src/csf/tree_utils.c b/src/csf/tree_utils.c index 26cb15e0..91bae8b2 100644 --- a/src/csf/tree_utils.c +++ b/src/csf/tree_utils.c @@ -1,3 +1,4 @@ +#include #include "tree_utils.h" void buildTree(Tree *bftree, @@ -52,6 +53,7 @@ void buildTreeDriver(Tree *bftree, int NSOMO, int MS, int *NBF){ int icpl = 0; // keep track of the ith ms (cannot be -ve) int addr = 0; // Counts the total BF's + assert(bftree->rootNode->addr == 0); buildTree(bftree, &(bftree->rootNode), isomo, izeros, icpl, NSOMO, MS); *NBF = bftree->rootNode->addr; @@ -264,6 +266,8 @@ void genDetBasis(Tree *dettree, int Isomo, int MS, int *ndets){ int NSOMO=0; getSetBits(Isomo, &NSOMO); genDetsDriver(dettree, NSOMO, MS, ndets); + // Closed shell case + if(NSOMO==0) (*ndets) = 1; } diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f index 08645b53..7da2e1d2 100644 --- a/src/davidson/diagonalization_hcfg.irp.f +++ b/src/davidson/diagonalization_hcfg.irp.f @@ -329,10 +329,12 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N double precision :: ticks_0, ticks_1 integer*8 :: irp_imax irp_imax = 1 - ticks_0 = irp_rdtsc() + !ticks_0 = irp_rdtsc() call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) - ticks_1 = irp_rdtsc() - print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" + !ticks_1 = irp_rdtsc() + !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" + !print *,' tmpW(1,1)=',tmpW(1,1) + !stop do kk=1,N_st_diag do ii=1,sze_csf W_csf(ii,shift+kk)=tmpW(kk,ii) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 2a4f53d4..b554754d 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -360,10 +360,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ double precision :: ticks_0, ticks_1 integer*8 :: irp_imax irp_imax = 1 - ticks_0 = irp_rdtsc() + !ticks_0 = irp_rdtsc() + !U = 0d0 + !U(1,1)=1.0d0 call H_S2_u_0_nstates_openmp(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) - ticks_1 = irp_rdtsc() - print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" + !ticks_1 = irp_rdtsc() + !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" + !print *,W(1,1) + !stop endif S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag)) ! else From 4583fa80b0dc70872973df2c502e36dbcea5b5db Mon Sep 17 00:00:00 2001 From: v1j4y Date: Wed, 14 Sep 2022 23:26:48 +0200 Subject: [PATCH 30/70] Fixed bug in AIJ container initialization for doublet. --- src/csf/cfgCI_utils.c | 1 - src/csf/sigma_vector.irp.f | 22 +++++++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index ad3af92e..76b64dd0 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -1798,7 +1798,6 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in for(int j = 0; j < rowsJ; j++) CSFICSFJApqIJ[j*rowsI + i] = tmpCSFICSFJApqIJ[i*rowsJ + j]; - // Garbage collection free(overlapMatrixI); free(overlapMatrixJ); diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 8f6e27e8..f9a37077 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -699,7 +699,7 @@ end subroutine get_phase_qp_to_cfg ! 3. SOMO -> VMO !print *,"Doing SOMO -> VMO" !AIJpqContainer(NSOMOMin,3,1,1,1,1) = 1.0d0 - AIJpqContainer(1,1,1,1,3,NSOMOMin) = 1.0d0 + AIJpqContainer(1,1,1:2,1:2,3,NSOMOMin) = 1.0d0 do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i,i, 2 @@ -761,7 +761,7 @@ end subroutine get_phase_qp_to_cfg ! 4. DOMO -> SOMO !print *,"Doing DOMO -> SOMO" !AIJpqContainer(NSOMOMin,4,1,1,1,1) = 1.0d0 - AIJpqContainer(1,1,1,1,4,NSOMOMin) = 1.0d0 + AIJpqContainer(1,1,1:2,1:2,4,NSOMOMin) = 1.0d0 do i = NSOMOMin+2, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i,i, 2 @@ -1562,9 +1562,9 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze end do endif meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* (h_act_ri(p,q) + core_act_contrib) - if(jj.eq.1.and.ii.eq.1)then - print *,"CC=",AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI), " p=",p," q=",q - endif + !if(jj.eq.1.and.ii.eq.1)then + ! print *,"CC=",AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI), " p=",p," q=",q + !endif call omp_set_lock(lock(jj)) do kk = 1,n_st psi_out(kk,jj) = psi_out(kk,jj) + meCC1 * psi_in(kk,ii) @@ -1643,10 +1643,15 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze call obtain_connected_I_foralpha(i, alphas_Icfg(1,1,k), connectedI_alpha, idxs_connectedI_alpha, & nconnectedI, excitationIds, excitationTypes, diagfactors) + if(nconnectedI .EQ. 0) then cycle endif + !if(i .EQ. 1) then + ! print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' kcfgDOMO=',alphas_Icfg(1,2,k),' ',POPCNT(alphas_Icfg(1,2,k)) + !endif + ! Here we do 2x the loop. One to count for the size of the matrix, then we compute. totcolsTKI = 0 rowsTKI = -1 @@ -1685,14 +1690,21 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) rowsTKI = rowsikpq + !if(i.eq.1) then + ! print *,rowsTKI,colsikpq," | ",pmodel,qmodel,extype,NSOMOalpha + !endif do m = 1,colsikpq do l = 1,rowsTKI do kk = 1,n_st TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) & * psi_in(kk,idxs_connectedI_alpha(j)+m-1) enddo + !if(i.eq.1) then + ! print *,AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) + !endif enddo enddo + diagfactors_0 = diagfactors(j)*0.5d0 moi = excitationIds(1,j) ! p mok = excitationIds(2,j) ! q From 409aa823732d9e1bd150da9aac84a66af4d2423a Mon Sep 17 00:00:00 2001 From: v1j4y Date: Thu, 15 Sep 2022 19:18:13 +0200 Subject: [PATCH 31/70] Doublet also works. Running tests. --- src/csf/sigma_vector.irp.f | 9 ++++++++- src/davidson/diagonalization_hcfg.irp.f | 11 +++++++++-- src/davidson/diagonalization_hs2_dressed.irp.f | 12 +++++++++--- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index f9a37077..21c19aaa 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -761,7 +761,10 @@ end subroutine get_phase_qp_to_cfg ! 4. DOMO -> SOMO !print *,"Doing DOMO -> SOMO" !AIJpqContainer(NSOMOMin,4,1,1,1,1) = 1.0d0 - AIJpqContainer(1,1,1:2,1:2,4,NSOMOMin) = 1.0d0 + AIJpqContainer(1,1,1,1,4,NSOMOMin) = 1.0d0 + AIJpqContainer(1,1,2,2,4,NSOMOMin) = 1.0d0 + AIJpqContainer(1,1,2,1,4,NSOMOMin) =-1.0d0 + AIJpqContainer(1,1,1,2,4,NSOMOMin) =-1.0d0 do i = NSOMOMin+2, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i,i, 2 @@ -779,6 +782,10 @@ end subroutine get_phase_qp_to_cfg Jsomo = ISHFT(1_8,j)-1 endif + !print *,"k,l=",k,l + !call debug_spindet(Jsomo,1) + !call debug_spindet(Isomo,1) + !AIJpqContainer(i,4,k,l,:,:) = 0.0d0 AIJpqContainer(:,:,k,l,4,i) = 0.0d0 call getApqIJMatrixDims(Isomo, & diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f index 7da2e1d2..d2050bc7 100644 --- a/src/davidson/diagonalization_hcfg.irp.f +++ b/src/davidson/diagonalization_hcfg.irp.f @@ -324,7 +324,8 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N tmpU(kk,ii) = U_csf(ii,shift+kk) enddo enddo - !tmpU(1,1)=1.0d0 + !tmpU =0.0d0 + !tmpU(1,2)=1.0d0 double precision :: irp_rdtsc double precision :: ticks_0, ticks_1 integer*8 :: irp_imax @@ -333,7 +334,13 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) !ticks_1 = irp_rdtsc() !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" - !print *,' tmpW(1,1)=',tmpW(1,1) + !print *,' tmpW(1,2)=',tmpW(1,2) + !do ii=1,sze + ! if (dabs(tmpW(1,ii)) > 1e-18) then + ! print *,tmpW(1,ii) + ! print *,ii,"somo=",psi_configuration(1,1,ii)," domo=",psi_configuration(1,2,ii) + ! endif + !end do !stop do kk=1,N_st_diag do ii=1,sze_csf diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index b554754d..cd98c925 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -149,7 +149,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ character*(16384) :: write_buffer double precision :: to_print(3,N_st) double precision :: cpu, wall - integer :: shift, shift2, itermax, istate + integer :: shift, shift2, itermax, istate, ii double precision :: r1, r2, alpha logical :: state_ok(N_st_diag_in*davidson_sze_max) integer :: nproc_target @@ -362,11 +362,17 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ irp_imax = 1 !ticks_0 = irp_rdtsc() !U = 0d0 - !U(1,1)=1.0d0 + !U(4587,1)=1.0d0 call H_S2_u_0_nstates_openmp(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) !ticks_1 = irp_rdtsc() !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" - !print *,W(1,1) + !print *,' tmpW(4587,1)=',W(4587,1) + !do ii=1,sze + ! if (dabs(W(ii,1)) > 1e-18) then + ! print *,W(ii,1) + ! print *,ii,"alpha=",psi_det(1,1,ii)," beta=",psi_det(1,2,ii) + ! endif + !end do !stop endif S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag)) From 7618a9202bb8e7cb6c6b45c30a1ac66181452f85 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Thu, 15 Sep 2022 21:41:10 +0200 Subject: [PATCH 32/70] All tests are passing for N_Int == 1. --- src/davidson/diagonalization_hcfg.irp.f | 8 -------- src/davidson/diagonalization_hs2_dressed.irp.f | 10 ---------- 2 files changed, 18 deletions(-) diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f index d2050bc7..659602a1 100644 --- a/src/davidson/diagonalization_hcfg.irp.f +++ b/src/davidson/diagonalization_hcfg.irp.f @@ -334,14 +334,6 @@ subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) !ticks_1 = irp_rdtsc() !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" - !print *,' tmpW(1,2)=',tmpW(1,2) - !do ii=1,sze - ! if (dabs(tmpW(1,ii)) > 1e-18) then - ! print *,tmpW(1,ii) - ! print *,ii,"somo=",psi_configuration(1,1,ii)," domo=",psi_configuration(1,2,ii) - ! endif - !end do - !stop do kk=1,N_st_diag do ii=1,sze_csf W_csf(ii,shift+kk)=tmpW(kk,ii) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index cd98c925..816f16ec 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -361,19 +361,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ integer*8 :: irp_imax irp_imax = 1 !ticks_0 = irp_rdtsc() - !U = 0d0 - !U(4587,1)=1.0d0 call H_S2_u_0_nstates_openmp(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) !ticks_1 = irp_rdtsc() !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" - !print *,' tmpW(4587,1)=',W(4587,1) - !do ii=1,sze - ! if (dabs(W(ii,1)) > 1e-18) then - ! print *,W(ii,1) - ! print *,ii,"alpha=",psi_det(1,1,ii)," beta=",psi_det(1,2,ii) - ! endif - !end do - !stop endif S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag)) ! else From 2234050bf70c10b29f8f202260b55d922a0dc85a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 17 Oct 2022 14:00:29 +0200 Subject: [PATCH 33/70] Cleaning OCaml --- ocaml/Message.ml | 8 ++++---- ocaml/TaskServer.ml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ocaml/Message.ml b/ocaml/Message.ml index b7d77430..049203d7 100644 --- a/ocaml/Message.ml +++ b/ocaml/Message.ml @@ -63,11 +63,11 @@ end module Connect_msg : sig type t = Tcp | Inproc | Ipc - val create : typ:string -> t + val create : string -> t val to_string : t -> string end = struct type t = Tcp | Inproc | Ipc - let create ~typ = + let create typ = match typ with | "tcp" -> Tcp | "inproc" -> Inproc @@ -515,9 +515,9 @@ let of_string s = | Connect_ socket -> Connect (Connect_msg.create socket) | NewJob_ { state ; push_address_tcp ; push_address_inproc } -> - Newjob (Newjob_msg.create push_address_tcp push_address_inproc state) + Newjob (Newjob_msg.create ~address_tcp:push_address_tcp ~address_inproc:push_address_inproc ~state) | EndJob_ state -> - Endjob (Endjob_msg.create state) + Endjob (Endjob_msg.create ~state) | GetData_ { state ; client_id ; key } -> GetData (GetData_msg.create ~client_id ~state ~key) | PutData_ { state ; client_id ; key } -> diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 92a6f5ca..ad827316 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -776,7 +776,7 @@ let run ~port = Zmq.Socket.create zmq_context Zmq.Socket.rep in Zmq.Socket.set_linger_period rep_socket 1_000_000; - bind_socket "REP" rep_socket port; + bind_socket ~socket_type:"REP" ~socket:rep_socket ~port; let initial_program_state = { queue = Queuing_system.create () ; From d387ee549a40133b53ba980d4dfc4d9cc5ccd639 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 1 Nov 2022 10:21:36 +0100 Subject: [PATCH 34/70] Fix compilation with gfortran --- .../dav_diag_dressed_ext_rout.irp.f | 14 +++++------ .../dav_double_dress_ext_rout.irp.f | 23 ++++++++++--------- .../dav_dressed_ext_rout.irp.f | 18 +++++++-------- src/dav_general_mat/dav_ext_rout.irp.f | 10 ++++---- src/dav_general_mat/dav_general.irp.f | 12 +++++----- 5 files changed, 39 insertions(+), 38 deletions(-) diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f index 243e9995..73608720 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f @@ -27,7 +27,7 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sz double precision, intent(in) :: H_jj(sze),Dress_jj(sze) double precision, intent(inout) :: u_in(sze,N_st_diag_in) double precision, intent(out) :: energies(N_st) - external hcalc + external :: hcalc integer :: iter, N_st_diag integer :: i,j,k,l,m @@ -207,7 +207,7 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sz enddo ! Normalize all states do k=1,N_st_diag - call normalize(u_in(1,k),sze) + call normalize(u_in(:,k),sze) enddo ! Copy from the guess input "u_in" to the working vectors "U" @@ -238,10 +238,10 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sz call ortho_qr(U,size(U,1),sze,shift2) ! it does W = H U with W(sze,N_st_diag),U(sze,N_st_diag) ! where sze is the size of the vector, N_st_diag is the number of states - call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag,sze) + call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag,sze) ! Compute then the DIAGONAL PART OF THE DRESSING ! += Dress_jj(i) * - call dressing_diag_uv(W(1,shift+1),U(1,shift+1),Dress_jj,N_st_diag_in,sze) + call dressing_diag_uv(W(:,shift+1),U(:,shift+1),Dress_jj,N_st_diag_in,sze) else ! Already computed in update below continue @@ -303,9 +303,9 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sz ! -------------------------------------------------- call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1)) call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1)) ! Compute residual vector and davidson step ! ----------------------------------------- @@ -319,7 +319,7 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sz enddo if (k <= N_st) then - residual_norm(k) = u_dot_u(U(1,shift2+k),sze) + residual_norm(k) = u_dot_u(U(:,shift2+k),sze) to_print(1,k) = lambda(k) to_print(2,k) = residual_norm(k) endif diff --git a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f index 884fd672..e59d21d1 100644 --- a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f +++ b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f @@ -31,7 +31,8 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies double precision, intent(inout) :: u_in(sze,N_st_diag) double precision, intent(out) :: energies(N_st_diag) logical, intent(out) :: converged - external hcalc + + external :: hcalc double precision, allocatable :: H_jj_tmp(:) ASSERT (N_st > 0) @@ -224,7 +225,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies u_in(k,k) = u_in(k,k) + 10.d0 enddo do k=1,N_st_diag_in - call normalize(u_in(1,k),sze) + call normalize(u_in(:,k),sze) enddo do k=1,N_st_diag_in @@ -248,10 +249,10 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies if ((iter > 1).or.(itertot == 1)) then ! Compute |W_k> = \sum_i |i> ! ----------------------------------- - call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag_in,sze) + call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag_in,sze) ! Compute then the DIAGONAL PART OF THE DRESSING ! += Dress_jj(i) * - call dressing_diag_uv(W(1,shift+1),U(1,shift+1),Dress_jj,N_st_diag_in,sze) + call dressing_diag_uv(W(:,shift+1),U(:,shift+1),Dress_jj,N_st_diag_in,sze) else ! Already computed in update below continue @@ -275,20 +276,20 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies ! ! call dgemm('T','N', N_st, N_st_diag_in, sze, 1.d0, & ! psi_coef, size(psi_coef,1), & -! U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) +! U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) ! ! call dgemm('N','N', sze, N_st_diag_in, N_st, 1.0d0, & ! Dressing_vec, size(Dressing_vec,1), s_tmp, size(s_tmp,1), & -! 1.d0, W(1,shift+1), size(W,1)) +! 1.d0, W(:,shift+1), size(W,1)) ! ! ! call dgemm('T','N', N_st, N_st_diag_in, sze, 1.d0, & ! Dressing_vec, size(Dressing_vec,1), & -! U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) +! U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) ! ! call dgemm('N','N', sze, N_st_diag_in, N_st, 1.0d0, & ! psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), & -! 1.d0, W(1,shift+1), size(W,1)) +! 1.d0, W(:,shift+1), size(W,1)) ! endif @@ -376,9 +377,9 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies ! -------------------------------------------------- call dgemm('N','N', sze, N_st_diag_in, shift2, & - 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1)) call dgemm('N','N', sze, N_st_diag_in, shift2, & - 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1)) ! Compute residual vector and davidson step ! ----------------------------------------- @@ -392,7 +393,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies enddo if (k <= N_st) then - residual_norm(k) = u_dot_u(U(1,shift2+k),sze) + residual_norm(k) = u_dot_u(U(:,shift2+k),sze) to_print(1,k) = lambda(k) to_print(2,k) = residual_norm(k) endif diff --git a/src/dav_general_mat/dav_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_dressed_ext_rout.irp.f index c3bfe91a..c045aa1a 100644 --- a/src/dav_general_mat/dav_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_dressed_ext_rout.irp.f @@ -214,7 +214,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di enddo ! Normalize all states do k=1,N_st_diag - call normalize(u_in(1,k),sze) + call normalize(u_in(:,k),sze) enddo ! Copy from the guess input "u_in" to the working vectors "U" @@ -244,7 +244,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di call ortho_qr(U,size(U,1),sze,shift2) ! it does W = H U with W(sze,N_st_diag),U(sze,N_st_diag) ! where sze is the size of the vector, N_st_diag is the number of states - call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag,sze) + call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag,sze) else ! Already computed in update below continue @@ -268,20 +268,20 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di stop ! call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & ! psi_coef, size(psi_coef,1), & -! U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) +! U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) ! ! call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & ! dressing_vec, size(dressing_vec,1), s_tmp, size(s_tmp,1), & -! 1.d0, W(1,shift+1), size(W,1)) +! 1.d0, W(:,shift+1), size(W,1)) ! ! ! call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & ! dressing_vec, size(dressing_vec,1), & -! U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) +! U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) ! ! call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & ! psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), & -! 1.d0, W(1,shift+1), size(W,1)) +! 1.d0, W(:,shift+1), size(W,1)) endif endif @@ -370,9 +370,9 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di ! -------------------------------------------------- call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1)) call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1)) ! Compute residual vector and davidson step ! ----------------------------------------- @@ -386,7 +386,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di enddo if (k <= N_st) then - residual_norm(k) = u_dot_u(U(1,shift2+k),sze) + residual_norm(k) = u_dot_u(U(:,shift2+k),sze) to_print(1,k) = lambda(k) to_print(2,k) = residual_norm(k) endif diff --git a/src/dav_general_mat/dav_ext_rout.irp.f b/src/dav_general_mat/dav_ext_rout.irp.f index 868d928b..2621e3a9 100644 --- a/src/dav_general_mat/dav_ext_rout.irp.f +++ b/src/dav_general_mat/dav_ext_rout.irp.f @@ -196,7 +196,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co enddo ! Normalize all states do k=1,N_st_diag - call normalize(u_in(1,k),sze) + call normalize(u_in(:,k),sze) enddo ! Copy from the guess input "u_in" to the working vectors "U" @@ -226,7 +226,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co call ortho_qr(U,size(U,1),sze,shift2) ! it does W = H U with W(sze,N_st_diag),U(sze,N_st_diag) ! where sze is the size of the vector, N_st_diag is the number of states - call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag,sze) + call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag,sze) else ! Already computed in update below continue @@ -288,9 +288,9 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co ! -------------------------------------------------- call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1)) call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1)) ! Compute residual vector and davidson step ! ----------------------------------------- @@ -304,7 +304,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co enddo if (k <= N_st) then - residual_norm(k) = u_dot_u(U(1,shift2+k),sze) + residual_norm(k) = u_dot_u(U(:,shift2+k),sze) to_print(1,k) = lambda(k) to_print(2,k) = residual_norm(k) endif diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f index aa4a2eb3..cd9124e6 100644 --- a/src/dav_general_mat/dav_general.irp.f +++ b/src/dav_general_mat/dav_general.irp.f @@ -206,7 +206,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv enddo ! Normalize all states do k=1,N_st_diag - call normalize(u_in(1,k),sze) + call normalize(u_in(:,k),sze) enddo ! Copy from the guess input "u_in" to the working vectors "U" @@ -236,8 +236,8 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv call ortho_qr(U,size(U,1),sze,shift2) call ortho_qr(U,size(U,1),sze,shift2) -! call H_S2_u_0_nstates_openmp(W(1,shift+1),U(1,shift+1),N_st_diag,sze) - call hpsi(W(1,shift+1),U(1,shift+1),N_st_diag,sze,h_mat) +! call H_S2_u_0_nstates_openmp(W(:,shift+1),U(:,shift+1),N_st_diag,sze) + call hpsi(W(:,shift+1),U(:,shift+1),N_st_diag,sze,h_mat) else ! Already computed in update below continue @@ -299,9 +299,9 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv ! -------------------------------------------------- call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1)) call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1)) ! Compute residual vector and davidson step ! ----------------------------------------- @@ -315,7 +315,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv enddo if (k <= N_st) then - residual_norm(k) = u_dot_u(U(1,shift2+k),sze) + residual_norm(k) = u_dot_u(U(:,shift2+k),sze) to_print(1,k) = lambda(k) to_print(2,k) = residual_norm(k) endif From 05be0276f05670d36e1b187062bc0904cec91463 Mon Sep 17 00:00:00 2001 From: ydamour Date: Thu, 3 Nov 2022 14:30:30 +0100 Subject: [PATCH 35/70] 2 by 2 diag --- src/utils/util.irp.f | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index 84593031..f295aea7 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -411,3 +411,28 @@ subroutine lowercase(txt,n) enddo end +subroutine v2_over_x(v,x,res) + + !BEGIN_DOC + ! Two by two diagonalization to avoid the divergence in v^2/x when x goes to 0 + !END_DOC + + implicit none + + double precision, intent(in) :: v, x + double precision, intent(out) :: res + + double precision :: delta_E, tmp, val + + res = 0d0 + delta_E = x + if (v == 0.d0) return + + val = 2d0 * v + tmp = dsqrt(delta_E * delta_E + val * val) + if (delta_E < 0.d0) then + tmp = -tmp + endif + res = 0.5d0 * (tmp - delta_E) + +end From 90f78a9b782842429c970213018381c9516ef241 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Thu, 3 Nov 2022 15:36:07 +0100 Subject: [PATCH 36/70] Remove QR for CSFs because it does not work. --- src/utils/linear_algebra.irp.f | 191 +++++++++++++++++---------------- 1 file changed, 96 insertions(+), 95 deletions(-) diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 3463370f..16f67182 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1133,102 +1133,103 @@ subroutine ortho_svd(A,LDA,m,n) end -subroutine ortho_qr_withB(A,LDA,B,m,n) - implicit none - BEGIN_DOC - ! Orthogonalization using Q.R factorization - ! - ! A : Overlap Matrix - ! - ! LDA : leftmost dimension of A - ! - ! m : Number of rows of A - ! - ! n : Number of columns of A - ! - ! B : Output orthogonal basis - ! - END_DOC - integer, intent(in) :: m,n, LDA - double precision, intent(inout) :: A(LDA,n) - double precision, intent(inout) :: B(LDA,n) +! QR to orthonormalize CSFs does not work :-( +!subroutine ortho_qr_withB(A,LDA,B,m,n) +! implicit none +! BEGIN_DOC +! ! Orthogonalization using Q.R factorization +! ! +! ! A : Overlap Matrix +! ! +! ! LDA : leftmost dimension of A +! ! +! ! m : Number of rows of A +! ! +! ! n : Number of columns of A +! ! +! ! B : Output orthogonal basis +! ! +! END_DOC +! integer, intent(in) :: m,n, LDA +! double precision, intent(inout) :: A(LDA,n) +! double precision, intent(inout) :: B(LDA,n) +! +! integer :: LWORK, INFO +! integer, allocatable :: jpvt(:) +! double precision, allocatable :: TAU(:), WORK(:) +! double precision, allocatable :: C(:,:) +! double precision :: norm +! integer :: i,j +! +! allocate (TAU(min(m,n)), WORK(1)) +! allocate (jpvt(n)) +! !print *," In function ortho" +! B = A +! +! jpvt(1:n)=1 +! +! LWORK=-1 +! call dgeqp3( m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO ) +! +! ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 +! LWORK=max(n,int(WORK(1))) +! +! deallocate(WORK) +! allocate(WORK(LWORK)) +! call dgeqp3(m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO ) +! print *,A +! print *,jpvt +! deallocate(WORK,TAU) +! !stop +! +! !LWORK=-1 +! !call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO ) +! !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 +! !LWORK=max(n,int(WORK(1))) +! +! !deallocate(WORK) +! !allocate(WORK(LWORK)) +! !call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO ) +! +! !LWORK=-1 +! !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO) +! !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 +! !LWORK=max(n,int(WORK(1))) +! +! !deallocate(WORK) +! !allocate(WORK(LWORK)) +! !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO) +! ! +! !allocate(C(LDA,n)) +! !call dgemm('N','N',m,n,n,1.0d0,B,LDA,A,LDA,0.0d0,C,LDA) +! !norm = 0.0d0 +! !B = 0.0d0 +! !!print *,C +! !do i=1,m +! ! norm = 0.0d0 +! ! do j=1,n +! ! norm = norm + C(j,i)*C(j,i) +! ! end do +! ! norm = 1.0d0/dsqrt(norm) +! ! do j=1,n +! ! B(j,i) = C(j,i) +! ! end do +! !end do +! !print *,B +! +! +! !deallocate(WORK,TAU) +!end - integer :: LWORK, INFO - integer, allocatable :: jpvt(:) - double precision, allocatable :: TAU(:), WORK(:) - double precision, allocatable :: C(:,:) - double precision :: norm - integer :: i,j - - allocate (TAU(min(m,n)), WORK(1)) - allocate (jpvt(n)) - !print *," In function ortho" - B = A - - jpvt(1:n)=1 - - LWORK=-1 - call dgeqp3( m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO ) - - ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 - LWORK=max(n,int(WORK(1))) - - deallocate(WORK) - allocate(WORK(LWORK)) - call dgeqp3(m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO ) - print *,A - print *,jpvt - deallocate(WORK,TAU) - !stop - - !LWORK=-1 - !call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO ) - !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 - !LWORK=max(n,int(WORK(1))) - - !deallocate(WORK) - !allocate(WORK(LWORK)) - !call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO ) - - !LWORK=-1 - !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO) - !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 - !LWORK=max(n,int(WORK(1))) - - !deallocate(WORK) - !allocate(WORK(LWORK)) - !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO) - ! - !allocate(C(LDA,n)) - !call dgemm('N','N',m,n,n,1.0d0,B,LDA,A,LDA,0.0d0,C,LDA) - !norm = 0.0d0 - !B = 0.0d0 - !!print *,C - !do i=1,m - ! norm = 0.0d0 - ! do j=1,n - ! norm = norm + C(j,i)*C(j,i) - ! end do - ! norm = 1.0d0/dsqrt(norm) - ! do j=1,n - ! B(j,i) = C(j,i) - ! end do - !end do - !print *,B - - - !deallocate(WORK,TAU) -end - -subroutine ortho_qr_csf(A, LDA, B, m, n) bind(C, name="ortho_qr_csf") - use iso_c_binding - integer(c_int32_t), value :: LDA - integer(c_int32_t), value :: m - integer(c_int32_t), value :: n - integer(c_int16_t) :: A(LDA,n) - integer(c_int16_t) :: B(LDA,n) - call ortho_qr_withB(A,LDA,B,m,n) -end subroutine ortho_qr_csf +!subroutine ortho_qr_csf(A, LDA, B, m, n) bind(C, name="ortho_qr_csf") +! use iso_c_binding +! integer(c_int32_t), value :: LDA +! integer(c_int32_t), value :: m +! integer(c_int32_t), value :: n +! integer(c_int16_t) :: A(LDA,n) +! integer(c_int16_t) :: B(LDA,n) +! call ortho_qr_withB(A,LDA,B,m,n) +!end subroutine ortho_qr_csf subroutine ortho_qr(A,LDA,m,n) implicit none From 3e44e79be5000dbc6ae791f78ffec721a953c503 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Thu, 3 Nov 2022 15:38:05 +0100 Subject: [PATCH 37/70] Deactive qr also in CFG utils function. --- src/csf/cfgCI_utils.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index 76b64dd0..3510db37 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -255,23 +255,24 @@ void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOM void ortho_qr_csf(double *overlapMatrix, int lda, double *orthoMatrix, int rows, int cols); -void gramSchmidt_qp(double *overlapMatrix, int rows, int cols, double *orthoMatrix){ - int i,j; - //for(j=0;j Date: Thu, 3 Nov 2022 15:40:05 +0100 Subject: [PATCH 38/70] Remove debug printing and fix compilation issue. --- src/csf/sigma_vector.irp.f | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 21c19aaa..3fa437a5 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -112,7 +112,7 @@ ! endif !enddo n_CSF = 0 - print *," -9(((((((((((((( NSOMOMin=",NSOMOMin + !print *," -9(((((((((((((( NSOMOMin=",NSOMOMin ncfgprev = cfg_seniority_index(NSOMOMin) ! can be -1 if(ncfgprev.eq.-1)then ncfgprev=1 @@ -145,10 +145,10 @@ endif endif n_CSF += ncfg*dimcsfpercfg - print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " ncfgprev=",ncfgprev, " senor=",cfg_seniority_index(i) + !print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " ncfgprev=",ncfgprev, " senor=",cfg_seniority_index(i) ncfgprev = cfg_seniority_index(i+2) end do - print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration + !print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration END_PROVIDER @@ -240,7 +240,7 @@ end subroutine get_phase_qp_to_cfg ! initialization psi_coef_config = 0.d0 DetToCSFTransformationMatrix(0,:,:) = 1.d0 - do i = 2-iand(MS,1), NSOMOMax,2 + do i = 2-iand(MS,1_8), NSOMOMax,2 Isomo = IBSET(0_8, i) - 1_8 ! rows = Ncsfs ! cols = Ndets @@ -342,8 +342,8 @@ end subroutine get_phase_qp_to_cfg nsomomin = elec_alpha_num-elec_beta_num rowsmax = 0 colsmax = 0 - print *,"NSOMOMax = ",NSOMOMax - print *,"NSOMOMin = ",NSOMOMin + !print *,"NSOMOMax = ",NSOMOMax + !print *,"NSOMOMin = ",NSOMOMin !allocate(AIJpqMatrixDimsList(NSOMOMax,NSOMOMax,4,NSOMOMax,NSOMOMax,2)) ! Type ! 1. SOMO -> SOMO @@ -525,7 +525,7 @@ end subroutine get_phase_qp_to_cfg end do end do end do - print *,"Rowsmax=",rowsmax," Colsmax=",colsmax + !print *,"Rowsmax=",rowsmax," Colsmax=",colsmax END_PROVIDER BEGIN_PROVIDER [ real*8, AIJpqContainer, (NBFMax,NBFmax,NSOMOMax+1,NSOMOMax+1,4,NSOMOMin:NSOMOMax)] From c656d686215f2c6bec4e5b5e8c5266c638b7ee45 Mon Sep 17 00:00:00 2001 From: ydamour Date: Sat, 5 Nov 2022 13:46:50 +0100 Subject: [PATCH 39/70] init commit trust region --- src/utils_trust_region/EZFIO.cfg | 89 + src/utils_trust_region/NEED | 1 + src/utils_trust_region/README.rst | 5 + src/utils_trust_region/TANGLE_org_mode.sh | 7 + src/utils_trust_region/algo_trust.org | 593 ++++++ src/utils_trust_region/apply_mo_rotation.org | 86 + src/utils_trust_region/mat_to_vec_index.org | 63 + src/utils_trust_region/rotation_matrix.org | 454 +++++ .../rotation_matrix_iterative.org | 136 ++ .../sub_to_full_rotation_matrix.org | 65 + .../trust_region_expected_e.org | 121 ++ .../trust_region_optimal_lambda.org | 1665 +++++++++++++++++ src/utils_trust_region/trust_region_rho.org | 123 ++ src/utils_trust_region/trust_region_step.org | 726 +++++++ src/utils_trust_region/vec_to_mat_index.org | 72 + src/utils_trust_region/vec_to_mat_v2.org | 40 + 16 files changed, 4246 insertions(+) create mode 100644 src/utils_trust_region/EZFIO.cfg create mode 100644 src/utils_trust_region/NEED create mode 100644 src/utils_trust_region/README.rst create mode 100755 src/utils_trust_region/TANGLE_org_mode.sh create mode 100644 src/utils_trust_region/algo_trust.org create mode 100644 src/utils_trust_region/apply_mo_rotation.org create mode 100644 src/utils_trust_region/mat_to_vec_index.org create mode 100644 src/utils_trust_region/rotation_matrix.org create mode 100644 src/utils_trust_region/rotation_matrix_iterative.org create mode 100644 src/utils_trust_region/sub_to_full_rotation_matrix.org create mode 100644 src/utils_trust_region/trust_region_expected_e.org create mode 100644 src/utils_trust_region/trust_region_optimal_lambda.org create mode 100644 src/utils_trust_region/trust_region_rho.org create mode 100644 src/utils_trust_region/trust_region_step.org create mode 100644 src/utils_trust_region/vec_to_mat_index.org create mode 100644 src/utils_trust_region/vec_to_mat_v2.org diff --git a/src/utils_trust_region/EZFIO.cfg b/src/utils_trust_region/EZFIO.cfg new file mode 100644 index 00000000..9c9f6248 --- /dev/null +++ b/src/utils_trust_region/EZFIO.cfg @@ -0,0 +1,89 @@ +[thresh_delta] +type: double precision +doc: Threshold to stop the optimization if the radius of the trust region delta < thresh_delta +interface: ezfio,provider,ocaml +default: 1.e-10 + +[thresh_rho] +type: double precision +doc: Threshold for the step acceptance in the trust region algorithm, if (rho .geq. thresh_rho) the step is accepted, else the step is cancelled and a smaller step is tried until (rho .geq. thresh_rho) +interface: ezfio,provider,ocaml +default: 0.1 + +[thresh_eig] +type: double precision +doc: Threshold to consider when an eigenvalue is 0 in the trust region algorithm +interface: ezfio,provider,ocaml +default: 1.e-12 + +[thresh_model] +type: double precision +doc: If if ABS(criterion - criterion_model) < thresh_model, the program exit the trust region algorithm +interface: ezfio,provider,ocaml +default: 1.e-12 + +[absolute_eig] +type: logical +doc: If True, the algorithm replace the eigenvalues of the hessian by their absolute value to compute the step (in the trust region) +interface: ezfio,provider,ocaml +default: false + +[thresh_wtg] +type: double precision +doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is equal to 0. Must be smaller than thresh_eig by several order of magnitude to avoid numerical problem. If the research of the optimal lambda cannot reach the condition (||x|| .eq. delta) because (||x|| .lt. delta), the reason might be that thresh_wtg is too big or/and thresh_eig is too small +interface: ezfio,provider,ocaml +default: 1.e-6 + +[thresh_wtg2] +type: double precision +doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is 0 in the case of avoid_saddle .eq. true. There is no particular reason to put a different value that thresh_wtg, but it can be useful one day +interface: ezfio,provider,ocaml +default: 1.e-6 + +[avoid_saddle] +type: logical +doc: Test to avoid saddle point, active if true +interface: ezfio,provider,ocaml +default: false + +[version_avoid_saddle] +type: integer +doc: cf. trust region, not stable +interface: ezfio,provider,ocaml +default: 3 + +[thresh_rho_2] +type: double precision +doc: Threshold for the step acceptance for the research of lambda in the trust region algorithm, if (rho_2 .geq. thresh_rho_2) the step is accepted, else the step is rejected +interface: ezfio,provider,ocaml +default: 0.1 + +[thresh_cc] +type: double precision +doc: Threshold to stop the research of the optimal lambda in the trust region algorithm when (dabs(1d0-||x||^2/delta^2) < thresh_cc) +interface: ezfio,provider,ocaml +default: 1.e-6 + +[thresh_model_2] +type: double precision +doc: if (ABS(criterion - criterion_model) < thresh_model_2), i.e., the difference between the actual criterion and the predicted next criterion, during the research of the optimal lambda in the trust region algorithm it prints a warning +interface: ezfio,provider,ocaml +default: 1.e-12 + +[version_lambda_search] +type: integer +doc: Research of the optimal lambda in the trust region algorithm to constrain the norm of the step by solving: 1 -> ||x||^2 - delta^2 .eq. 0, 2 -> 1/||x||^2 - 1/delta^2 .eq. 0 +interface: ezfio,provider,ocaml +default: 2 + +[nb_it_max_lambda] +type: integer +doc: Maximal number of iterations for the research of the optimal lambda in the trust region algorithm +interface: ezfio,provider,ocaml +default: 100 + +[nb_it_max_pre_search] +type: integer +doc: Maximal number of iterations for the pre-research of the optimal lambda in the trust region algorithm +interface: ezfio,provider,ocaml +default: 40 diff --git a/src/utils_trust_region/NEED b/src/utils_trust_region/NEED new file mode 100644 index 00000000..1a65ce38 --- /dev/null +++ b/src/utils_trust_region/NEED @@ -0,0 +1 @@ +hartree_fock diff --git a/src/utils_trust_region/README.rst b/src/utils_trust_region/README.rst new file mode 100644 index 00000000..6a0689b6 --- /dev/null +++ b/src/utils_trust_region/README.rst @@ -0,0 +1,5 @@ +============ +trust_region +============ + +The documentation can be found in the org files. diff --git a/src/utils_trust_region/TANGLE_org_mode.sh b/src/utils_trust_region/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/utils_trust_region/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/utils_trust_region/algo_trust.org b/src/utils_trust_region/algo_trust.org new file mode 100644 index 00000000..40742b45 --- /dev/null +++ b/src/utils_trust_region/algo_trust.org @@ -0,0 +1,593 @@ +* Algorithm for the trust region + +step_in_trust_region: +Computes the step in the trust region (delta) +(automatically sets at the iteration 0 and which evolves during the +process in function of the evolution of rho). The step is computing by +constraining its norm with a lagrange multiplier. +Since the calculation of the step is based on the Newton method, an +estimation of the gain in energy is given using the Taylors series +truncated at the second order (criterion_model). +If (DABS(criterion-criterion_model) < 1d-12) then + must_exit = .True. +else + must_exit = .False. + +This estimation of the gain in energy is used by +is_step_cancel_trust_region to say if the step is accepted or cancelled. + +If the step must be cancelled, the calculation restart from the same +hessian and gradient and recomputes the step but in a smaller trust +region and so on until the step is accepted. If the step is accepted +the hessian and the gradient are recomputed to produce a new step. + +Example: + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +! !### Initialization ### +! delta = 0d0 +! nb_iter = 0 ! Must start at 0 !!! +! rho = 0.5d0 +! not_converged = .True. +! +! ! ### TODO ### +! ! Compute the criterion before the loop +! call #your_criterion(prev_criterion) +! +! do while (not_converged) +! ! ### TODO ## +! ! Call your gradient +! ! Call you hessian +! call #your_gradient(v_grad) (1D array) +! call #your_hessian(H) (2D array) +! +! ! ### TODO ### +! ! Diagonalization of the hessian +! call diagonalization_hessian(n,H,e_val,w) +! +! cancel_step = .True. ! To enter in the loop just after +! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho +! do while (cancel_step) +! +! ! Hessian,gradient,Criterion -> x +! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) +! +! if (must_exit) then +! ! ### Message ### +! ! if step_in_trust_region sets must_exit on true for numerical reasons +! print*,'algo_trust1 sends the message : Exit' +! !### exit ### +! endif +! +! !### TODO ### +! ! Compute x -> m_x +! ! Compute m_x -> R +! ! Apply R and keep the previous MOs... +! ! Update/touch +! ! Compute the new criterion/energy -> criterion +! +! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) +! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) +! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) +! +! TOUCH #your_variables +! +! call #your_criterion(criterion) +! +! ! Criterion -> step accepted or rejected +! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) +! +! ! ### TODO ### +! !if (cancel_step) then +! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) +! !endif +! #if (cancel_step) then +! #mo_coef = prev_mos +! #endif +! +! enddo +! +! !call save_mos() !### depend of the time for 1 iteration +! +! ! To exit the external loop if must_exit = .True. +! if (must_exit) then +! !### exit ### +! endif +! +! ! Step accepted, nb iteration + 1 +! nb_iter = nb_iter + 1 +! +! ! ### TODO ### +! !if (###Conditions###) then +! ! no_converged = .False. +! !endif +! #if (#your_conditions) then +! # not_converged = .False. +! #endif +! +! enddo +#+END_SRC + +Variables: + +Input: +| n | integer | m*(m-1)/2 | +| m | integer | number of mo in the mo_class | +| H(n,n) | double precision | Hessian | +| v_grad(n) | double precision | Gradient | +| W(n,n) | double precision | Eigenvectors of the hessian | +| e_val(n) | double precision | Eigenvalues of the hessian | +| criterion | double precision | Actual criterion | +| prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | +| rho | double precision | Given by is_step_cancel_trus_region | +| | | Agreement between the real function and the Taylor series (2nd order) | +| nb_iter | integer | Actual number of iterations | + +Input/output: +| delta | double precision | Radius of the trust region | + +Output: +| criterion_model | double precision | Predicted criterion after the rotation | +| x(n) | double precision | Step | +| must_exit | logical | If the program must exit the loop | + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the step and the expected criterion/energy after the step + !END_DOC + + implicit none + + ! in + integer, intent(in) :: n, nb_iter + double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) + double precision, intent(in) :: rho, prev_criterion + + ! inout + double precision, intent(inout) :: delta, e_val(n) + + ! out + double precision, intent(out) :: criterion_model, x(n) + logical, intent(out) :: must_exit + + ! internal + integer :: info + + must_exit = .False. + + call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) + + call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) + + ! exit if DABS(prev_criterion - criterion_model) < 1d-12 + if (DABS(prev_criterion - criterion_model) < thresh_model) then + print*,'' + print*,'###############################################################################' + print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' + print*,'###############################################################################' + print*,'' + must_exit = .True. + endif + + if (delta < thresh_delta) then + print*,'' + print*,'##############################################' + print*,'Delta <', thresh_delta, 'stop the trust region' + print*,'##############################################' + print*,'' + must_exit = .True. + endif + + ! Add after the call to this subroutine, a statement: + ! "if (must_exit) then + ! exit + ! endif" + ! in order to exit the optimization loop + +end subroutine +#+END_SRC + +Variables: + +Input: +| nb_iter | integer | actual number of iterations | +| prev_criterion | double precision | criterion before the application of the step x | +| criterion | double precision | criterion after the application of the step x | +| criterion_model | double precision | predicted criterion after the application of x | + +Output: +| rho | double precision | Agreement between the predicted criterion and the real new criterion | +| cancel_step | logical | If the step must be cancelled | + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) + + include 'pi.h' + + !BEGIN_DOC + ! Compute if the step should be cancelled + !END_DOC + + implicit none + + ! in + double precision, intent(in) :: prev_criterion, criterion, criterion_model + + ! inout + integer, intent(inout) :: nb_iter + + ! out + logical, intent(out) :: cancel_step + double precision, intent(out) :: rho + + ! Computes rho + call trust_region_rho(prev_criterion,criterion,criterion_model,rho) + + if (nb_iter == 0) then + nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled + endif + + ! If rho < thresh_rho -> give something in output to cancel the step + if (rho >= thresh_rho) then !0.1d0) then + ! The step is accepted + cancel_step = .False. + else + ! The step is rejected + cancel_step = .True. + print*, '***********************' + print*, 'Step cancel : rho <', thresh_rho + print*, '***********************' + endif + +end subroutine +#+END_SRC + +** Template for MOs +#+BEGIN_SRC f90 :comments org :tangle trust_region_template_mos.txt +subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list) + + implicit none + + ! Variables + + ! In + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + + ! Out + ! Rien ou un truc pour savoir si ça c'est bien passé + + ! Internal + double precision, allocatable :: e_val(:), W(:,:), tmp_R(:,:), R(:,:), tmp_x(:), tmp_m_x(:,:) + double precision, allocatable :: prev_mos(:,:) + double precision :: criterion, prev_criterion, criterion_model + double precision :: delta, rho + logical :: not_converged, cancel_step, must_exit, enforce_step_cancellation + integer :: nb_iter, info, nb_sub_iter + integer :: i,j,tmp_i,tmp_j + + allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n),tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Provide the criterion, but unnecessary because it's done + ! automatically + PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! Initialization + delta = 0d0 + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must start at 0.5 + not_converged = .True. ! Must be true + + ! Compute the criterion before the loop + prev_criterion = C_PROVIDER + + do while (not_converged) + + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! The new hessian and gradient are computed at the end of the previous iteration + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + cancel_step = .True. ! To enter in the loop just after + nb_sub_iter = 0 + + ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + + print*,'-----------------------------' + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + if (must_exit) then + ! if step_in_trust_region sets must_exit on true for numerical reasons + print*,'trust_region_step_w_expected_e sent the message : Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Forces the step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! touch mo_coef + call clear_mo_map ! Only if you are using the bi-electronic integrals + ! mo_coef becomes valid + ! And avoid the recomputation of the providers which depend of mo_coef + TOUCH mo_coef C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! To update the other parameters if needed + call #update_parameters() + + ! To enforce the program to provide new criterion after the update + ! of the parameters + FREE C_PROVIDER + PROVIDE C_PROVIDER + criterion = C_PROVIDER + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step ? + if (cancel_step) then + ! Replacement by the previous MOs + mo_coef = prev_mos + ! call save_mos() ! depends of the time for 1 iteration + + ! No need to clear_mo_map since we don't recompute the gradient and the hessian + ! mo_coef becomes valid + ! Avoid the recomputation of the providers which depend of mo_coef + TOUCH mo_coef H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER + else + ! The step is accepted: + ! criterion -> prev criterion + + ! The replacement "criterion -> prev criterion" is already done + ! in trust_region_rho, so if the criterion does not have a reason + ! to change, it will change nothing for the criterion and will + ! force the program to provide the new hessian, gradient and + ! convergence criterion for the next iteration. + ! But in the case of orbital optimization we diagonalize the CI + ! matrix after the "FREE" statement, so the criterion will change + + FREE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + prev_criterion = C_PROVIDER + + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + + ! call save_mos() ! depends of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! Provide the convergence criterion + ! Provide the gradient and the hessian for the next iteration + PROVIDE cc_PROVIDER + + ! To exit + if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then + not_converged = .False. + endif + + if (nb_iter > optimization_max_nb_iter) then + not_converged = .False. + endif + + if (delta < thresh_delta) then + not_converged = .False. + endif + + enddo + + ! Save the final MOs + call save_mos() + + ! Diagonalization of the hessian + ! (To see the eigenvalues at the end of the optimization) + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + deallocate(e_val, W, tmp_R, R, tmp_x, prev_mos) + +end +#+END_SRC + +** Cartesian version +#+BEGIN_SRC f90 :comments org :tangle trust_region_template_xyz.txt +subroutine algo_trust_cartesian_template(tmp_n) + + implicit none + + ! Variables + + ! In + integer, intent(in) :: tmp_n + + ! Out + ! Rien ou un truc pour savoir si ça c'est bien passé + + ! Internal + double precision, allocatable :: e_val(:), W(:,:), tmp_x(:) + double precision :: criterion, prev_criterion, criterion_model + double precision :: delta, rho + logical :: not_converged, cancel_step, must_exit + integer :: nb_iter, nb_sub_iter + integer :: i,j + + allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n)) + + PROVIDE C_PROVIDER X_PROVIDER H_PROVIDER g_PROVIDER + + ! Initialization + delta = 0d0 + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must start at 0.5 + not_converged = .True. ! Must be true + + ! Compute the criterion before the loop + prev_criterion = C_PROVIDER + + do while (not_converged) + + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + if (nb_iter > 0) then + PROVIDE H_PROVIDER g_PROVIDER + endif + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + cancel_step = .True. ! To enter in the loop just after + nb_sub_iter = 0 + + ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + + print*,'-----------------------------' + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + if (must_exit) then + ! if step_in_trust_region sets must_exit on true for numerical reasons + print*,'trust_region_step_w_expected_e sent the message : Exit' + exit + endif + + ! New coordinates, check the sign + X_PROVIDER = X_PROVIDER - tmp_x + + ! touch X_PROVIDER + TOUCH X_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! To update the other parameters if needed + call #update_parameters() + + ! New criterion + PROVIDE C_PROVIDER ! Unnecessary + criterion = C_PROVIDER + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancel the previous step + if (cancel_step) then + ! Replacement by the previous coordinates, check the sign + X_PROVIDER = X_PROVIDER + tmp_x + + ! Avoid the recomputation of the hessian and the gradient + TOUCH X_PROVIDER H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + PROVIDE cc_PROVIDER + + ! To exit + if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then + not_converged = .False. + endif + + if (nb_iter > optimization_max_nb_iter) then + not_converged = .False. + endif + + if (delta < thresh_delta) then + not_converged = .False. + endif + + enddo + + deallocate(e_val, W, tmp_x) + +end +#+END_SRC + +** Script template +#+BEGIN_SRC bash :tangle script_template_mos.sh +#!/bin/bash + +your_file= + +your_C_PROVIDER= +your_H_PROVIDER= +your_g_PROVIDER= +your_cc_PROVIDER= + +sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_mos.txt > $your_file +sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file +sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file +sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file +#+END_SRC + +#+BEGIN_SRC bash :tangle script_template_xyz.sh +#!/bin/bash + +your_file= + +your_C_PROVIDER= +your_X_PROVIDER= +your_H_PROVIDER= +your_g_PROVIDER= +your_cc_PROVIDER= + +sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_xyz.txt > $your_file +sed -i "s/X_PROVIDER/$your_X_PROVIDER/g" $your_file +sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file +sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file +sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file +#+END_SRC + diff --git a/src/utils_trust_region/apply_mo_rotation.org b/src/utils_trust_region/apply_mo_rotation.org new file mode 100644 index 00000000..955997e9 --- /dev/null +++ b/src/utils_trust_region/apply_mo_rotation.org @@ -0,0 +1,86 @@ +* Apply MO rotation +Subroutine to apply the rotation matrix to the coefficients of the +MOs. + +New MOs = Old MOs . Rotation matrix + +*Compute the new MOs with the previous MOs and a rotation matrix* + +Provided: +| mo_num | integer | number of MOs | +| ao_num | integer | number of AOs | +| mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | + +Intent in: +| R(mo_num,mo_num) | double precision | rotation matrix | + +Intent out: +| prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | + +Internal: +| new_mos(ao_num,mo_num) | double precision | MOs after the rotation | +| i,j | integer | indexes | +#+BEGIN_SRC f90 :comments org :tangle apply_mo_rotation.irp.f +subroutine apply_mo_rotation(R,prev_mos) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the new MOs knowing the rotation matrix + !END_DOC + + implicit none + + ! Variables + + ! in + double precision, intent(in) :: R(mo_num,mo_num) + + ! out + double precision, intent(out) :: prev_mos(ao_num,mo_num) + + ! internal + double precision, allocatable :: new_mos(:,:) + integer :: i,j + double precision :: t1,t2,t3 + + print*,'' + print*,'---apply_mo_rotation---' + + call wall_time(t1) + + ! Allocation + allocate(new_mos(ao_num,mo_num)) + + ! Calculation + + ! Product of old MOs (mo_coef) by Rotation matrix (R) + call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) + + prev_mos = mo_coef + mo_coef = new_mos + + if (debug) then + print*,'New mo_coef : ' + do i = 1, mo_num + write(*,'(100(F10.5))') mo_coef(i,:) + enddo + endif + + ! Save the new MOs and change the label + mo_label = 'MCSCF' + !call save_mos + call ezfio_set_determinants_mo_label(mo_label) + + !print*,'Done, MOs saved' + + ! Deallocation, end + deallocate(new_mos) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in apply mo rotation:', t3 + print*,'---End apply_mo_rotation---' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/mat_to_vec_index.org b/src/utils_trust_region/mat_to_vec_index.org new file mode 100644 index 00000000..50840584 --- /dev/null +++ b/src/utils_trust_region/mat_to_vec_index.org @@ -0,0 +1,63 @@ +* Matrix to vector index + +*Compute the index i of a vector element from the indexes p,q of a +matrix element* + +Lower diagonal matrix (p,q), p > q -> vector (i) + +If a matrix is antisymmetric it can be reshaped as a vector. And the +vector can be reshaped as an antisymmetric matrix + +\begin{align*} +\begin{pmatrix} +0 & -1 & -2 & -4 \\ +1 & 0 & -3 & -5 \\ +2 & 3 & 0 & -6 \\ +4 & 5 & 6 & 0 +\end{pmatrix} +\Leftrightarrow +\begin{pmatrix} +1 & 2 & 3 & 4 & 5 & 6 +\end{pmatrix} +\end{align*} + +!!! Here the algorithm only work for the lower diagonal !!! + +Input: +| p,q | integer | indexes of a matrix element in the lower diagonal | +| | | p > q, q -> column | +| | | p -> row, | +| | | q -> column | + +Input: +| i | integer | corresponding index in the vector | + +#+BEGIN_SRC f90 :comments org :tangle mat_to_vec_index.irp.f +subroutine mat_to_vec_index(p,q,i) + + include 'pi.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: p,q + + ! out + integer, intent(out) :: i + + ! internal + integer :: a,b + double precision :: da + + ! Calculation + + a = p-1 + b = a*(a-1)/2 + + i = q+b + +end subroutine +#+END_SRC + diff --git a/src/utils_trust_region/rotation_matrix.org b/src/utils_trust_region/rotation_matrix.org new file mode 100644 index 00000000..63a8ae44 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix.org @@ -0,0 +1,454 @@ +* Rotation matrix + +*Build a rotation matrix from an antisymmetric matrix* + +Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : +$$ +\textbf{R}=\exp(\textbf{A}) +$$ + +So : +\begin{align*} +\textbf{R}=& \exp(\textbf{A}) \\ +=& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ +=& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} +\end{align*} + +With : +$\textbf{W}$ : eigenvectors of $\textbf{A}^2$ +$\tau$ : $\sqrt{-x}$ +$x$ : eigenvalues of $\textbf{A}^2$ + +Input: +| A(n,n) | double precision | antisymmetric matrix | +| n | integer | number of columns of the A matrix | +| LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | +| LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | + +Output: +| R(n,n) | double precision | Rotation matrix | +| info | integer | if info = 0, the execution is successful | +| | | if info = k, the k-th parameter has an illegal value | +| | | if info = -k, the algorithm failed | + +Internal: +| B(n,n) | double precision | B = A.A | +| work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | +| lwork | integer | dimension of the syev work array >= max(1, 3n-1) | +| W(n,n) | double precision | eigenvectors of B | +| e_val(n) | double precision | eigenvalues of B | +| m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | +| cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | +| sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | +| tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | +| part_1(n,n) | double precision | matrix W.cos_tau.W^t | +| part_1a(n,n) | double precision | matrix cos_tau.W^t | +| part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | +| part_2a(n,n) | double precision | matrix W^t.A | +| part_2b(n,n) | double precision | matrix sin_tau.W^t.A | +| part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | +| RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | +| norm | integer | norm of R.R^t-1, must be equal to 0 | +| i,j | integer | indexes | + +Functions: +| dnrm2 | double precision | Lapack function, compute the norm of a matrix | +| disnan | logical | Lapack function, check if an element is NaN | + + +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f +subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) + + implicit none + + !BEGIN_DOC + ! Rotation matrix to rotate the molecular orbitals. + ! If the rotation is too large the transformation is not unitary and must be cancelled. + !END_DOC + + include 'pi.h' + + ! Variables + + ! in + integer, intent(in) :: n,LDA,LDR + double precision, intent(inout) :: A(LDA,n) + + ! out + double precision, intent(out) :: R(LDR,n) + integer, intent(out) :: info + logical, intent(out) :: enforce_step_cancellation + + ! internal + double precision, allocatable :: B(:,:) + double precision, allocatable :: work(:,:) + double precision, allocatable :: W(:,:), e_val(:) + double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) + double precision, allocatable :: part_1(:,:),part_1a(:,:) + double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) + double precision, allocatable :: RR_t(:,:) + integer :: i,j + integer :: info2, lwork ! for dsyev + double precision :: norm, max_elem, max_elem_A, t1,t2,t3 + + ! function + double precision :: dnrm2 + logical :: disnan + + print*,'' + print*,'---rotation_matrix---' + + call wall_time(t1) + + ! Allocation + allocate(B(n,n)) + allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) + allocate(W(n,n),e_val(n)) + allocate(part_1(n,n),part_1a(n,n)) + allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) + allocate(RR_t(n,n)) +#+END_SRC + +** Pre-conditions +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Initialization + info=0 + enforce_step_cancellation = .False. + + ! Size of matrix A must be at least 1 by 1 + if (n<1) then + info = 3 + print*, 'WARNING: invalid parameter 5' + print*, 'n<1' + return + endif + + ! Leading dimension of A must be >= n + if (LDA < n) then + info = 25 + print*, 'WARNING: invalid parameter 2 or 5' + print*, 'LDA < n' + return + endif + + ! Leading dimension of A must be >= n + if (LDR < n) then + info = 4 + print*, 'WARNING: invalid parameter 4' + print*, 'LDR < n' + return + endif + + ! Matrix elements of A must by non-NaN + do j = 1, n + do i = 1, n + if (disnan(A(i,j))) then + info=1 + print*, 'WARNING: invalid parameter 1' + print*, 'NaN element in A matrix' + return + endif + enddo + enddo + + do i = 1, n + if (A(i,i) /= 0d0) then + print*, 'WARNING: matrix A is not antisymmetric' + print*, 'Non 0 element on the diagonal', i, A(i,i) + call ABORT + endif + enddo + + do j = 1, n + do i = 1, n + if (A(i,j)+A(j,i)>1d-16) then + print*, 'WANRING: matrix A is not antisymmetric' + print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) + print*, 'diff:', A(i,j)+A(j,i) + call ABORT + endif + enddo + enddo + + ! Fix for too big elements ! bad idea better to cancel if the error is too big + !do j = 1, n + ! do i = 1, n + ! A(i,j) = mod(A(i,j),2d0*pi) + ! if (dabs(A(i,j)) > pi) then + ! A(i,j) = 0d0 + ! endif + ! enddo + !enddo + + max_elem_A = 0d0 + do j = 1, n + do i = 1, n + if (ABS(A(i,j)) > ABS(max_elem_A)) then + max_elem_A = A(i,j) + endif + enddo + enddo + print*,'max element in A', max_elem_A + + if (ABS(max_elem_A) > 2 * pi) then + print*,'' + print*,'WARNING: ABS(max_elem_A) > 2 pi ' + print*,'' + endif + +#+END_SRC + +** Calculations + +*** B=A.A + - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ + - Diagonalization of $\textbf{B}$ + W, the eigenvectors + e_val, the eigenvalues + + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Compute B=A.A + + call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) + + ! Copy B in W, diagonalization will put the eigenvectors in W + W=B + + ! Diagonalization of B + ! Eigenvalues -> e_val + ! Eigenvectors -> W + lwork = 3*n-1 + allocate(work(lwork,n)) + + print*,'Starting diagonalization ...' + + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) + + deallocate(work) + + if (info2 == 0) then + print*, 'Diagonalization : Done' + elseif (info2 < 0) then + print*, 'WARNING: error in the diagonalization' + print*, 'Illegal value of the ', info2,'-th parameter' + else + print*, "WARNING: Diagonalization failed to converge" + endif + #+END_SRC + +*** Tau^-1, cos(tau), sin(tau) + $$\tau = \sqrt{-x}$$ + - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ + - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ + - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ + These matrices are diagonals + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Diagonal matrix m_diag + do j = 1, n + if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems + e_val(j) = 0.d0 + else + e_val(j) = - e_val(j) + endif + enddo + + m_diag = 0.d0 + do i = 1, n + m_diag(i,i) = e_val(i) + enddo + + ! cos_tau + do j = 1, n + do i = 1, n + if (i==j) then + cos_tau(i,j) = dcos(dsqrt(e_val(i))) + else + cos_tau(i,j) = 0d0 + endif + enddo + enddo + + ! sin_tau + do j = 1, n + do i = 1, n + if (i==j) then + sin_tau(i,j) = dsin(dsqrt(e_val(i))) + else + sin_tau(i,j) = 0d0 + endif + enddo + enddo + + ! Debug, display the cos_tau and sin_tau matrix + !if (debug) then + ! print*, 'cos_tau' + ! do i = 1, n + ! print*, cos_tau(i,:) + ! enddo + ! print*, 'sin_tau' + ! do i = 1, n + ! print*, sin_tau(i,:) + ! enddo + !endif + + ! tau^-1 + do j = 1, n + do i = 1, n + if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small + tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) + else + tau_m1(i,j) = 0d0 + endif + enddo + enddo + + max_elem = 0d0 + do i = 1, n + if (ABS(tau_m1(i,i)) > ABS(max_elem)) then + max_elem = tau_m1(i,i) + endif + enddo + print*,'max elem tau^-1:', max_elem + + ! Debug + !print*,'eigenvalues:' + !do i = 1, n + ! print*, e_val(i) + !enddo + + !Debug, display tau^-1 + !if (debug) then + ! print*, 'tau^-1' + ! do i = 1, n + ! print*,tau_m1(i,:) + ! enddo + !endif + #+END_SRC + +*** Rotation matrix + \begin{align*} + \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} + \end{align*} + \begin{align*} + \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \end{align*} + \begin{align*} + \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} + \end{align*} + + First: + part_1 = dgemm(W, dgemm(cos_tau, W^t)) + part_1a = dgemm(cos_tau, W^t) + part_1 = dgemm(W, part_1a) + And: + part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) + part_2a = dgemm(W^t, A) + part_2b = dgemm(sin_tau, part_2a) + part_2c = dgemm(tau_m1, part_2b) + part_2 = dgemm(W, part_2c) + Finally: + Rotation matrix, R = part_1+part_2 + + If $R$ is a rotation matrix: + $R.R^T=R^T.R=\textbf{1}$ + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! part_1 + call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) + call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) + + ! part_2 + call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) + call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) + call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) + call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) + + ! Rotation matrix R + R = part_1 + part_2 + + ! Matrix check + ! R.R^t and R^t.R must be equal to identity matrix + do j = 1, n + do i=1,n + if (i==j) then + RR_t(i,j) = 1d0 + else + RR_t(i,j) = 0d0 + endif + enddo + enddo + + call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) + + norm = dnrm2(n*n,RR_t,1) + print*, 'Rotation matrix check, norm R.R^T = ', norm + + ! Debug + !if (debug) then + ! print*, 'RR_t' + ! do i = 1, n + ! print*, RR_t(i,:) + ! enddo + !endif + #+END_SRC + +*** Post conditions + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Check if R.R^T=1 + max_elem = 0d0 + do j = 1, n + do i = 1, n + if (ABS(RR_t(i,j)) > ABS(max_elem)) then + max_elem = RR_t(i,j) + endif + enddo + enddo + + print*, 'Max error in R.R^T:', max_elem + print*, 'e_val(1):', e_val(1) + print*, 'e_val(n):', e_val(n) + print*, 'max elem in A:', max_elem_A + + if (ABS(max_elem) > 1d-12) then + print*, 'WARNING: max error in R.R^T > 1d-12' + print*, 'Enforce the step cancellation' + enforce_step_cancellation = .True. + endif + + ! Matrix elements of R must by non-NaN + do j = 1,n + do i = 1,LDR + if (disnan(R(i,j))) then + info = 666 + print*, 'NaN in rotation matrix' + call ABORT + endif + enddo + enddo + + ! Display + !if (debug) then + ! print*,'Rotation matrix :' + ! do i = 1, n + ! write(*,'(100(F10.5))') R(i,:) + ! enddo + !endif + #+END_SRC + +** Deallocation, end + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + deallocate(B) + deallocate(m_diag,cos_tau,sin_tau,tau_m1) + deallocate(W,e_val) + deallocate(part_1,part_1a) + deallocate(part_2,part_2a,part_2b,part_2c) + deallocate(RR_t) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in rotation matrix:', t3 + + print*,'---End rotation_matrix---' + +end subroutine + #+END_SRC + diff --git a/src/utils_trust_region/rotation_matrix_iterative.org b/src/utils_trust_region/rotation_matrix_iterative.org new file mode 100644 index 00000000..f6cc9909 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix_iterative.org @@ -0,0 +1,136 @@ +* Rotation matrix with the iterative method + +\begin{align*} +\textbf{R} = \sum_{k=0}^{\infty} \frac{1}{k!} \textbf{X}^k +\end{align*} + +!!! Doesn't work !!! + +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix_iterative.irp.f +subroutine rotation_matrix_iterative(m,X,R) + + implicit none + + ! in + integer, intent(in) :: m + double precision, intent(in) :: X(m,m) + + ! out + double precision, intent(out) :: R(m,m) + + ! internal + double precision :: max_elem, pre_factor + double precision :: t1,t2,t3 + integer :: k,l,i,j + logical :: not_converged + double precision, allocatable :: RRT(:,:), A(:,:), B(:,:) + + ! Functions + integer :: factorial + + print*,'---rotation_matrix_iterative---' + call wall_time(t1) + + allocate(RRT(m,m),A(m,m),B(m,m)) + + ! k = 0 + R = 0d0 + do i = 1, m + R(i,i) = 1d0 + enddo + + ! k = 1 + R = R + X + + k = 2 + + not_converged = .True. + + do while (not_converged) + + pre_factor = 1d0/DBLE(factorial(k)) + if (pre_factor < 1d-15) then + print*,'pre factor=', pre_factor,'< 1d-15, exit' + exit + endif + + A = X + B = 0d0 + do l = 1, k-1 + call dgemm('N','N',m,m,m,1d0,X,size(X,1),A,size(A,1),0d0,B,size(B,1)) + A = B + enddo + + !print*,'B' + !do i = 1, m + ! print*,B(i,:) * 1d0/DBLE(factorial(k)) + !enddo + + R = R + pre_factor * B + + k = k + 1 + call dgemm('T','N',m,m,m,1d0,R,size(R,1),R,size(R,1),0d0,RRT,size(RRT,1)) + + !print*,'R' + !do i = 1, m + ! write(*,'(10(E12.5))') R(i,:) + !enddo + + do i = 1, m + RRT(i,i) = RRT(i,i) - 1d0 + enddo + + !print*,'RRT' + !do i = 1, m + ! write(*,'(10(E12.5))') RRT(i,:) + !enddo + + max_elem = 0d0 + do j = 1, m + do i = 1, m + if (dabs(RRT(i,j)) > max_elem) then + max_elem = dabs(RRT(i,j)) + endif + enddo + enddo + + print*, 'Iteration:', k + print*, 'Max error in R:', max_elem + + if (max_elem < 1d-12) then + not_converged = .False. + endif + + enddo + + deallocate(RRT,A,B) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rotation matrix iterative:', t3 + print*,'---End roration_matrix_iterative---' + + +print*,'Does not work yet, abort' +call abort + +end +#+END_SRC + +** Factorial +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix_iterative.irp.f +function factorial(n) + + implicit none + + integer, intent(in) :: n + integer :: factorial, k + + factorial = 1 + + do k = 1, n + factorial = factorial * k + enddo + +end +#+END_SRC diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.org b/src/utils_trust_region/sub_to_full_rotation_matrix.org new file mode 100644 index 00000000..f0cf0bfc --- /dev/null +++ b/src/utils_trust_region/sub_to_full_rotation_matrix.org @@ -0,0 +1,65 @@ +* Rotation matrix in a subspace to rotation matrix in the full space + +Usually, we are using a list of MOs, for exemple the active ones. When +we compute a rotation matrix to rotate the MOs, we just compute a +rotation matrix for these MOs in order to reduce the size of the +matrix which has to be computed. Since the computation of a rotation +matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to +reuce the number of MOs involved. +After that we replace the rotation matrix in the full space by +building the elements of the rotation matrix in the full space from +the elements of the rotation matrix in the subspace and adding some 0 +on the extradiagonal elements and some 1 on the diagonal elements, +for the MOs that are not involved in the rotation. + +Provided: +| mo_num | integer | Number of MOs | + +Input: +| m | integer | Size of tmp_list, m <= mo_num | +| tmp_list(m) | integer | List of MOs | +| tmp_R(m,m) | double precision | Rotation matrix in the space of | +| | | the MOs containing by tmp_list | + +Output: +| R(mo_num,mo_num | double precision | Rotation matrix in the space | +| | | of all the MOs | + +Internal: +| i,j | integer | indexes in the full space | +| tmp_i,tmp_j | integer | indexes in the subspace | + +#+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f +subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) + + !BEGIN_DOC + ! Compute the full rotation matrix from a smaller one + !END_DOC + + implicit none + + ! in + integer, intent(in) :: m, tmp_list(m) + double precision, intent(in) :: tmp_R(m,m) + + ! out + double precision, intent(out) :: R(mo_num,mo_num) + + ! internal + integer :: i,j,tmp_i,tmp_j + + ! tmp_R to R, subspace to full space + R = 0d0 + do i = 1, mo_num + R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital + enddo + do tmp_j = 1, m + j = tmp_list(tmp_j) + do tmp_i = 1, m + i = tmp_list(tmp_i) + R(i,j) = tmp_R(tmp_i,tmp_j) + enddo + enddo + +end +#+END_SRC diff --git a/src/utils_trust_region/trust_region_expected_e.org b/src/utils_trust_region/trust_region_expected_e.org new file mode 100644 index 00000000..2ada7768 --- /dev/null +++ b/src/utils_trust_region/trust_region_expected_e.org @@ -0,0 +1,121 @@ +* Predicted energy : e_model + +*Compute the energy predicted by the Taylor series* + +The energy is predicted using a Taylor expansion truncated at te 2nd +order : + +\begin{align*} +E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) +\end{align*} + +Input: +| n | integer | m*(m-1)/2 | +| v_grad(n) | double precision | gradient | +| H(n,n) | double precision | hessian | +| x(n) | double precision | Step in the trust region | +| prev_energy | double precision | previous energy | + +Output: +| e_model | double precision | predicted energy after the rotation of the MOs | + +Internal: +| part_1 | double precision | v_grad^T.x | +| part_2 | double precision | 1/2 . x^T.H.x | +| part_2a | double precision | H.x | +| i,j | integer | indexes | + +Function: +| ddot | double precision | dot product (Lapack) | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f +subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the expected criterion/energy after the application of the step x + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: v_grad(n),H(n,n),x(n) + double precision, intent(in) :: prev_energy + + ! out + double precision, intent(out) :: e_model + + ! internal + double precision :: part_1, part_2, t1,t2,t3 + double precision, allocatable :: part_2a(:) + + integer :: i,j + + !Function + double precision :: ddot + + print*,'' + print*,'---Trust_e_model---' + + call wall_time(t1) + + ! Allocation + allocate(part_2a(n)) +#+END_SRC + +** Calculations + +part_1 corresponds to the product g.x +part_2a corresponds to the product H.x +part_2 corresponds to the product 0.5*(x^T.H.x) + +TODO: remove the dot products + +#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f + ! Product v_grad.x + part_1 = ddot(n,v_grad,1,x,1) + + !if (debug) then + print*,'g.x : ', part_1 + !endif + + ! Product H.x + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + + ! Product 1/2 . x^T.H.x + part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) + + !if (debug) then + print*,'1/2*x^T.H.x : ', part_2 + !endif + + print*,'prev_energy', prev_energy + + ! Sum + e_model = prev_energy + part_1 + part_2 + + ! Writing the predicted energy + print*, 'Predicted energy after the rotation : ', e_model + print*, 'Previous energy - predicted energy:', prev_energy - e_model + + ! Can be deleted, already in another subroutine + if (DABS(prev_energy - e_model) < 1d-12 ) then + print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' + endif + + ! Deallocation + deallocate(part_2a) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust e model:', t3 + + print*,'---End trust_e_model---' + print*,'' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/trust_region_optimal_lambda.org b/src/utils_trust_region/trust_region_optimal_lambda.org new file mode 100644 index 00000000..6ed99d1d --- /dev/null +++ b/src/utils_trust_region/trust_region_optimal_lambda.org @@ -0,0 +1,1665 @@ +* Newton's method to find the optimal lambda + +*Compute the lambda value for the trust region* + +This subroutine uses the Newton method in order to find the optimal +lambda. This constant is added on the diagonal of the hessian to shift +the eiganvalues. It has a double role: +- ensure that the resulting hessian is positive definite for the + Newton method +- constrain the step in the trust region, i.e., + $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius + of the trust region. +We search $\lambda$ which minimizes +\begin{align*} + f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 +\end{align*} +or +\begin{align*} + \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 +\end{align*} +and gives obviously 0 in both cases. \newline + +There are several cases: +- If $\textbf{H}$ is positive definite the interval containing the + solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). +- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot + \textbf{g} \neq 0$ then the interval containing + the solution is $\lambda \in (-h_1, \infty)$. +- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot + \textbf{g} = 0$ then the interval containing the solution is + $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < + 10^{-12}$ are not computed, so the term where $i = 1$ is + automatically removed and this case becomes similar to the previous one. + +So to avoid numerical problems (cf. trust_region) we start the +algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, +with $\epsilon$ a little constant. +The research must be restricted to the interval containing the +solution. For that reason a little trust region in 1D is used. + +The Newton method to find the optimal $\lambda$ is : +\begin{align*} + \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ +\end{align*} +$f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to +$\lambda$ at the l-th iteration, +$f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to +$\lambda$ at the l-th iteration.\newline + +Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} +f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as +\begin{align*} + y \leq \alpha +\end{align*} +with $\alpha$ a scalar representing the trust length (trust region in +1D) where the function $f$ or $\tilde{f}$ is correctly describe by the +Taylor series truncated at the second order. Thus, if $y > \alpha$, +the constraint is applied as +\begin{align*} + y^* = \alpha \frac{y}{|y|} +\end{align*} +with $y^*$ the solution in the trust region. + +The size of the trust region evolves in function of $\rho$ as for the +trust region seen previously cf. trust_region, rho_model. +The prediction of the value of $f$ or $\tilde{f}$ is done using the +Taylor series truncated at the second order cf. "trust_region", +"trust_e_model". + +The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - +\Delta^2)^2$ with respect to $\lambda$ are: +\begin{align*} + \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 + = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) + \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} +\begin{align*} +\frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{align*} + +The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - +1/\Delta^2)^2$ with respect to $\lambda$ are: +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} + {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + +Provided in qp_edit: +| thresh_rho_2 | +| thresh_cc | +| nb_it_max_lambda | +| version_lambda_search | +| nb_it_max_pre_search | +see qp_edit for more details + +Input: +| n | integer | m*(m-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| tmp_wtg(n) | double precision | w_i^T.v_grad(i) | +| delta | double precision | delta for the trust region | + +Output: +| lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | +| | | lambda > 0 | + +Internal: +| d1_N | double precision | value of d1_norm_trust_region | +| d2_N | double precision | value of d2_norm_trust_region | +| f_N | double precision | value of f_norm_trust_region | +| prev_f_N | double precision | previous value of f_norm_trust_region | +| f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | +| prev_f_R | double precision | previous value of f_R | +| model | double precision | predicted value of f_R from prev_f_R and y | +| d_1 | double precision | value of the first derivative | +| d_2 | double precision | value of the second derivative | +| y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | +| prev_lambda | double precision | previous value of lambda | +| t1,t2,t3 | double precision | wall time | +| i | integer | index | +| epsilon | double precision | little constant to avoid numerical problem | +| rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | +| version | integer | version of the root finding method | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +| d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +| f_norm_trust_region | double precision | value of norm(x)^2 | + + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + + include 'pi.h' + + !BEGIN_DOC + ! Research the optimal lambda to constrain the step size in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(inout) :: e_val(n) + double precision, intent(in) :: delta + double precision, intent(in) :: tmp_wtg(n) + + ! out + double precision, intent(out) :: lambda + + ! Internal + double precision :: d1_N, d2_N, f_N, prev_f_N + double precision :: prev_f_R, f_R + double precision :: model + double precision :: d_1, d_2 + double precision :: t1,t2,t3 + integer :: i + double precision :: epsilon + double precision :: y + double precision :: prev_lambda + double precision :: rho_2 + double precision :: alpha + integer :: version + + ! Functions + double precision :: d1_norm_trust_region,d1_norm_trust_region_omp + double precision :: d2_norm_trust_region, d2_norm_trust_region_omp + double precision :: f_norm_trust_region, f_norm_trust_region_omp + double precision :: d1_norm_inverse_trust_region + double precision :: d2_norm_inverse_trust_region + double precision :: d1_norm_inverse_trust_region_omp + double precision :: d2_norm_inverse_trust_region_omp + + print*,'' + print*,'---Trust_newton---' + print*,'' + + call wall_time(t1) + + ! version_lambda_search + ! 1 -> ||x||^2 - delta^2 = 0, + ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) + if (version_lambda_search == 1) then + print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + else + print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + endif + ! Version 2 is normally better +#+END_SRC + +Resolution with the Newton method: + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f + ! Initialization + epsilon = 1d-4 + lambda =MAX(0d0, -e_val(1)) + + ! Pre research of lambda to start near the optimal lambda + ! by adding a constant epsilon and changing the constant to + ! have ||x(lambda + epsilon)|| ~ delta, before setting + ! lambda = lambda + epsilon + print*, 'Pre research of lambda:' + print*,'Initial lambda =', lambda + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + i = 1 + + ! To increase lambda + if (f_N > delta**2) then + print*,'Increasing lambda...' + do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 2d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N < f_N) then + print*,'WARNING, error: prev_f_N < f_N, exit' + epsilon = epsilon * 0.5d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + + ! To reduce lambda + else + print*,'Reducing lambda...' + do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 0.5d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N > f_N) then + print*,'WARNING, error: prev_f_N > f_N, exit' + epsilon = epsilon * 2d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + endif + + print*,'End of the pre research of lambda' + + ! New value of lambda + lambda = lambda + epsilon + + print*, 'e_val(1):', e_val(1) + print*, 'Staring point, lambda =', lambda + + ! thresh_cc, threshold for the research of the optimal lambda + ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc + ! thresh_rho_2, threshold to cancel the step in the research + ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 + print*,'Threshold for the CC:', thresh_cc + print*,'Threshold for rho_2:', thresh_rho_2 + + print*, 'w_1^T . g =', tmp_wtg(1) + + ! Debug + if (debug) then + print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' + endif + + ! Initialization + i = 1 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 + model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 + prev_f_N = 0d0 ! previous value of ||x||^2 + prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 + f_R = 0d0 ! value of (||x||^2 - delta^2)^2 + rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) + y = 0d0 ! step size + prev_lambda = 0d0 ! previous lambda + + ! Derivatives + if (version_lambda_search == 1) then + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + + ! Trust length + alpha = DABS((1d0/d_2)*d_1) + + ! Newton's method + do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) + print*,'--------------------------------------' + print*,'Research of lambda, iteration:', i + print*,'--------------------------------------' + + ! Update of f_N, f_R and the derivatives + prev_f_N = f_N + if (version_lambda_search == 1) then + prev_f_R = (prev_f_N - delta**2)**2 + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + + ! Newton's step + y = -(1d0/DABS(d_2))*d_1 + + ! Constraint on y (the newton step) + if (DABS(y) > alpha) then + y = alpha * (y/DABS(y)) ! preservation of the sign of y + endif + write(*,'(a,E12.5)') ' Step length: ', y + + ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series + model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 + + ! Updates lambda + prev_lambda = lambda + lambda = prev_lambda + y + print*,'prev lambda:', prev_lambda + print*,'new lambda:', lambda + + ! Checks if lambda is in (-h_1, \infty) + if (lambda > MAX(0d0, -e_val(1))) then + ! New value of ||x(lambda)||^2 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + ! New f_R + if (version_lambda_search == 1) then + f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 + else + f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 + endif + + if (version_lambda_search == 1) then + print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + else + print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + endif + + print*,'previous - actual:', prev_f_R - f_R + print*,'previous - model:', prev_f_R - model + + ! Check the gain + if (DABS(prev_f_R - model) < thresh_model_2) then + print*,'' + print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' + print*,'' + endif + + ! Will be deleted + !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then + ! print*,'' + ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' + ! print*,'' + ! exit + !endif + + ! Computes rho_2 + rho_2 = (prev_f_R - f_R)/(prev_f_R - model) + print*,'rho_2:', rho_2 + else + rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) + print*,'lambda < -e_val(1) ===> rho_2 = 0' + endif + + ! Evolution of the trust length, alpha + if (rho_2 >= 0.75d0) then + alpha = 2d0 * alpha + elseif (rho_2 >= 0.5d0) then + alpha = alpha + elseif (rho_2 >= 0.25d0) then + alpha = 0.5d0 * alpha + else + alpha = 0.25d0 * alpha + endif + write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + + ! cancellaion of the step if rho < 0.1 + if (rho_2 < thresh_rho_2) then !0.1d0) then + lambda = prev_lambda + f_N = prev_f_N + print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + endif + + print*,'' + print*,'lambda, ||x||, delta:' + print*, lambda, dsqrt(f_N), delta + print*,'CC:', DABS(1d0 - f_N/delta**2) + print*,'' + + i = i + 1 + enddo + + ! if trust newton failed + if (i > nb_it_max_lambda) then + print*,'' + print*,'######################################################' + print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' + print*,'The research of the optimal lambda has failed' + print*,'######################################################' + print*,'' + endif + + print*,'Number of iterations :', i + print*,'Value of lambda :', lambda + print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 + print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + + ! Time + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust_newton:', t3 + + print*,'' + print*,'---End trust_newton---' + print*,'' + +end subroutine +#+END_SRC + +* OMP: First derivative of (||x||^2 - Delta^2)^2 + +*Function to compute the first derivative of (||x||^2 - Delta^2)^2* + +This function computes the first derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. + +\begin{align*} +\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) +\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} + +\begin{align*} + \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ + \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| mo_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg,accu1,accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) + + deallocate(tmp_accu1, tmp_accu2) + +end function +#+END_SRC + +* OMP: Second derivative of (||x||^2 - Delta^2)^2 + +*Function to compute the second derivative of (||x||^2 - Delta^2)^2* + +This function computes the second derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. +\begin{align*} +\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{align*} + +\begin{align*} + \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ + \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ + \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| accu3 | double precision | third sum of the formula | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_accu2 | double precision | temporary array for the third sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region_omp + double precision :: ddot + + ! Internal + double precision :: accu1,accu2,accu3 + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + integer :: i, j + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + + ! accu1 + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + ! accu2 + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + ! accu3 + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) + + deallocate(tmp_accu1, tmp_accu2, tmp_accu3) + +end function +#+END_SRC + +* OMP: Function value of ||x||^2 + +*Compute the value of ||x||^2* + +This function computes the value of ||x(lambda)||^2 + +\begin{align*} +||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | + +Internal: +| tmp_wtg(n) | double precision | temporary array for W^T.v_grad | +| tmp_fN | double precision | temporary array for the function | +| i,j | integer | indexes | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + use omp_lib + + include 'pi.h' + + !BEGIN_DOC + ! Compute ||x(lambda)||^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! functions + double precision :: f_norm_trust_region_omp + + ! internal + double precision, allocatable :: tmp_fN(:) + integer :: i,j + + ! Allocation + allocate(tmp_fN(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + f_norm_trust_region_omp = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_fN(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + deallocate(tmp_fN) + +end function +#+END_SRC + +* First derivative of (||x||^2 - Delta^2)^2 +Version without omp + +*Function to compute the first derivative of ||x||^2 - Delta* + +This function computes the first derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. + +\begin{align*} +\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| ddot | double precision | blas dot product | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i, j + + ! Functions + double precision :: d1_norm_trust_region + double precision :: ddot + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) + +end function +#+END_SRC + +* Second derivative of (||x||^2 - Delta^2)^2 +Version without OMP + +*Function to compute the second derivative of ||x||^2 - Delta* + + +\begin{equation} +\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{equation} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| accu3 | double precision | third sum of the formula | +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | +| ddot | double precision | blas dot product | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region + double precision :: ddot + + ! Internal + double precision :: wtg,accu1,accu2,accu3 + integer :: i, j + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 + endif + enddo + + d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) + +end function +#+END_SRC + +* Function value of ||x||^2 +Version without OMP + +*Compute the value of ||x||^2* + +This function computes the value of ||x(lambda)||^2 + +\begin{align*} +||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| f_norm_trust_region | double precision | value of norm(x)^2 | +| ddot | double precision | blas dot product | + + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function f_norm_trust_region(n,e_val,tmp_wtg,lambda) + + include 'pi.h' + + !BEGIN_DOC + ! Compute ||x(lambda)||^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! function + double precision :: f_norm_trust_region + double precision :: ddot + + ! internal + integer :: i,j + + ! Initialization + f_norm_trust_region = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + +end function +#+END_SRC + +* OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 +Version with OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + + deallocate(tmp_accu1, tmp_accu2) + +end +#+END_SRC + +* OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 +Version with OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2, accu3 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + + ! Functions + double precision :: d2_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + + deallocate(tmp_accu1,tmp_accu2,tmp_accu3) + +end +#+END_SRC + +* First derivative of (1/||x||^2 - 1/Delta^2)^2 +Version without OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i,j + + ! Functions + double precision :: d1_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + +end +#+END_SRC + +* Second derivative of (1/||x||^2 - 1/Delta^2)^2 +Version without OMP + +*Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2, accu3 + integer :: i,j + + ! Functions + double precision :: d2_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 + endif + enddo + + d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + +end +#+END_SRC diff --git a/src/utils_trust_region/trust_region_rho.org b/src/utils_trust_region/trust_region_rho.org new file mode 100644 index 00000000..698e1fab --- /dev/null +++ b/src/utils_trust_region/trust_region_rho.org @@ -0,0 +1,123 @@ +* Agreement with the model: Rho + +*Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* + +Rho represents the agreement between the model (the predicted energy +by the Taylor expansion truncated at the 2nd order) and the real +energy : + +\begin{equation} +\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +\end{equation} +With : +$E^{k}$ the energy at the previous iteration +$E^{k+1}$ the energy at the actual iteration +$m^{k+1}$ the predicted energy for the actual iteration +(cf. trust_e_model) + +If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. +If $\rho \leq 0$ the previous energy is lower than the actual +energy. We have to cancel the last step and use a smaller trust +region. +Here we cancel the last step if $\rho < 0.1$, because even if +the energy decreases, the agreement is bad, i.e., the Taylor expansion +truncated at the second order doesn't represent correctly the energy +landscape. So it's better to cancel the step and restart with a +smaller trust region. + +Provided in qp_edit: +| thresh_rho | + +Input: +| prev_energy | double precision | previous energy (energy before the rotation) | +| e_model | double precision | predicted energy after the rotation | + +Output: +| rho | double precision | the agreement between the model (predicted) and the real energy | +| prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | +| | | else the previous energy doesn't change | + +Internal: +| energy | double precision | energy (real) after the rotation | +| i | integer | index | +| t* | double precision | time | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f +subroutine trust_region_rho(prev_energy, energy,e_model,rho) + + include 'pi.h' + + !BEGIN_DOC + ! Compute rho, the agreement between the predicted criterion/energy and the real one + !END_DOC + + implicit none + + ! Variables + + ! In + double precision, intent(inout) :: prev_energy + double precision, intent(in) :: e_model, energy + + ! Out + double precision, intent(out) :: rho + + ! Internal + double precision :: t1, t2, t3 + integer :: i + + print*,'' + print*,'---Rho_model---' + + call wall_time(t1) +#+END_SRC + +** Rho +\begin{equation} +\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +\end{equation} + +In function of $\rho$ th step can be accepted or cancelled. + +If we cancel the last step (k+1), the previous energy (k) doesn't +change! +If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) + +#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f + ! Already done in an other subroutine + !if (ABS(prev_energy - e_model) < 1d-12) then + ! print*,'WARNING: prev_energy - e_model < 1d-12' + ! print*,'=> rho will tend toward infinity' + ! print*,'Check you convergence criterion !' + !endif + + rho = (prev_energy - energy) / (prev_energy - e_model) + + print*, 'previous energy, prev_energy :', prev_energy + print*, 'predicted energy, e_model :', e_model + print*, 'real energy, energy :', energy + print*, 'prev_energy - energy :', prev_energy - energy + print*, 'prev_energy - e_model :', prev_energy - e_model + print*, 'Rho :', rho + print*, 'Threshold for rho:', thresh_rho + + ! Modification of prev_energy in function of rho + if (rho < thresh_rho) then !0.1) then + ! the step is cancelled + print*, 'Rho <', thresh_rho,', the previous energy does not changed' + print*, 'prev_energy :', prev_energy + else + ! the step is accepted + prev_energy = energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rho model:', t3 + + print*,'---End rho_model---' + print*,'' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/trust_region_step.org b/src/utils_trust_region/trust_region_step.org new file mode 100644 index 00000000..85832672 --- /dev/null +++ b/src/utils_trust_region/trust_region_step.org @@ -0,0 +1,726 @@ +* Trust region + +*Compute the next step with the trust region algorithm* + +The Newton method is an iterative method to find a minimum of a given +function. It uses a Taylor series truncated at the second order of the +targeted function and gives its minimizer. The minimizer is taken as +the new position and the same thing is done. And by doing so +iteratively the method find a minimum, a local or global one depending +of the starting point and the convexity/nonconvexity of the targeted +function. + +The goal of the trust region is to constrain the step size of the +Newton method in a certain area around the actual position, where the +Taylor series is a good approximation of the targeted function. This +area is called the "trust region". + +In addition, in function of the agreement between the Taylor +development of the energy and the real energy, the size of the trust +region will be updated at each iteration. By doing so, the step sizes +are not too larges. In addition, since we add a criterion to cancel the +step if the energy increases (more precisely if rho < 0.1), so it's +impossible to diverge. \newline + +References: \newline +Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline +https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline +ISBN: 978-0-387-40065-5 \newline + +By using the first and the second derivatives, the Newton method gives +a step: +\begin{align*} + \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot + \textbf{g}_{(k)} +\end{align*} +which leads to the minimizer of the Taylor series. +!!! Warning: the Newton method gives the minimizer if and only if +$\textbf{H}$ is positive definite, else it leads to a saddle point !!! +But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm: +\begin{align*} + ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)} +\end{align*} +which is equivalent to +\begin{align*} + \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2 +\end{align*} + +with: \newline +$\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of +size n) \newline +$\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n +matrix) \newline +$\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of +size n) \newline +$\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration +\newline + +Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a +hypersphere of radius $\Delta_{(k+1)}$.\newline + +So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and +$\textbf{H}$ is positive definite, the +solution is the step given by the Newton method +$\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$. +Else we have to constrain the step size. For simplicity we will remove +the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have +to put a constraint on $\textbf{x}$ with a Lagrange multiplier. +Starting from the Taylor series of a function E (here, the energy) +truncated at the 2nd order, we have: +\begin{align*} + E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2} + \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + + \mathcal{O}(\textbf{x}^2) +\end{align*} + +With the constraint on the norm of $\textbf{x}$ we can write the +Lagrangian +\begin{align*} + \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x} + + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2) +\end{align*} +Where: \newline +$\lambda$ is the Lagrange multiplier \newline +$E$ is the energy at the k-th iteration $\Leftrightarrow +E(\textbf{x} = \textbf{0})$ \newline + +To solve this equation, we search a stationary point where the first +derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e. +\begin{align*} + \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0 +\end{align*} + +The derivative is: +\begin{align*} + \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} + = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +\end{align*} + +So, we search $\textbf{x}$ such as: +\begin{align*} +\frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} += \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0 +\end{align*} + +We can rewrite that as: +\begin{align*} + \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} + = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0 +\end{align*} +with $\textbf{I}$ is the identity matrix. + +By doing so, the solution is: +\begin{align*} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g} +\end{align*} +\begin{align*} + \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +\end{align*} +with $\textbf{x}^T \textbf{x} = \Delta^2$. + +We have to solve this previous equation to find this $\textbf{x}$ in the +trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is +just a one dimension problem because we can express $\textbf{x}$ as a +function of $\lambda$: +\begin{align*} + \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +\end{align*} + +We start from the fact that the hessian is diagonalizable. So we have: +\begin{align*} + \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T +\end{align*} +with: \newline +$\textbf{H}$, the hessian matrix \newline +$\textbf{W}$, the matrix containing the eigenvectors \newline +$\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline +$\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline +$h_i$, the i-th eigenvalue in ascending order \newline + +Now we use the fact that adding a constant on the diagonal just shifts +the eigenvalues: +\begin{align*} + \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} + +\textbf{I} \lambda) \cdot \textbf{W}^T +\end{align*} + +By doing so we can express $\textbf{x}$ as a function of $\lambda$ +\begin{align*} + \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +\end{align*} +with $\lambda \neq - h_i$. + +An interesting thing in our case is the norm of $\textbf{x}$, +because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of +the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have: +\begin{align*} + ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot + \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. +And if we study the properties of this function we see that: +\begin{align*} + \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0 +\end{align*} +and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: +\begin{align*} + \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty +\end{align*} + +From these limits and knowing that $h_1$ is the lowest eigenvalue, we +can conclude that $||\textbf{x}(\lambda)||$ is a continuous and +strictly decreasing function on the interval $\lambda \in +(-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which +gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one +solution. + +Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot +\textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly, +$\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the +Newton method is only defined for a positive definite hessian matrix, +so $(\textbf{H} + \textbf{I} \lambda)$ must be positive +definite. Consequently, in the case where $\textbf{H}$ is not positive +definite, to ensure the positive definiteness, $\lambda$ must be +greater than $- h_1$. +\begin{align*} + \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 +\end{align*} + +From that there are five cases: +- if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ +- if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot + \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} + \lambda)$ + must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot + \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing + $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be + positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) +- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot + \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing + $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be + positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is + similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = + 0)|| \leq \Delta$ + but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ + time a constant to ensure the condition $||\textbf{x}(\lambda = + -h_1)|| = \Delta$ and escape from the saddle point + +Thus to find the solution, we can write: +\begin{align*} + ||\textbf{x}(\lambda)|| = \Delta +\end{align*} +\begin{align*} + ||\textbf{x}(\lambda)|| - \Delta = 0 +\end{align*} + +Taking the square of this equation +\begin{align*} + (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 +\end{align*} +we have a function with one minimum for the optimal $\lambda$. +Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve +\begin{align*} + (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +\end{align*} + +But in practice, it is more effective to solve: +\begin{align*} + (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +\end{align*} + +To do that, we just use the Newton method with "trust_newton" using +first and second derivative of $(||\textbf{x}(\lambda)||^2 - +\Delta^2)^2$ with respect to $\textbf{x}$. +This will give the optimal $\lambda$ to compute the +solution $\textbf{x}$ with the formula seen previously: +\begin{align*} + \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +\end{align*} + +The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our +step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Evolution of the trust region + +We initialize the trust region at the first iteration using a radius +\begin{align*} + \Delta = ||\textbf{x}(\lambda=0)|| +\end{align*} + +And for the next iteration the trust region will evolves depending of +the agreement of the energy prediction based on the Taylor series +truncated at the 2nd order and the real energy. If the Taylor series +truncated at the 2nd order represents correctly the energy landscape +the trust region will be extent else it will be reduced. In order to +mesure this agreement we use the ratio rho cf. "rho_model" and +"trust_e_model". From that we use the following values: +- if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, +- if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, +- if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, +- if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. + +In addition, if $\rho < 0.1$ the iteration is cancelled, so it +restarts with a smaller trust region until the energy decreases. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Summary + +To summarize, knowing the hessian (eigenvectors and eigenvalues), the +gradient and the radius of the trust region we can compute the norm of +the Newton step +\begin{align*} + ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n + \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 +\end{align*} + +- if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and + $\textbf{x}(\lambda=0)$ is in the trust region and it is not + necessary to put a constraint on $\textbf{x}$, the solution is the + unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. +- else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and + $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in + the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = + -h_1)$, similarly to the previous case. + But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ + time a constant to ensure the condition $||\textbf{x}(\lambda = + -h_1)|| = \Delta$ and escape from the saddle point +- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we + have to search $\lambda \in (-h_1, \infty)$ such as + $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method + \begin{align*} + (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 + \end{align*} + or + \begin{align*} + (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 + \end{align*} + which is numerically more stable. And finally compute + \begin{align*} + \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i + \end{align*} +- else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we + do exactly the same thing that the previous case but we search + $\lambda \in (0, \infty)$ +- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and + $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the + sum), again we do exactly the same thing that the previous case + searching $\lambda \in (-h_1, \infty)$. + + +For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not +necessary in fact to remove the $j = 1$ in the sum since the term +where $h_i - \lambda < 10^{-6}$ are not computed. + +After that, we take this vector $\textbf{x}^*$, called "x", and we do +the transformation to an antisymmetric matrix $\textbf{X}$, called +m_x. This matrix $\textbf{X}$ will be used to compute a rotation +matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". + +NB: +An improvement can be done using a elleptical trust region. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Code + +Provided: +| mo_num | integer | number of MOs | + +Cf. qp_edit in orbital optimization section, for some constants/thresholds + +Input: +| m | integer | number of MOs | +| n | integer | m*(m-1)/2 | +| H(n, n) | double precision | hessian | +| v_grad(n) | double precision | gradient | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n, n) | double precision | eigenvectors of the hessian | +| rho | double precision | agreement between the model and the reality, | +| | | represents the quality of the energy prediction | +| nb_iter | integer | number of iteration | + +Input/Ouput: +| delta | double precision | radius of the trust region | + +Output: +| x(n) | double precision | vector containing the step | + +Internal: +| accu | double precision | temporary variable to compute the step | +| lambda | double precision | lagrange multiplier | +| trust_radius2 | double precision | square of the radius of the trust region | +| norm2_x | double precision | norm^2 of the vector x | +| norm2_g | double precision | norm^2 of the vector containing the gradient | +| tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | +| i, j, k | integer | indexes | + +Function: +| dnrm2 | double precision | Blas function computing the norm | +| f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compuet the step in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: v_grad(n), rho + integer, intent(inout) :: nb_iter + double precision, intent(in) :: e_val(n), w(n,n) + + ! inout + double precision, intent(inout) :: delta + + ! out + double precision, intent(out) :: x(n) + + ! Internal + double precision :: accu, lambda, trust_radius2 + double precision :: norm2_x, norm2_g + double precision, allocatable :: tmp_wtg(:) + integer :: i,j,k + double precision :: t1,t2,t3 + integer :: n_neg_eval + + + ! Functions + double precision :: ddot, dnrm2 + double precision :: f_norm_trust_region_omp + + print*,'' + print*,'==================' + print*,'---Trust_region---' + print*,'==================' + + call wall_time(t1) + + ! Allocation + allocate(tmp_wtg(n)) +#+END_SRC + + +*** Initialization and norm + +The norm of the step size will be useful for the trust region +algorithm. We start from a first guess and the radius of the trust +region will evolve during the optimization. + +avoid_saddle is actually a test to avoid saddle points + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Initialization of the Lagrange multiplier + lambda = 0d0 + + ! List of w^T.g, to avoid the recomputation + tmp_wtg = 0d0 + do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo + enddo + + ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue + ! in the case of avoid_saddle + if (avoid_saddle .and. e_val(1) < - thresh_eig) then + i = 2 + ! Number of negative eigenvalues + do while (e_val(i) < - thresh_eig) + if (tmp_wtg(i) < thresh_wtg2) then + if (version_avoid_saddle == 1) then + tmp_wtg(i) = 1d0 + elseif (version_avoid_saddle == 2) then + tmp_wtg(i) = DABS(e_val(i)) + elseif (version_avoid_saddle == 3) then + tmp_wtg(i) = dsqrt(DABS(e_val(i))) + else + tmp_wtg(i) = thresh_wtg2 + endif + endif + i = i + 1 + enddo + + ! For the fist one it's a little bit different + if (tmp_wtg(1) < thresh_wtg2) then + tmp_wtg(1) = 0d0 + endif + + endif + + ! Norm^2 of x, ||x||^2 + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta + ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm + ! Anyway if the step is too big it will be reduced + print*,'||x||^2 :', norm2_x + + ! Norm^2 of the gradient, ||v_grad||^2 + norm2_g = (dnrm2(n,v_grad,1))**2 + print*,'||grad||^2 :', norm2_g +#+END_SRC + +*** Trust radius initialization + + At the first iteration (nb_iter = 0) we initialize the trust region + with the norm of the step generate by the Newton's method ($\textbf{x}_1 = + (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, + we compute this norm using f_norm_trust_region_omp as explain just + below) + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! trust radius + if (nb_iter == 0) then + trust_radius2 = norm2_x + ! To avoid infinite loop of cancellation of this first step + ! without changing delta + nb_iter = 1 + + ! Compute delta, delta = sqrt(trust_radius) + delta = dsqrt(trust_radius2) + endif +#+END_SRC + +*** Modification of the trust radius + +In function of rho (which represents the agreement between the model +and the reality, cf. rho_model) the trust region evolves. We update +delta (the radius of the trust region). + +To avoid too big trust region we put a maximum size. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Modification of the trust radius in function of rho + if (rho >= 0.75d0) then + delta = 2d0 * delta + elseif (rho >= 0.5d0) then + delta = delta + elseif (rho >= 0.25d0) then + delta = 0.5d0 * delta + else + delta = 0.25d0 * delta + endif + + ! Maximum size of the trust region + !if (delta > 0.5d0 * n * pi) then + ! delta = 0.5d0 * n * pi + ! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' + !endif + + if (delta > 1d10) then + delta = 1d10 + endif + + print*, 'Delta :', delta +#+END_SRC + +*** Calculation of the optimal lambda + +We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ +- If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant + $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ +- If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the + unconstrained one, $\lambda = 0$ + +You will find more details at the beginning + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! By giving delta, we search (||x||^2 - delta^2)^2 = 0 + ! and not (||x||^2 - delta)^2 = 0 + + ! Research of lambda to solve ||x(lambda)|| = Delta + + ! Display + print*, 'e_val(1) = ', e_val(1) + print*, 'w_1^T.g =', tmp_wtg(1) + + ! H positive definite + if (e_val(1) > - thresh_eig) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + print*, '||x(0)||=', dsqrt(norm2_x) + print*, 'Delta=', delta + + ! H positive definite, ||x(lambda = 0)|| <= Delta + if (dsqrt(norm2_x) <= delta) then + print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + print*, 'lambda = 0, no lambda optimization' + lambda = 0d0 + + ! H positive definite, ||x(lambda = 0)|| > Delta + else + ! Constraint solution + print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + print*,'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + + ! H indefinite + else + if (DABS(tmp_wtg(1)) < thresh_wtg) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) + print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + endif + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta + if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then + ! Add e_val(1) in order to have (H - e_val(1) I) positive definite + print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + print*, 'lambda = -e_val(1), no lambda optimization' + lambda = - e_val(1) + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta + ! and + ! H indefinite, w_1^T.g =/= 0 + else + ! Constraint solution/ add lambda + if (DABS(tmp_wtg(1)) < thresh_wtg) then + print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + else + print*, 'H indefinite, w_1^T.g =/= 0' + endif + print*, 'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + + endif + + ! Recomputation of the norm^2 of the step x + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + print*,'' + print*,'Summary after the trust region:' + print*,'lambda:', lambda + print*,'||x||:', dsqrt(norm2_x) + print*,'delta:', delta +#+END_SRC + +*** Calculation of the step x + +x refers to $\textbf{x}^*$ +We compute x in function of lambda using its formula : +\begin{align*} +\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i ++ \lambda} \cdot \textbf{w}_i +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Initialisation + x = 0d0 + + ! Calculation of the step x + + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + + endif + + double precision :: beta, norm_x + + ! Test + ! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) + ! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first + ! eigenvectors multiply by a constant to ensure the condition + ! ||x(lambda=-e_val(1))|| = delta and escape the saddle point + if (avoid_saddle .and. e_val(1) < - thresh_eig) then + if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then + + ! norm of x + norm_x = dnrm2(n,x,1) + + ! Computes the coefficient for the w_1 + beta = delta**2 - norm_x**2 + + ! Updates the step x + x = x + W(:,1) * dsqrt(beta) + + ! Recomputes the norm to check + norm_x = dnrm2(n,x,1) + + print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' + print*, '||x||', norm_x + endif + endif +#+END_SRC + +*** Transformation of x + +x is a vector of size n, so it can be write as a m by m +antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". + + #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +! ! Step transformation vector -> matrix +! ! Vector with n element -> mo_num by mo_num matrix +! do j = 1, m +! do i = 1, m +! if (i>j) then +! call mat_to_vec_index(i,j,k) +! m_x(i,j) = x(k) +! else +! m_x(i,j) = 0d0 +! endif +! enddo +! enddo +! +! ! Antisymmetrization of the previous matrix +! do j = 1, m +! do i = 1, m +! if (i lower diagonal matrix (p,q), p > q + +If a matrix is antisymmetric it can be reshaped as a vector. And the +vector can be reshaped as an antisymmetric matrix + +\begin{align*} +\begin{pmatrix} +0 & -1 & -2 & -4 \\ +1 & 0 & -3 & -5 \\ +2 & 3 & 0 & -6 \\ +4 & 5 & 6 & 0 +\end{pmatrix} +\Leftrightarrow +\begin{pmatrix} +1 & 2 & 3 & 4 & 5 & 6 +\end{pmatrix} +\end{align*} + +!!! Here the algorithm only work for the lower diagonal !!! + +Input: +| i | integer | index in the vector | + +Ouput: +| p,q | integer | corresponding indexes in the lower diagonal of a matrix | +| | | p > q, | +| | | p -> row, | +| | | q -> column | + +#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_index.irp.f +subroutine vec_to_mat_index(i,p,q) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing + ! its index i a vector + !END_DOC + + implicit none + + ! Variables + + ! in + integer,intent(in) :: i + + ! out + integer, intent(out) :: p,q + + ! internal + integer :: a,b + double precision :: da + + da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) + a = INT(da) + if ((a*(a-1))/2==i) then + p = a-1 + else + p = a + endif + b = p*(p-1)/2 + + ! Matrix element indexes + p = p + 1 + q = i - b + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/vec_to_mat_v2.org b/src/utils_trust_region/vec_to_mat_v2.org new file mode 100644 index 00000000..4ce5f5e1 --- /dev/null +++ b/src/utils_trust_region/vec_to_mat_v2.org @@ -0,0 +1,40 @@ +* Vect to antisymmetric matrix using mat_to_vec_index + +Vector to antisymmetric matrix transformation using mat_to_vec_index +subroutine. + +Can be done in OMP (for the first part and with omp critical for the second) + +#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f +subroutine vec_to_mat_v2(n,m,v_x,m_x) + + !BEGIN_DOC + ! Vector to antisymmetric matrix + !END_DOC + + implicit none + + integer, intent(in) :: n,m + double precision, intent(in) :: v_x(n) + double precision, intent(out) :: m_x(m,m) + + integer :: i,j,k + + ! 1D -> 2D lower diagonal + m_x = 0d0 + do j = 1, m - 1 + do i = j + 1, m + call mat_to_vec_index(i,j,k) + m_x(i,j) = v_x(k) + enddo + enddo + + ! Antisym + do i = 1, m - 1 + do j = i + 1, m + m_x(i,j) = - m_x(j,i) + enddo + enddo + +end +#+END_SRC From d985189ad6949d14de746e1afa5848d97df74594 Mon Sep 17 00:00:00 2001 From: ydamour Date: Sat, 5 Nov 2022 13:48:17 +0100 Subject: [PATCH 40/70] file.irp.f --- src/utils_trust_region/algo_trust.irp.f | 248 +++ .../apply_mo_rotation.irp.f | 85 + src/utils_trust_region/rotation_matrix.irp.f | 443 +++++ .../sub_to_full_rotation_matrix.irp.f | 64 + .../trust_region_expected_e.irp.f | 119 ++ .../trust_region_optimal_lambda.irp.f | 1655 +++++++++++++++++ src/utils_trust_region/trust_region_rho.irp.f | 121 ++ .../trust_region_step.irp.f | 716 +++++++ src/utils_trust_region/vec_to_mat_v2.irp.f | 39 + 9 files changed, 3490 insertions(+) create mode 100644 src/utils_trust_region/algo_trust.irp.f create mode 100644 src/utils_trust_region/apply_mo_rotation.irp.f create mode 100644 src/utils_trust_region/rotation_matrix.irp.f create mode 100644 src/utils_trust_region/sub_to_full_rotation_matrix.irp.f create mode 100644 src/utils_trust_region/trust_region_expected_e.irp.f create mode 100644 src/utils_trust_region/trust_region_optimal_lambda.irp.f create mode 100644 src/utils_trust_region/trust_region_rho.irp.f create mode 100644 src/utils_trust_region/trust_region_step.irp.f create mode 100644 src/utils_trust_region/vec_to_mat_v2.irp.f diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f new file mode 100644 index 00000000..bfb3a0ff --- /dev/null +++ b/src/utils_trust_region/algo_trust.irp.f @@ -0,0 +1,248 @@ +! Algorithm for the trust region + +! step_in_trust_region: +! Computes the step in the trust region (delta) +! (automatically sets at the iteration 0 and which evolves during the +! process in function of the evolution of rho). The step is computing by +! constraining its norm with a lagrange multiplier. +! Since the calculation of the step is based on the Newton method, an +! estimation of the gain in energy is given using the Taylors series +! truncated at the second order (criterion_model). +! If (DABS(criterion-criterion_model) < 1d-12) then +! must_exit = .True. +! else +! must_exit = .False. + +! This estimation of the gain in energy is used by +! is_step_cancel_trust_region to say if the step is accepted or cancelled. + +! If the step must be cancelled, the calculation restart from the same +! hessian and gradient and recomputes the step but in a smaller trust +! region and so on until the step is accepted. If the step is accepted +! the hessian and the gradient are recomputed to produce a new step. + +! Example: + + +! !### Initialization ### +! delta = 0d0 +! nb_iter = 0 ! Must start at 0 !!! +! rho = 0.5d0 +! not_converged = .True. +! +! ! ### TODO ### +! ! Compute the criterion before the loop +! call #your_criterion(prev_criterion) +! +! do while (not_converged) +! ! ### TODO ## +! ! Call your gradient +! ! Call you hessian +! call #your_gradient(v_grad) (1D array) +! call #your_hessian(H) (2D array) +! +! ! ### TODO ### +! ! Diagonalization of the hessian +! call diagonalization_hessian(n,H,e_val,w) +! +! cancel_step = .True. ! To enter in the loop just after +! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho +! do while (cancel_step) +! +! ! Hessian,gradient,Criterion -> x +! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) +! +! if (must_exit) then +! ! ### Message ### +! ! if step_in_trust_region sets must_exit on true for numerical reasons +! print*,'algo_trust1 sends the message : Exit' +! !### exit ### +! endif +! +! !### TODO ### +! ! Compute x -> m_x +! ! Compute m_x -> R +! ! Apply R and keep the previous MOs... +! ! Update/touch +! ! Compute the new criterion/energy -> criterion +! +! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) +! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) +! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) +! +! TOUCH #your_variables +! +! call #your_criterion(criterion) +! +! ! Criterion -> step accepted or rejected +! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) +! +! ! ### TODO ### +! !if (cancel_step) then +! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) +! !endif +! #if (cancel_step) then +! #mo_coef = prev_mos +! #endif +! +! enddo +! +! !call save_mos() !### depend of the time for 1 iteration +! +! ! To exit the external loop if must_exit = .True. +! if (must_exit) then +! !### exit ### +! endif +! +! ! Step accepted, nb iteration + 1 +! nb_iter = nb_iter + 1 +! +! ! ### TODO ### +! !if (###Conditions###) then +! ! no_converged = .False. +! !endif +! #if (#your_conditions) then +! # not_converged = .False. +! #endif +! +! enddo + + + +! Variables: + +! Input: +! | n | integer | m*(m-1)/2 | +! | m | integer | number of mo in the mo_class | +! | H(n,n) | double precision | Hessian | +! | v_grad(n) | double precision | Gradient | +! | W(n,n) | double precision | Eigenvectors of the hessian | +! | e_val(n) | double precision | Eigenvalues of the hessian | +! | criterion | double precision | Actual criterion | +! | prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | +! | rho | double precision | Given by is_step_cancel_trus_region | +! | | | Agreement between the real function and the Taylor series (2nd order) | +! | nb_iter | integer | Actual number of iterations | + +! Input/output: +! | delta | double precision | Radius of the trust region | + +! Output: +! | criterion_model | double precision | Predicted criterion after the rotation | +! | x(n) | double precision | Step | +! | must_exit | logical | If the program must exit the loop | + + +subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the step and the expected criterion/energy after the step + !END_DOC + + implicit none + + ! in + integer, intent(in) :: n, nb_iter + double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) + double precision, intent(in) :: rho, prev_criterion + + ! inout + double precision, intent(inout) :: delta, e_val(n) + + ! out + double precision, intent(out) :: criterion_model, x(n) + logical, intent(out) :: must_exit + + ! internal + integer :: info + + must_exit = .False. + + call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) + + call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) + + ! exit if DABS(prev_criterion - criterion_model) < 1d-12 + if (DABS(prev_criterion - criterion_model) < thresh_model) then + print*,'' + print*,'###############################################################################' + print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' + print*,'###############################################################################' + print*,'' + must_exit = .True. + endif + + if (delta < thresh_delta) then + print*,'' + print*,'##############################################' + print*,'Delta <', thresh_delta, 'stop the trust region' + print*,'##############################################' + print*,'' + must_exit = .True. + endif + + ! Add after the call to this subroutine, a statement: + ! "if (must_exit) then + ! exit + ! endif" + ! in order to exit the optimization loop + +end subroutine + + + +! Variables: + +! Input: +! | nb_iter | integer | actual number of iterations | +! | prev_criterion | double precision | criterion before the application of the step x | +! | criterion | double precision | criterion after the application of the step x | +! | criterion_model | double precision | predicted criterion after the application of x | + +! Output: +! | rho | double precision | Agreement between the predicted criterion and the real new criterion | +! | cancel_step | logical | If the step must be cancelled | + + +subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) + + include 'pi.h' + + !BEGIN_DOC + ! Compute if the step should be cancelled + !END_DOC + + implicit none + + ! in + double precision, intent(in) :: prev_criterion, criterion, criterion_model + + ! inout + integer, intent(inout) :: nb_iter + + ! out + logical, intent(out) :: cancel_step + double precision, intent(out) :: rho + + ! Computes rho + call trust_region_rho(prev_criterion,criterion,criterion_model,rho) + + if (nb_iter == 0) then + nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled + endif + + ! If rho < thresh_rho -> give something in output to cancel the step + if (rho >= thresh_rho) then !0.1d0) then + ! The step is accepted + cancel_step = .False. + else + ! The step is rejected + cancel_step = .True. + print*, '***********************' + print*, 'Step cancel : rho <', thresh_rho + print*, '***********************' + endif + +end subroutine diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f new file mode 100644 index 00000000..a313769d --- /dev/null +++ b/src/utils_trust_region/apply_mo_rotation.irp.f @@ -0,0 +1,85 @@ +! Apply MO rotation +! Subroutine to apply the rotation matrix to the coefficients of the +! MOs. + +! New MOs = Old MOs . Rotation matrix + +! *Compute the new MOs with the previous MOs and a rotation matrix* + +! Provided: +! | mo_num | integer | number of MOs | +! | ao_num | integer | number of AOs | +! | mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | + +! Intent in: +! | R(mo_num,mo_num) | double precision | rotation matrix | + +! Intent out: +! | prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | + +! Internal: +! | new_mos(ao_num,mo_num) | double precision | MOs after the rotation | +! | i,j | integer | indexes | + +subroutine apply_mo_rotation(R,prev_mos) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the new MOs knowing the rotation matrix + !END_DOC + + implicit none + + ! Variables + + ! in + double precision, intent(in) :: R(mo_num,mo_num) + + ! out + double precision, intent(out) :: prev_mos(ao_num,mo_num) + + ! internal + double precision, allocatable :: new_mos(:,:) + integer :: i,j + double precision :: t1,t2,t3 + + print*,'' + print*,'---apply_mo_rotation---' + + call wall_time(t1) + + ! Allocation + allocate(new_mos(ao_num,mo_num)) + + ! Calculation + + ! Product of old MOs (mo_coef) by Rotation matrix (R) + call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) + + prev_mos = mo_coef + mo_coef = new_mos + + if (debug) then + print*,'New mo_coef : ' + do i = 1, mo_num + write(*,'(100(F10.5))') mo_coef(i,:) + enddo + endif + + ! Save the new MOs and change the label + mo_label = 'MCSCF' + !call save_mos + call ezfio_set_determinants_mo_label(mo_label) + + !print*,'Done, MOs saved' + + ! Deallocation, end + deallocate(new_mos) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in apply mo rotation:', t3 + print*,'---End apply_mo_rotation---' + +end subroutine diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f new file mode 100644 index 00000000..0cf47355 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix.irp.f @@ -0,0 +1,443 @@ +! Rotation matrix + +! *Build a rotation matrix from an antisymmetric matrix* + +! Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : +! $$ +! \textbf{R}=\exp(\textbf{A}) +! $$ + +! So : +! \begin{align*} +! \textbf{R}=& \exp(\textbf{A}) \\ +! =& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ +! =& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} +! \end{align*} + +! With : +! $\textbf{W}$ : eigenvectors of $\textbf{A}^2$ +! $\tau$ : $\sqrt{-x}$ +! $x$ : eigenvalues of $\textbf{A}^2$ + +! Input: +! | A(n,n) | double precision | antisymmetric matrix | +! | n | integer | number of columns of the A matrix | +! | LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | +! | LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | + +! Output: +! | R(n,n) | double precision | Rotation matrix | +! | info | integer | if info = 0, the execution is successful | +! | | | if info = k, the k-th parameter has an illegal value | +! | | | if info = -k, the algorithm failed | + +! Internal: +! | B(n,n) | double precision | B = A.A | +! | work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | +! | lwork | integer | dimension of the syev work array >= max(1, 3n-1) | +! | W(n,n) | double precision | eigenvectors of B | +! | e_val(n) | double precision | eigenvalues of B | +! | m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | +! | cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | +! | sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | +! | tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | +! | part_1(n,n) | double precision | matrix W.cos_tau.W^t | +! | part_1a(n,n) | double precision | matrix cos_tau.W^t | +! | part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | +! | part_2a(n,n) | double precision | matrix W^t.A | +! | part_2b(n,n) | double precision | matrix sin_tau.W^t.A | +! | part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | +! | RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | +! | norm | integer | norm of R.R^t-1, must be equal to 0 | +! | i,j | integer | indexes | + +! Functions: +! | dnrm2 | double precision | Lapack function, compute the norm of a matrix | +! | disnan | logical | Lapack function, check if an element is NaN | + + + +subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) + + implicit none + + !BEGIN_DOC + ! Rotation matrix to rotate the molecular orbitals. + ! If the rotation is too large the transformation is not unitary and must be cancelled. + !END_DOC + + include 'pi.h' + + ! Variables + + ! in + integer, intent(in) :: n,LDA,LDR + double precision, intent(inout) :: A(LDA,n) + + ! out + double precision, intent(out) :: R(LDR,n) + integer, intent(out) :: info + logical, intent(out) :: enforce_step_cancellation + + ! internal + double precision, allocatable :: B(:,:) + double precision, allocatable :: work(:,:) + double precision, allocatable :: W(:,:), e_val(:) + double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) + double precision, allocatable :: part_1(:,:),part_1a(:,:) + double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) + double precision, allocatable :: RR_t(:,:) + integer :: i,j + integer :: info2, lwork ! for dsyev + double precision :: norm, max_elem, max_elem_A, t1,t2,t3 + + ! function + double precision :: dnrm2 + logical :: disnan + + print*,'' + print*,'---rotation_matrix---' + + call wall_time(t1) + + ! Allocation + allocate(B(n,n)) + allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) + allocate(W(n,n),e_val(n)) + allocate(part_1(n,n),part_1a(n,n)) + allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) + allocate(RR_t(n,n)) + +! Pre-conditions + +! Initialization +info=0 +enforce_step_cancellation = .False. + +! Size of matrix A must be at least 1 by 1 +if (n<1) then + info = 3 + print*, 'WARNING: invalid parameter 5' + print*, 'n<1' + return +endif + +! Leading dimension of A must be >= n +if (LDA < n) then + info = 25 + print*, 'WARNING: invalid parameter 2 or 5' + print*, 'LDA < n' + return +endif + +! Leading dimension of A must be >= n +if (LDR < n) then + info = 4 + print*, 'WARNING: invalid parameter 4' + print*, 'LDR < n' + return +endif + +! Matrix elements of A must by non-NaN +do j = 1, n + do i = 1, n + if (disnan(A(i,j))) then + info=1 + print*, 'WARNING: invalid parameter 1' + print*, 'NaN element in A matrix' + return + endif + enddo +enddo + +do i = 1, n + if (A(i,i) /= 0d0) then + print*, 'WARNING: matrix A is not antisymmetric' + print*, 'Non 0 element on the diagonal', i, A(i,i) + call ABORT + endif +enddo + +do j = 1, n + do i = 1, n + if (A(i,j)+A(j,i)>1d-16) then + print*, 'WANRING: matrix A is not antisymmetric' + print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) + print*, 'diff:', A(i,j)+A(j,i) + call ABORT + endif + enddo +enddo + +! Fix for too big elements ! bad idea better to cancel if the error is too big +!do j = 1, n +! do i = 1, n +! A(i,j) = mod(A(i,j),2d0*pi) +! if (dabs(A(i,j)) > pi) then +! A(i,j) = 0d0 +! endif +! enddo +!enddo + +max_elem_A = 0d0 +do j = 1, n + do i = 1, n + if (ABS(A(i,j)) > ABS(max_elem_A)) then + max_elem_A = A(i,j) + endif + enddo +enddo +print*,'max element in A', max_elem_A + +if (ABS(max_elem_A) > 2 * pi) then + print*,'' + print*,'WARNING: ABS(max_elem_A) > 2 pi ' + print*,'' +endif + +! B=A.A +! - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ +! - Diagonalization of $\textbf{B}$ +! W, the eigenvectors +! e_val, the eigenvalues + + +! Compute B=A.A + +call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) + +! Copy B in W, diagonalization will put the eigenvectors in W +W=B + +! Diagonalization of B +! Eigenvalues -> e_val +! Eigenvectors -> W +lwork = 3*n-1 +allocate(work(lwork,n)) + +print*,'Starting diagonalization ...' + +call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) + +deallocate(work) + +if (info2 == 0) then + print*, 'Diagonalization : Done' +elseif (info2 < 0) then + print*, 'WARNING: error in the diagonalization' + print*, 'Illegal value of the ', info2,'-th parameter' +else + print*, "WARNING: Diagonalization failed to converge" +endif + +! Tau^-1, cos(tau), sin(tau) +! $$\tau = \sqrt{-x}$$ +! - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ +! - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ +! - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ +! These matrices are diagonals + +! Diagonal matrix m_diag +do j = 1, n + if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems + e_val(j) = 0.d0 + else + e_val(j) = - e_val(j) + endif +enddo + +m_diag = 0.d0 +do i = 1, n + m_diag(i,i) = e_val(i) +enddo + +! cos_tau +do j = 1, n + do i = 1, n + if (i==j) then + cos_tau(i,j) = dcos(dsqrt(e_val(i))) + else + cos_tau(i,j) = 0d0 + endif + enddo +enddo + +! sin_tau +do j = 1, n + do i = 1, n + if (i==j) then + sin_tau(i,j) = dsin(dsqrt(e_val(i))) + else + sin_tau(i,j) = 0d0 + endif + enddo +enddo + +! Debug, display the cos_tau and sin_tau matrix +!if (debug) then +! print*, 'cos_tau' +! do i = 1, n +! print*, cos_tau(i,:) +! enddo +! print*, 'sin_tau' +! do i = 1, n +! print*, sin_tau(i,:) +! enddo +!endif + +! tau^-1 +do j = 1, n + do i = 1, n + if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small + tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) + else + tau_m1(i,j) = 0d0 + endif + enddo +enddo + +max_elem = 0d0 +do i = 1, n + if (ABS(tau_m1(i,i)) > ABS(max_elem)) then + max_elem = tau_m1(i,i) + endif +enddo +print*,'max elem tau^-1:', max_elem + +! Debug +!print*,'eigenvalues:' +!do i = 1, n +! print*, e_val(i) +!enddo + +!Debug, display tau^-1 +!if (debug) then +! print*, 'tau^-1' +! do i = 1, n +! print*,tau_m1(i,:) +! enddo +!endif + +! Rotation matrix +! \begin{align*} +! \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} +! \end{align*} +! \begin{align*} +! \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} +! \end{align*} +! \begin{align*} +! \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} +! \end{align*} + +! First: +! part_1 = dgemm(W, dgemm(cos_tau, W^t)) +! part_1a = dgemm(cos_tau, W^t) +! part_1 = dgemm(W, part_1a) +! And: +! part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) +! part_2a = dgemm(W^t, A) +! part_2b = dgemm(sin_tau, part_2a) +! part_2c = dgemm(tau_m1, part_2b) +! part_2 = dgemm(W, part_2c) +! Finally: +! Rotation matrix, R = part_1+part_2 + +! If $R$ is a rotation matrix: +! $R.R^T=R^T.R=\textbf{1}$ + +! part_1 +call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) +call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) + +! part_2 +call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) +call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) +call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) +call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) + +! Rotation matrix R +R = part_1 + part_2 + +! Matrix check +! R.R^t and R^t.R must be equal to identity matrix +do j = 1, n + do i=1,n + if (i==j) then + RR_t(i,j) = 1d0 + else + RR_t(i,j) = 0d0 + endif + enddo +enddo + +call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) + +norm = dnrm2(n*n,RR_t,1) +print*, 'Rotation matrix check, norm R.R^T = ', norm + +! Debug +!if (debug) then +! print*, 'RR_t' +! do i = 1, n +! print*, RR_t(i,:) +! enddo +!endif + +! Post conditions + +! Check if R.R^T=1 +max_elem = 0d0 +do j = 1, n + do i = 1, n + if (ABS(RR_t(i,j)) > ABS(max_elem)) then + max_elem = RR_t(i,j) + endif + enddo +enddo + +print*, 'Max error in R.R^T:', max_elem +print*, 'e_val(1):', e_val(1) +print*, 'e_val(n):', e_val(n) +print*, 'max elem in A:', max_elem_A + +if (ABS(max_elem) > 1d-12) then + print*, 'WARNING: max error in R.R^T > 1d-12' + print*, 'Enforce the step cancellation' + enforce_step_cancellation = .True. +endif + +! Matrix elements of R must by non-NaN +do j = 1,n + do i = 1,LDR + if (disnan(R(i,j))) then + info = 666 + print*, 'NaN in rotation matrix' + call ABORT + endif + enddo +enddo + +! Display +!if (debug) then +! print*,'Rotation matrix :' +! do i = 1, n +! write(*,'(100(F10.5))') R(i,:) +! enddo +!endif + +! Deallocation, end + +deallocate(B) + deallocate(m_diag,cos_tau,sin_tau,tau_m1) + deallocate(W,e_val) + deallocate(part_1,part_1a) + deallocate(part_2,part_2a,part_2b,part_2c) + deallocate(RR_t) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in rotation matrix:', t3 + + print*,'---End rotation_matrix---' + +end subroutine diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f new file mode 100644 index 00000000..75d04352 --- /dev/null +++ b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f @@ -0,0 +1,64 @@ +! Rotation matrix in a subspace to rotation matrix in the full space + +! Usually, we are using a list of MOs, for exemple the active ones. When +! we compute a rotation matrix to rotate the MOs, we just compute a +! rotation matrix for these MOs in order to reduce the size of the +! matrix which has to be computed. Since the computation of a rotation +! matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to +! reuce the number of MOs involved. +! After that we replace the rotation matrix in the full space by +! building the elements of the rotation matrix in the full space from +! the elements of the rotation matrix in the subspace and adding some 0 +! on the extradiagonal elements and some 1 on the diagonal elements, +! for the MOs that are not involved in the rotation. + +! Provided: +! | mo_num | integer | Number of MOs | + +! Input: +! | m | integer | Size of tmp_list, m <= mo_num | +! | tmp_list(m) | integer | List of MOs | +! | tmp_R(m,m) | double precision | Rotation matrix in the space of | +! | | | the MOs containing by tmp_list | + +! Output: +! | R(mo_num,mo_num | double precision | Rotation matrix in the space | +! | | | of all the MOs | + +! Internal: +! | i,j | integer | indexes in the full space | +! | tmp_i,tmp_j | integer | indexes in the subspace | + + +subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) + + !BEGIN_DOC + ! Compute the full rotation matrix from a smaller one + !END_DOC + + implicit none + + ! in + integer, intent(in) :: m, tmp_list(m) + double precision, intent(in) :: tmp_R(m,m) + + ! out + double precision, intent(out) :: R(mo_num,mo_num) + + ! internal + integer :: i,j,tmp_i,tmp_j + + ! tmp_R to R, subspace to full space + R = 0d0 + do i = 1, mo_num + R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital + enddo + do tmp_j = 1, m + j = tmp_list(tmp_j) + do tmp_i = 1, m + i = tmp_list(tmp_i) + R(i,j) = tmp_R(tmp_i,tmp_j) + enddo + enddo + +end diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f new file mode 100644 index 00000000..1a18dfb3 --- /dev/null +++ b/src/utils_trust_region/trust_region_expected_e.irp.f @@ -0,0 +1,119 @@ +! Predicted energy : e_model + +! *Compute the energy predicted by the Taylor series* + +! The energy is predicted using a Taylor expansion truncated at te 2nd +! order : + +! \begin{align*} +! E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) +! \end{align*} + +! Input: +! | n | integer | m*(m-1)/2 | +! | v_grad(n) | double precision | gradient | +! | H(n,n) | double precision | hessian | +! | x(n) | double precision | Step in the trust region | +! | prev_energy | double precision | previous energy | + +! Output: +! | e_model | double precision | predicted energy after the rotation of the MOs | + +! Internal: +! | part_1 | double precision | v_grad^T.x | +! | part_2 | double precision | 1/2 . x^T.H.x | +! | part_2a | double precision | H.x | +! | i,j | integer | indexes | + +! Function: +! | ddot | double precision | dot product (Lapack) | + + +subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the expected criterion/energy after the application of the step x + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: v_grad(n),H(n,n),x(n) + double precision, intent(in) :: prev_energy + + ! out + double precision, intent(out) :: e_model + + ! internal + double precision :: part_1, part_2, t1,t2,t3 + double precision, allocatable :: part_2a(:) + + integer :: i,j + + !Function + double precision :: ddot + + print*,'' + print*,'---Trust_e_model---' + + call wall_time(t1) + + ! Allocation + allocate(part_2a(n)) + +! Calculations + +! part_1 corresponds to the product g.x +! part_2a corresponds to the product H.x +! part_2 corresponds to the product 0.5*(x^T.H.x) + +! TODO: remove the dot products + + +! Product v_grad.x + part_1 = ddot(n,v_grad,1,x,1) + + !if (debug) then + print*,'g.x : ', part_1 + !endif + + ! Product H.x + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + + ! Product 1/2 . x^T.H.x + part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) + + !if (debug) then + print*,'1/2*x^T.H.x : ', part_2 + !endif + + print*,'prev_energy', prev_energy + + ! Sum + e_model = prev_energy + part_1 + part_2 + + ! Writing the predicted energy + print*, 'Predicted energy after the rotation : ', e_model + print*, 'Previous energy - predicted energy:', prev_energy - e_model + + ! Can be deleted, already in another subroutine + if (DABS(prev_energy - e_model) < 1d-12 ) then + print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' + endif + + ! Deallocation + deallocate(part_2a) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust e model:', t3 + + print*,'---End trust_e_model---' + print*,'' + +end subroutine diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f new file mode 100644 index 00000000..71f6381d --- /dev/null +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -0,0 +1,1655 @@ +! Newton's method to find the optimal lambda + +! *Compute the lambda value for the trust region* + +! This subroutine uses the Newton method in order to find the optimal +! lambda. This constant is added on the diagonal of the hessian to shift +! the eiganvalues. It has a double role: +! - ensure that the resulting hessian is positive definite for the +! Newton method +! - constrain the step in the trust region, i.e., +! $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius +! of the trust region. +! We search $\lambda$ which minimizes +! \begin{align*} +! f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 +! \end{align*} +! or +! \begin{align*} +! \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 +! \end{align*} +! and gives obviously 0 in both cases. \newline + +! There are several cases: +! - If $\textbf{H}$ is positive definite the interval containing the +! solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). +! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot +! \textbf{g} \neq 0$ then the interval containing +! the solution is $\lambda \in (-h_1, \infty)$. +! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ then the interval containing the solution is +! $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < +! 10^{-12}$ are not computed, so the term where $i = 1$ is +! automatically removed and this case becomes similar to the previous one. + +! So to avoid numerical problems (cf. trust_region) we start the +! algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, +! with $\epsilon$ a little constant. +! The research must be restricted to the interval containing the +! solution. For that reason a little trust region in 1D is used. + +! The Newton method to find the optimal $\lambda$ is : +! \begin{align*} +! \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ +! \end{align*} +! $f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to +! $\lambda$ at the l-th iteration, +! $f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to +! $\lambda$ at the l-th iteration.\newline + +! Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} +! f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as +! \begin{align*} +! y \leq \alpha +! \end{align*} +! with $\alpha$ a scalar representing the trust length (trust region in +! 1D) where the function $f$ or $\tilde{f}$ is correctly describe by the +! Taylor series truncated at the second order. Thus, if $y > \alpha$, +! the constraint is applied as +! \begin{align*} +! y^* = \alpha \frac{y}{|y|} +! \end{align*} +! with $y^*$ the solution in the trust region. + +! The size of the trust region evolves in function of $\rho$ as for the +! trust region seen previously cf. trust_region, rho_model. +! The prediction of the value of $f$ or $\tilde{f}$ is done using the +! Taylor series truncated at the second order cf. "trust_region", +! "trust_e_model". + +! The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - +! \Delta^2)^2$ with respect to $\lambda$ are: +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{align*} + +! The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - +! 1/\Delta^2)^2$ with respect to $\lambda$ are: +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} +! {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + +! Provided in qp_edit: +! | thresh_rho_2 | +! | thresh_cc | +! | nb_it_max_lambda | +! | version_lambda_search | +! | nb_it_max_pre_search | +! see qp_edit for more details + +! Input: +! | n | integer | m*(m-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | tmp_wtg(n) | double precision | w_i^T.v_grad(i) | +! | delta | double precision | delta for the trust region | + +! Output: +! | lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | +! | | | lambda > 0 | + +! Internal: +! | d1_N | double precision | value of d1_norm_trust_region | +! | d2_N | double precision | value of d2_norm_trust_region | +! | f_N | double precision | value of f_norm_trust_region | +! | prev_f_N | double precision | previous value of f_norm_trust_region | +! | f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | +! | prev_f_R | double precision | previous value of f_R | +! | model | double precision | predicted value of f_R from prev_f_R and y | +! | d_1 | double precision | value of the first derivative | +! | d_2 | double precision | value of the second derivative | +! | y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | +! | prev_lambda | double precision | previous value of lambda | +! | t1,t2,t3 | double precision | wall time | +! | i | integer | index | +! | epsilon | double precision | little constant to avoid numerical problem | +! | rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | +! | version | integer | version of the root finding method | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +! | d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +! | f_norm_trust_region | double precision | value of norm(x)^2 | + + + +subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + + include 'pi.h' + + !BEGIN_DOC + ! Research the optimal lambda to constrain the step size in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(inout) :: e_val(n) + double precision, intent(in) :: delta + double precision, intent(in) :: tmp_wtg(n) + + ! out + double precision, intent(out) :: lambda + + ! Internal + double precision :: d1_N, d2_N, f_N, prev_f_N + double precision :: prev_f_R, f_R + double precision :: model + double precision :: d_1, d_2 + double precision :: t1,t2,t3 + integer :: i + double precision :: epsilon + double precision :: y + double precision :: prev_lambda + double precision :: rho_2 + double precision :: alpha + integer :: version + + ! Functions + double precision :: d1_norm_trust_region,d1_norm_trust_region_omp + double precision :: d2_norm_trust_region, d2_norm_trust_region_omp + double precision :: f_norm_trust_region, f_norm_trust_region_omp + double precision :: d1_norm_inverse_trust_region + double precision :: d2_norm_inverse_trust_region + double precision :: d1_norm_inverse_trust_region_omp + double precision :: d2_norm_inverse_trust_region_omp + + print*,'' + print*,'---Trust_newton---' + print*,'' + + call wall_time(t1) + + ! version_lambda_search + ! 1 -> ||x||^2 - delta^2 = 0, + ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) + if (version_lambda_search == 1) then + print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + else + print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + endif + ! Version 2 is normally better + + + +! Resolution with the Newton method: + + +! Initialization + epsilon = 1d-4 + lambda =MAX(0d0, -e_val(1)) + + ! Pre research of lambda to start near the optimal lambda + ! by adding a constant epsilon and changing the constant to + ! have ||x(lambda + epsilon)|| ~ delta, before setting + ! lambda = lambda + epsilon + print*, 'Pre research of lambda:' + print*,'Initial lambda =', lambda + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + i = 1 + + ! To increase lambda + if (f_N > delta**2) then + print*,'Increasing lambda...' + do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 2d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N < f_N) then + print*,'WARNING, error: prev_f_N < f_N, exit' + epsilon = epsilon * 0.5d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + + ! To reduce lambda + else + print*,'Reducing lambda...' + do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 0.5d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N > f_N) then + print*,'WARNING, error: prev_f_N > f_N, exit' + epsilon = epsilon * 2d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + endif + + print*,'End of the pre research of lambda' + + ! New value of lambda + lambda = lambda + epsilon + + print*, 'e_val(1):', e_val(1) + print*, 'Staring point, lambda =', lambda + + ! thresh_cc, threshold for the research of the optimal lambda + ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc + ! thresh_rho_2, threshold to cancel the step in the research + ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 + print*,'Threshold for the CC:', thresh_cc + print*,'Threshold for rho_2:', thresh_rho_2 + + print*, 'w_1^T . g =', tmp_wtg(1) + + ! Debug + if (debug) then + print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' + endif + + ! Initialization + i = 1 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 + model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 + prev_f_N = 0d0 ! previous value of ||x||^2 + prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 + f_R = 0d0 ! value of (||x||^2 - delta^2)^2 + rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) + y = 0d0 ! step size + prev_lambda = 0d0 ! previous lambda + + ! Derivatives + if (version_lambda_search == 1) then + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + + ! Trust length + alpha = DABS((1d0/d_2)*d_1) + + ! Newton's method + do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) + print*,'--------------------------------------' + print*,'Research of lambda, iteration:', i + print*,'--------------------------------------' + + ! Update of f_N, f_R and the derivatives + prev_f_N = f_N + if (version_lambda_search == 1) then + prev_f_R = (prev_f_N - delta**2)**2 + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + + ! Newton's step + y = -(1d0/DABS(d_2))*d_1 + + ! Constraint on y (the newton step) + if (DABS(y) > alpha) then + y = alpha * (y/DABS(y)) ! preservation of the sign of y + endif + write(*,'(a,E12.5)') ' Step length: ', y + + ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series + model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 + + ! Updates lambda + prev_lambda = lambda + lambda = prev_lambda + y + print*,'prev lambda:', prev_lambda + print*,'new lambda:', lambda + + ! Checks if lambda is in (-h_1, \infty) + if (lambda > MAX(0d0, -e_val(1))) then + ! New value of ||x(lambda)||^2 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + ! New f_R + if (version_lambda_search == 1) then + f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 + else + f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 + endif + + if (version_lambda_search == 1) then + print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + else + print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + endif + + print*,'previous - actual:', prev_f_R - f_R + print*,'previous - model:', prev_f_R - model + + ! Check the gain + if (DABS(prev_f_R - model) < thresh_model_2) then + print*,'' + print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' + print*,'' + endif + + ! Will be deleted + !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then + ! print*,'' + ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' + ! print*,'' + ! exit + !endif + + ! Computes rho_2 + rho_2 = (prev_f_R - f_R)/(prev_f_R - model) + print*,'rho_2:', rho_2 + else + rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) + print*,'lambda < -e_val(1) ===> rho_2 = 0' + endif + + ! Evolution of the trust length, alpha + if (rho_2 >= 0.75d0) then + alpha = 2d0 * alpha + elseif (rho_2 >= 0.5d0) then + alpha = alpha + elseif (rho_2 >= 0.25d0) then + alpha = 0.5d0 * alpha + else + alpha = 0.25d0 * alpha + endif + write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + + ! cancellaion of the step if rho < 0.1 + if (rho_2 < thresh_rho_2) then !0.1d0) then + lambda = prev_lambda + f_N = prev_f_N + print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + endif + + print*,'' + print*,'lambda, ||x||, delta:' + print*, lambda, dsqrt(f_N), delta + print*,'CC:', DABS(1d0 - f_N/delta**2) + print*,'' + + i = i + 1 + enddo + + ! if trust newton failed + if (i > nb_it_max_lambda) then + print*,'' + print*,'######################################################' + print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' + print*,'The research of the optimal lambda has failed' + print*,'######################################################' + print*,'' + endif + + print*,'Number of iterations :', i + print*,'Value of lambda :', lambda + print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 + print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + + ! Time + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust_newton:', t3 + + print*,'' + print*,'---End trust_newton---' + print*,'' + +end subroutine + +! OMP: First derivative of (||x||^2 - Delta^2)^2 + +! *Function to compute the first derivative of (||x||^2 - Delta^2)^2* + +! This function computes the first derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. + +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | mo_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + + +function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg,accu1,accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) + + deallocate(tmp_accu1, tmp_accu2) + +end function + +! OMP: Second derivative of (||x||^2 - Delta^2)^2 + +! *Function to compute the second derivative of (||x||^2 - Delta^2)^2* + +! This function computes the second derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. +! \begin{align*} +! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | accu3 | double precision | third sum of the formula | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_accu2 | double precision | temporary array for the third sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + + +function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region_omp + double precision :: ddot + + ! Internal + double precision :: accu1,accu2,accu3 + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + integer :: i, j + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + + ! accu1 + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + ! accu2 + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + ! accu3 + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) + + deallocate(tmp_accu1, tmp_accu2, tmp_accu3) + +end function + +! OMP: Function value of ||x||^2 + +! *Compute the value of ||x||^2* + +! This function computes the value of ||x(lambda)||^2 + +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | + +! Internal: +! | tmp_wtg(n) | double precision | temporary array for W^T.v_grad | +! | tmp_fN | double precision | temporary array for the function | +! | i,j | integer | indexes | + + +function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + use omp_lib + + include 'pi.h' + + !BEGIN_DOC + ! Compute ||x(lambda)||^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! functions + double precision :: f_norm_trust_region_omp + + ! internal + double precision, allocatable :: tmp_fN(:) + integer :: i,j + + ! Allocation + allocate(tmp_fN(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + f_norm_trust_region_omp = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_fN(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + deallocate(tmp_fN) + +end function + +! First derivative of (||x||^2 - Delta^2)^2 +! Version without omp + +! *Function to compute the first derivative of ||x||^2 - Delta* + +! This function computes the first derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. + +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | ddot | double precision | blas dot product | + + +function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i, j + + ! Functions + double precision :: d1_norm_trust_region + double precision :: ddot + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) + +end function + +! Second derivative of (||x||^2 - Delta^2)^2 +! Version without OMP + +! *Function to compute the second derivative of ||x||^2 - Delta* + + +! \begin{equation} +! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{equation} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | accu3 | double precision | third sum of the formula | +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | +! | ddot | double precision | blas dot product | + + +function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region + double precision :: ddot + + ! Internal + double precision :: wtg,accu1,accu2,accu3 + integer :: i, j + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 + endif + enddo + + d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) + +end function + +! Function value of ||x||^2 +! Version without OMP + +! *Compute the value of ||x||^2* + +! This function computes the value of ||x(lambda)||^2 + +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | f_norm_trust_region | double precision | value of norm(x)^2 | +! | ddot | double precision | blas dot product | + + + +function f_norm_trust_region(n,e_val,tmp_wtg,lambda) + + include 'pi.h' + + !BEGIN_DOC + ! Compute ||x(lambda)||^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! function + double precision :: f_norm_trust_region + double precision :: ddot + + ! internal + integer :: i,j + + ! Initialization + f_norm_trust_region = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + +end function + +! OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version with OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + + deallocate(tmp_accu1, tmp_accu2) + +end + +! OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version with OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2, accu3 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + + ! Functions + double precision :: d2_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + + deallocate(tmp_accu1,tmp_accu2,tmp_accu3) + +end + +! First derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version without OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i,j + + ! Functions + double precision :: d1_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + +end + +! Second derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version without OMP + +! *Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2, accu3 + integer :: i,j + + ! Functions + double precision :: d2_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 + endif + enddo + + d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + +end diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f new file mode 100644 index 00000000..eb280ef0 --- /dev/null +++ b/src/utils_trust_region/trust_region_rho.irp.f @@ -0,0 +1,121 @@ +! Agreement with the model: Rho + +! *Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* + +! Rho represents the agreement between the model (the predicted energy +! by the Taylor expansion truncated at the 2nd order) and the real +! energy : + +! \begin{equation} +! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +! \end{equation} +! With : +! $E^{k}$ the energy at the previous iteration +! $E^{k+1}$ the energy at the actual iteration +! $m^{k+1}$ the predicted energy for the actual iteration +! (cf. trust_e_model) + +! If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. +! If $\rho \leq 0$ the previous energy is lower than the actual +! energy. We have to cancel the last step and use a smaller trust +! region. +! Here we cancel the last step if $\rho < 0.1$, because even if +! the energy decreases, the agreement is bad, i.e., the Taylor expansion +! truncated at the second order doesn't represent correctly the energy +! landscape. So it's better to cancel the step and restart with a +! smaller trust region. + +! Provided in qp_edit: +! | thresh_rho | + +! Input: +! | prev_energy | double precision | previous energy (energy before the rotation) | +! | e_model | double precision | predicted energy after the rotation | + +! Output: +! | rho | double precision | the agreement between the model (predicted) and the real energy | +! | prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | +! | | | else the previous energy doesn't change | + +! Internal: +! | energy | double precision | energy (real) after the rotation | +! | i | integer | index | +! | t* | double precision | time | + + +subroutine trust_region_rho(prev_energy, energy,e_model,rho) + + include 'pi.h' + + !BEGIN_DOC + ! Compute rho, the agreement between the predicted criterion/energy and the real one + !END_DOC + + implicit none + + ! Variables + + ! In + double precision, intent(inout) :: prev_energy + double precision, intent(in) :: e_model, energy + + ! Out + double precision, intent(out) :: rho + + ! Internal + double precision :: t1, t2, t3 + integer :: i + + print*,'' + print*,'---Rho_model---' + + call wall_time(t1) + +! Rho +! \begin{equation} +! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +! \end{equation} + +! In function of $\rho$ th step can be accepted or cancelled. + +! If we cancel the last step (k+1), the previous energy (k) doesn't +! change! +! If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) + + +! Already done in an other subroutine + !if (ABS(prev_energy - e_model) < 1d-12) then + ! print*,'WARNING: prev_energy - e_model < 1d-12' + ! print*,'=> rho will tend toward infinity' + ! print*,'Check you convergence criterion !' + !endif + + rho = (prev_energy - energy) / (prev_energy - e_model) + + print*, 'previous energy, prev_energy :', prev_energy + print*, 'predicted energy, e_model :', e_model + print*, 'real energy, energy :', energy + print*, 'prev_energy - energy :', prev_energy - energy + print*, 'prev_energy - e_model :', prev_energy - e_model + print*, 'Rho :', rho + print*, 'Threshold for rho:', thresh_rho + + ! Modification of prev_energy in function of rho + if (rho < thresh_rho) then !0.1) then + ! the step is cancelled + print*, 'Rho <', thresh_rho,', the previous energy does not changed' + print*, 'prev_energy :', prev_energy + else + ! the step is accepted + prev_energy = energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rho model:', t3 + + print*,'---End rho_model---' + print*,'' + +end subroutine diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f new file mode 100644 index 00000000..afa19dec --- /dev/null +++ b/src/utils_trust_region/trust_region_step.irp.f @@ -0,0 +1,716 @@ +! Trust region + +! *Compute the next step with the trust region algorithm* + +! The Newton method is an iterative method to find a minimum of a given +! function. It uses a Taylor series truncated at the second order of the +! targeted function and gives its minimizer. The minimizer is taken as +! the new position and the same thing is done. And by doing so +! iteratively the method find a minimum, a local or global one depending +! of the starting point and the convexity/nonconvexity of the targeted +! function. + +! The goal of the trust region is to constrain the step size of the +! Newton method in a certain area around the actual position, where the +! Taylor series is a good approximation of the targeted function. This +! area is called the "trust region". + +! In addition, in function of the agreement between the Taylor +! development of the energy and the real energy, the size of the trust +! region will be updated at each iteration. By doing so, the step sizes +! are not too larges. In addition, since we add a criterion to cancel the +! step if the energy increases (more precisely if rho < 0.1), so it's +! impossible to diverge. \newline + +! References: \newline +! Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline +! https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline +! ISBN: 978-0-387-40065-5 \newline + +! By using the first and the second derivatives, the Newton method gives +! a step: +! \begin{align*} +! \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot +! \textbf{g}_{(k)} +! \end{align*} +! which leads to the minimizer of the Taylor series. +! !!! Warning: the Newton method gives the minimizer if and only if +! $\textbf{H}$ is positive definite, else it leads to a saddle point !!! +! But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm: +! \begin{align*} +! ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)} +! \end{align*} +! which is equivalent to +! \begin{align*} +! \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2 +! \end{align*} + +! with: \newline +! $\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of +! size n) \newline +! $\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n +! matrix) \newline +! $\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of +! size n) \newline +! $\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration +! \newline + +! Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a +! hypersphere of radius $\Delta_{(k+1)}$.\newline + +! So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and +! $\textbf{H}$ is positive definite, the +! solution is the step given by the Newton method +! $\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$. +! Else we have to constrain the step size. For simplicity we will remove +! the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have +! to put a constraint on $\textbf{x}$ with a Lagrange multiplier. +! Starting from the Taylor series of a function E (here, the energy) +! truncated at the 2nd order, we have: +! \begin{align*} +! E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2} +! \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + +! \mathcal{O}(\textbf{x}^2) +! \end{align*} + +! With the constraint on the norm of $\textbf{x}$ we can write the +! Lagrangian +! \begin{align*} +! \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x} +! + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +! + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2) +! \end{align*} +! Where: \newline +! $\lambda$ is the Lagrange multiplier \newline +! $E$ is the energy at the k-th iteration $\Leftrightarrow +! E(\textbf{x} = \textbf{0})$ \newline + +! To solve this equation, we search a stationary point where the first +! derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e. +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0 +! \end{align*} + +! The derivative is: +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} +! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +! \end{align*} + +! So, we search $\textbf{x}$ such as: +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} +! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0 +! \end{align*} + +! We can rewrite that as: +! \begin{align*} +! \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +! = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0 +! \end{align*} +! with $\textbf{I}$ is the identity matrix. + +! By doing so, the solution is: +! \begin{align*} +! (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g} +! \end{align*} +! \begin{align*} +! \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +! \end{align*} +! with $\textbf{x}^T \textbf{x} = \Delta^2$. + +! We have to solve this previous equation to find this $\textbf{x}$ in the +! trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is +! just a one dimension problem because we can express $\textbf{x}$ as a +! function of $\lambda$: +! \begin{align*} +! \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +! \end{align*} + +! We start from the fact that the hessian is diagonalizable. So we have: +! \begin{align*} +! \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T +! \end{align*} +! with: \newline +! $\textbf{H}$, the hessian matrix \newline +! $\textbf{W}$, the matrix containing the eigenvectors \newline +! $\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline +! $\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline +! $h_i$, the i-th eigenvalue in ascending order \newline + +! Now we use the fact that adding a constant on the diagonal just shifts +! the eigenvalues: +! \begin{align*} +! \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} +! +\textbf{I} \lambda) \cdot \textbf{W}^T +! \end{align*} + +! By doing so we can express $\textbf{x}$ as a function of $\lambda$ +! \begin{align*} +! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} +! with $\lambda \neq - h_i$. + +! An interesting thing in our case is the norm of $\textbf{x}$, +! because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of +! the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have: +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot +! \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. +! And if we study the properties of this function we see that: +! \begin{align*} +! \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0 +! \end{align*} +! and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: +! \begin{align*} +! \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty +! \end{align*} + +! From these limits and knowing that $h_1$ is the lowest eigenvalue, we +! can conclude that $||\textbf{x}(\lambda)||$ is a continuous and +! strictly decreasing function on the interval $\lambda \in +! (-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which +! gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one +! solution. + +! Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot +! \textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly, +! $\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the +! Newton method is only defined for a positive definite hessian matrix, +! so $(\textbf{H} + \textbf{I} \lambda)$ must be positive +! definite. Consequently, in the case where $\textbf{H}$ is not positive +! definite, to ensure the positive definiteness, $\lambda$ must be +! greater than $- h_1$. +! \begin{align*} +! \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 +! \end{align*} + +! From that there are five cases: +! - if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ +! - if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot +! \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} +! \lambda)$ +! must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing +! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be +! positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) +! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing +! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be +! positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is +! similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = +! 0)|| \leq \Delta$ +! but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ +! time a constant to ensure the condition $||\textbf{x}(\lambda = +! -h_1)|| = \Delta$ and escape from the saddle point + +! Thus to find the solution, we can write: +! \begin{align*} +! ||\textbf{x}(\lambda)|| = \Delta +! \end{align*} +! \begin{align*} +! ||\textbf{x}(\lambda)|| - \Delta = 0 +! \end{align*} + +! Taking the square of this equation +! \begin{align*} +! (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 +! \end{align*} +! we have a function with one minimum for the optimal $\lambda$. +! Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve +! \begin{align*} +! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +! \end{align*} + +! But in practice, it is more effective to solve: +! \begin{align*} +! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +! \end{align*} + +! To do that, we just use the Newton method with "trust_newton" using +! first and second derivative of $(||\textbf{x}(\lambda)||^2 - +! \Delta^2)^2$ with respect to $\textbf{x}$. +! This will give the optimal $\lambda$ to compute the +! solution $\textbf{x}$ with the formula seen previously: +! \begin{align*} +! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} + +! The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our +! step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. + + + + +! Evolution of the trust region + +! We initialize the trust region at the first iteration using a radius +! \begin{align*} +! \Delta = ||\textbf{x}(\lambda=0)|| +! \end{align*} + +! And for the next iteration the trust region will evolves depending of +! the agreement of the energy prediction based on the Taylor series +! truncated at the 2nd order and the real energy. If the Taylor series +! truncated at the 2nd order represents correctly the energy landscape +! the trust region will be extent else it will be reduced. In order to +! mesure this agreement we use the ratio rho cf. "rho_model" and +! "trust_e_model". From that we use the following values: +! - if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, +! - if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, +! - if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, +! - if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. + +! In addition, if $\rho < 0.1$ the iteration is cancelled, so it +! restarts with a smaller trust region until the energy decreases. + + + + +! Summary + +! To summarize, knowing the hessian (eigenvectors and eigenvalues), the +! gradient and the radius of the trust region we can compute the norm of +! the Newton step +! \begin{align*} +! ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n +! \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 +! \end{align*} + +! - if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and +! $\textbf{x}(\lambda=0)$ is in the trust region and it is not +! necessary to put a constraint on $\textbf{x}$, the solution is the +! unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. +! - else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and +! $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in +! the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = +! -h_1)$, similarly to the previous case. +! But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ +! time a constant to ensure the condition $||\textbf{x}(\lambda = +! -h_1)|| = \Delta$ and escape from the saddle point +! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we +! have to search $\lambda \in (-h_1, \infty)$ such as +! $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method +! \begin{align*} +! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +! \end{align*} +! or +! \begin{align*} +! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +! \end{align*} +! which is numerically more stable. And finally compute +! \begin{align*} +! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} +! - else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we +! do exactly the same thing that the previous case but we search +! $\lambda \in (0, \infty)$ +! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and +! $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the +! sum), again we do exactly the same thing that the previous case +! searching $\lambda \in (-h_1, \infty)$. + + +! For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not +! necessary in fact to remove the $j = 1$ in the sum since the term +! where $h_i - \lambda < 10^{-6}$ are not computed. + +! After that, we take this vector $\textbf{x}^*$, called "x", and we do +! the transformation to an antisymmetric matrix $\textbf{X}$, called +! m_x. This matrix $\textbf{X}$ will be used to compute a rotation +! matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". + +! NB: +! An improvement can be done using a elleptical trust region. + + + + +! Code + +! Provided: +! | mo_num | integer | number of MOs | + +! Cf. qp_edit in orbital optimization section, for some constants/thresholds + +! Input: +! | m | integer | number of MOs | +! | n | integer | m*(m-1)/2 | +! | H(n, n) | double precision | hessian | +! | v_grad(n) | double precision | gradient | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n, n) | double precision | eigenvectors of the hessian | +! | rho | double precision | agreement between the model and the reality, | +! | | | represents the quality of the energy prediction | +! | nb_iter | integer | number of iteration | + +! Input/Ouput: +! | delta | double precision | radius of the trust region | + +! Output: +! | x(n) | double precision | vector containing the step | + +! Internal: +! | accu | double precision | temporary variable to compute the step | +! | lambda | double precision | lagrange multiplier | +! | trust_radius2 | double precision | square of the radius of the trust region | +! | norm2_x | double precision | norm^2 of the vector x | +! | norm2_g | double precision | norm^2 of the vector containing the gradient | +! | tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | +! | i, j, k | integer | indexes | + +! Function: +! | dnrm2 | double precision | Blas function computing the norm | +! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | + + +subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compuet the step in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: v_grad(n), rho + integer, intent(inout) :: nb_iter + double precision, intent(in) :: e_val(n), w(n,n) + + ! inout + double precision, intent(inout) :: delta + + ! out + double precision, intent(out) :: x(n) + + ! Internal + double precision :: accu, lambda, trust_radius2 + double precision :: norm2_x, norm2_g + double precision, allocatable :: tmp_wtg(:) + integer :: i,j,k + double precision :: t1,t2,t3 + integer :: n_neg_eval + + + ! Functions + double precision :: ddot, dnrm2 + double precision :: f_norm_trust_region_omp + + print*,'' + print*,'==================' + print*,'---Trust_region---' + print*,'==================' + + call wall_time(t1) + + ! Allocation + allocate(tmp_wtg(n)) + +! Initialization and norm + +! The norm of the step size will be useful for the trust region +! algorithm. We start from a first guess and the radius of the trust +! region will evolve during the optimization. + +! avoid_saddle is actually a test to avoid saddle points + + +! Initialization of the Lagrange multiplier +lambda = 0d0 + +! List of w^T.g, to avoid the recomputation +tmp_wtg = 0d0 +do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo +enddo + +! Replacement of the small tmp_wtg corresponding to a negative eigenvalue +! in the case of avoid_saddle +if (avoid_saddle .and. e_val(1) < - thresh_eig) then + i = 2 + ! Number of negative eigenvalues + do while (e_val(i) < - thresh_eig) + if (tmp_wtg(i) < thresh_wtg2) then + if (version_avoid_saddle == 1) then + tmp_wtg(i) = 1d0 + elseif (version_avoid_saddle == 2) then + tmp_wtg(i) = DABS(e_val(i)) + elseif (version_avoid_saddle == 3) then + tmp_wtg(i) = dsqrt(DABS(e_val(i))) + else + tmp_wtg(i) = thresh_wtg2 + endif + endif + i = i + 1 + enddo + + ! For the fist one it's a little bit different + if (tmp_wtg(1) < thresh_wtg2) then + tmp_wtg(1) = 0d0 + endif + +endif + +! Norm^2 of x, ||x||^2 +norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) +! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta +! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm +! Anyway if the step is too big it will be reduced +print*,'||x||^2 :', norm2_x + +! Norm^2 of the gradient, ||v_grad||^2 +norm2_g = (dnrm2(n,v_grad,1))**2 +print*,'||grad||^2 :', norm2_g + +! Trust radius initialization + +! At the first iteration (nb_iter = 0) we initialize the trust region +! with the norm of the step generate by the Newton's method ($\textbf{x}_1 = +! (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, +! we compute this norm using f_norm_trust_region_omp as explain just +! below) + + +! trust radius +if (nb_iter == 0) then + trust_radius2 = norm2_x + ! To avoid infinite loop of cancellation of this first step + ! without changing delta + nb_iter = 1 + + ! Compute delta, delta = sqrt(trust_radius) + delta = dsqrt(trust_radius2) +endif + +! Modification of the trust radius + +! In function of rho (which represents the agreement between the model +! and the reality, cf. rho_model) the trust region evolves. We update +! delta (the radius of the trust region). + +! To avoid too big trust region we put a maximum size. + + +! Modification of the trust radius in function of rho +if (rho >= 0.75d0) then + delta = 2d0 * delta +elseif (rho >= 0.5d0) then + delta = delta +elseif (rho >= 0.25d0) then + delta = 0.5d0 * delta +else + delta = 0.25d0 * delta +endif + +! Maximum size of the trust region +!if (delta > 0.5d0 * n * pi) then +! delta = 0.5d0 * n * pi +! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' +!endif + +if (delta > 1d10) then + delta = 1d10 +endif + +print*, 'Delta :', delta + +! Calculation of the optimal lambda + +! We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ +! - If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant +! $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ +! - If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the +! unconstrained one, $\lambda = 0$ + +! You will find more details at the beginning + + +! By giving delta, we search (||x||^2 - delta^2)^2 = 0 +! and not (||x||^2 - delta)^2 = 0 + +! Research of lambda to solve ||x(lambda)|| = Delta + +! Display +print*, 'e_val(1) = ', e_val(1) +print*, 'w_1^T.g =', tmp_wtg(1) + +! H positive definite +if (e_val(1) > - thresh_eig) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + print*, '||x(0)||=', dsqrt(norm2_x) + print*, 'Delta=', delta + + ! H positive definite, ||x(lambda = 0)|| <= Delta + if (dsqrt(norm2_x) <= delta) then + print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + print*, 'lambda = 0, no lambda optimization' + lambda = 0d0 + + ! H positive definite, ||x(lambda = 0)|| > Delta + else + ! Constraint solution + print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + print*,'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + +! H indefinite +else + if (DABS(tmp_wtg(1)) < thresh_wtg) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) + print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + endif + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta + if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then + ! Add e_val(1) in order to have (H - e_val(1) I) positive definite + print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + print*, 'lambda = -e_val(1), no lambda optimization' + lambda = - e_val(1) + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta + ! and + ! H indefinite, w_1^T.g =/= 0 + else + ! Constraint solution/ add lambda + if (DABS(tmp_wtg(1)) < thresh_wtg) then + print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + else + print*, 'H indefinite, w_1^T.g =/= 0' + endif + print*, 'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + +endif + +! Recomputation of the norm^2 of the step x +norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) +print*,'' +print*,'Summary after the trust region:' +print*,'lambda:', lambda +print*,'||x||:', dsqrt(norm2_x) +print*,'delta:', delta + +! Calculation of the step x + +! x refers to $\textbf{x}^*$ +! We compute x in function of lambda using its formula : +! \begin{align*} +! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i +! + \lambda} \cdot \textbf{w}_i +! \end{align*} + + +! Initialisation +x = 0d0 + +! Calculation of the step x + +! Normal version +if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo + +! Version to use the absolute value of the eigenvalues +else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + +endif + +double precision :: beta, norm_x + +! Test +! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) +! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first +! eigenvectors multiply by a constant to ensure the condition +! ||x(lambda=-e_val(1))|| = delta and escape the saddle point +if (avoid_saddle .and. e_val(1) < - thresh_eig) then + if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then + + ! norm of x + norm_x = dnrm2(n,x,1) + + ! Computes the coefficient for the w_1 + beta = delta**2 - norm_x**2 + + ! Updates the step x + x = x + W(:,1) * dsqrt(beta) + + ! Recomputes the norm to check + norm_x = dnrm2(n,x,1) + + print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' + print*, '||x||', norm_x + endif +endif + +! Transformation of x + +! x is a vector of size n, so it can be write as a m by m +! antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". + + +! ! Step transformation vector -> matrix +! ! Vector with n element -> mo_num by mo_num matrix +! do j = 1, m +! do i = 1, m +! if (i>j) then +! call mat_to_vec_index(i,j,k) +! m_x(i,j) = x(k) +! else +! m_x(i,j) = 0d0 +! endif +! enddo +! enddo +! +! ! Antisymmetrization of the previous matrix +! do j = 1, m +! do i = 1, m +! if (i 2D lower diagonal + m_x = 0d0 + do j = 1, m - 1 + do i = j + 1, m + call mat_to_vec_index(i,j,k) + m_x(i,j) = v_x(k) + enddo + enddo + + ! Antisym + do i = 1, m - 1 + do j = i + 1, m + m_x(i,j) = - m_x(j,i) + enddo + enddo + +end From 25c868f7dc8b852422bc3013d29828b1dcc25d40 Mon Sep 17 00:00:00 2001 From: ydamour Date: Sat, 5 Nov 2022 13:48:50 +0100 Subject: [PATCH 41/70] remove trash --- .../rotation_matrix_iterative.org | 136 ------------------ 1 file changed, 136 deletions(-) delete mode 100644 src/utils_trust_region/rotation_matrix_iterative.org diff --git a/src/utils_trust_region/rotation_matrix_iterative.org b/src/utils_trust_region/rotation_matrix_iterative.org deleted file mode 100644 index f6cc9909..00000000 --- a/src/utils_trust_region/rotation_matrix_iterative.org +++ /dev/null @@ -1,136 +0,0 @@ -* Rotation matrix with the iterative method - -\begin{align*} -\textbf{R} = \sum_{k=0}^{\infty} \frac{1}{k!} \textbf{X}^k -\end{align*} - -!!! Doesn't work !!! - -#+BEGIN_SRC f90 :comments org :tangle rotation_matrix_iterative.irp.f -subroutine rotation_matrix_iterative(m,X,R) - - implicit none - - ! in - integer, intent(in) :: m - double precision, intent(in) :: X(m,m) - - ! out - double precision, intent(out) :: R(m,m) - - ! internal - double precision :: max_elem, pre_factor - double precision :: t1,t2,t3 - integer :: k,l,i,j - logical :: not_converged - double precision, allocatable :: RRT(:,:), A(:,:), B(:,:) - - ! Functions - integer :: factorial - - print*,'---rotation_matrix_iterative---' - call wall_time(t1) - - allocate(RRT(m,m),A(m,m),B(m,m)) - - ! k = 0 - R = 0d0 - do i = 1, m - R(i,i) = 1d0 - enddo - - ! k = 1 - R = R + X - - k = 2 - - not_converged = .True. - - do while (not_converged) - - pre_factor = 1d0/DBLE(factorial(k)) - if (pre_factor < 1d-15) then - print*,'pre factor=', pre_factor,'< 1d-15, exit' - exit - endif - - A = X - B = 0d0 - do l = 1, k-1 - call dgemm('N','N',m,m,m,1d0,X,size(X,1),A,size(A,1),0d0,B,size(B,1)) - A = B - enddo - - !print*,'B' - !do i = 1, m - ! print*,B(i,:) * 1d0/DBLE(factorial(k)) - !enddo - - R = R + pre_factor * B - - k = k + 1 - call dgemm('T','N',m,m,m,1d0,R,size(R,1),R,size(R,1),0d0,RRT,size(RRT,1)) - - !print*,'R' - !do i = 1, m - ! write(*,'(10(E12.5))') R(i,:) - !enddo - - do i = 1, m - RRT(i,i) = RRT(i,i) - 1d0 - enddo - - !print*,'RRT' - !do i = 1, m - ! write(*,'(10(E12.5))') RRT(i,:) - !enddo - - max_elem = 0d0 - do j = 1, m - do i = 1, m - if (dabs(RRT(i,j)) > max_elem) then - max_elem = dabs(RRT(i,j)) - endif - enddo - enddo - - print*, 'Iteration:', k - print*, 'Max error in R:', max_elem - - if (max_elem < 1d-12) then - not_converged = .False. - endif - - enddo - - deallocate(RRT,A,B) - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in rotation matrix iterative:', t3 - print*,'---End roration_matrix_iterative---' - - -print*,'Does not work yet, abort' -call abort - -end -#+END_SRC - -** Factorial -#+BEGIN_SRC f90 :comments org :tangle rotation_matrix_iterative.irp.f -function factorial(n) - - implicit none - - integer, intent(in) :: n - integer :: factorial, k - - factorial = 1 - - do k = 1, n - factorial = factorial * k - enddo - -end -#+END_SRC From 833999dfbbdd28a02a40ad68f47f49811c69a603 Mon Sep 17 00:00:00 2001 From: ydamour Date: Sat, 5 Nov 2022 16:06:39 +0100 Subject: [PATCH 42/70] init commit mo_localization --- src/mo_localization/NEED | 2 + src/mo_localization/README.md | 94 + src/mo_localization/TANGLE_org_mode.sh | 7 + src/mo_localization/break_spatial_sym.org | 43 + src/mo_localization/debug_gradient_loc.org | 64 + src/mo_localization/debug_hessian_loc.org | 64 + src/mo_localization/kick_the_mos.org | 33 + src/mo_localization/localization.org | 2460 ++++++++++++++++++++ 8 files changed, 2767 insertions(+) create mode 100644 src/mo_localization/NEED create mode 100644 src/mo_localization/README.md create mode 100755 src/mo_localization/TANGLE_org_mode.sh create mode 100644 src/mo_localization/break_spatial_sym.org create mode 100644 src/mo_localization/debug_gradient_loc.org create mode 100644 src/mo_localization/debug_hessian_loc.org create mode 100644 src/mo_localization/kick_the_mos.org create mode 100644 src/mo_localization/localization.org diff --git a/src/mo_localization/NEED b/src/mo_localization/NEED new file mode 100644 index 00000000..e9d51654 --- /dev/null +++ b/src/mo_localization/NEED @@ -0,0 +1,2 @@ +hartree_fock +utils_trust_region diff --git a/src/mo_localization/README.md b/src/mo_localization/README.md new file mode 100644 index 00000000..215c4ca9 --- /dev/null +++ b/src/mo_localization/README.md @@ -0,0 +1,94 @@ +Please, use the ifort compiler + +Some parameters can be changed with qp edit in the utils_trust_region section + +If you modify the .org files, don't forget to do (you need emacs): +``` +./TANGLE_org_mode.sh +ninja +``` + +The documentation can be read using: +Ctrl-C Ctrl-e l p +after opening the filename.org in emacs. It will produce a +filename.pdf. +(Not available for all the files) +!!! Warning: the documentation can contain some errors !!! + +# Orbital localisation +To localize the MOs: +``` +qp run localization +``` +After that the ezfio directory contains the localized MOs + +But the mo_class must be defined before, run +``` +qp set_mo_class -q +``` +for more information or +``` +qp set_mo_class -c [] -a [] -v [] -i [] -d [] +``` +to set the mo classes. We don't care about the name of the +mo classes. The algorithm just localizes all the MOs of +a given class between them, for all the classes, except the deleted MOs. + +If you just on kind of mo class to localize all the MOs between them +you have to put: +``` +qp set mo_localization security_mo_class false +``` + +Before the localization, a kick is done for each mo class +(except the deleted ones) to break the MOs. This is done by +doing a given rotation between the MOs. +This feature can be removed by setting: +``` +qp set mo_localization kick_in_mos false +``` +and the default angle for the rotation can be changed with: +``` +qp set mo_localization angle_pre_rot 1e-3 # or something else +``` + +After the localization, the MOs of each class (except the deleted ones) +can be sorted between them using the diagonal elements of +the fock matrix with: +``` +qp set mo_localization sort_mos_by_e true +``` + +You can check the Hartree-Fock energy before/during/after the localization +by putting (only for debugging): +``` +qp set mo_localization debug_hf true +``` + +## Foster-Boys & Pipek-Mezey +Foster-Boys: +``` +qp set mo_localization localization_method boys +``` + +Pipek-Mezey: +``` +qp set mo_localization localization_method pipek +``` + +# Break the spatial symmetry of the MOs +To break the spatial symmetry of the MOs: +``` +qp run break_spatial_sym +``` +The default angle for the rotations is too big for this kind of +application, a value between 1e-3 and 1e-6 should break the spatial +symmetry with just a small change in the energy: +``` +qp set mo_localization angle_pre_rot 1e-3 +``` + +# Further improvements: +- Cleaner repo +- Correction of the errors in the documentations +- option with/without trust region diff --git a/src/mo_localization/TANGLE_org_mode.sh b/src/mo_localization/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/mo_localization/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/mo_localization/break_spatial_sym.org b/src/mo_localization/break_spatial_sym.org new file mode 100644 index 00000000..5995b138 --- /dev/null +++ b/src/mo_localization/break_spatial_sym.org @@ -0,0 +1,43 @@ +! A small program to break the spatial symmetry of the MOs. + +! You have to defined your MO classes or set security_mo_class to false +! with: +! qp set orbital_optimization security_mo_class false + +! The default angle for the rotations is too big for this kind of +! application, a value between 1e-3 and 1e-6 should break the spatial +! symmetry with just a small change in the energy. + +#+BEGIN_SRC f90 :comments org :tangle break_spatial_sym.irp.f +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (dim_list_act_orb == mo_num .or. & + dim_list_core_orb + dim_list_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to kick all the orbitals:' + print*, 'qp set orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + + call apply_pre_rotation + +end +#+END_SRC diff --git a/src/mo_localization/debug_gradient_loc.org b/src/mo_localization/debug_gradient_loc.org new file mode 100644 index 00000000..1fcb7fca --- /dev/null +++ b/src/mo_localization/debug_gradient_loc.org @@ -0,0 +1,64 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_gradient_loc.irp.f +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list = list_act + list_size = dim_list_act_orb + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end +#+END_SRC diff --git a/src/mo_localization/debug_hessian_loc.org b/src/mo_localization/debug_hessian_loc.org new file mode 100644 index 00000000..bc23818c --- /dev/null +++ b/src/mo_localization/debug_hessian_loc.org @@ -0,0 +1,64 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_hessian_loc.irp.f +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:,:), H2(:,:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list = list_act + list_size = dim_list_act_orb + + n = list_size*(list_size-1)/2 + + allocate(H(n,n),H2(n,n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i,i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i,i)) > threshold) then + print*,H(i,i) + nb_error = nb_error + 1 + if (dabs(H(i,i)) > max_elem) then + max_elem = H(i,i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end +#+END_SRC diff --git a/src/mo_localization/kick_the_mos.org b/src/mo_localization/kick_the_mos.org new file mode 100644 index 00000000..3d57da37 --- /dev/null +++ b/src/mo_localization/kick_the_mos.org @@ -0,0 +1,33 @@ +#+BEGIN_SRC f90 :comments org :tangle kick_the_mos.irp.f +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (dim_list_act_orb == mo_num .or. & + dim_list_core_orb + dim_list_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to kick all the orbital:' + print*, 'qp set Orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + + call apply_pre_rotation + +end +#+END_SRC diff --git a/src/mo_localization/localization.org b/src/mo_localization/localization.org new file mode 100644 index 00000000..33b52e7d --- /dev/null +++ b/src/mo_localization/localization.org @@ -0,0 +1,2460 @@ +* Orbital localization + +Molecular orbitals localization + +** Doc + +The program localizes the orbitals in function of their mo_class: +- core MOs +- inactive MOs +- active MOs +- virtual MOs +- deleted MOs -> no orbital localization + +Core MOs are localized with core MOs, inactives MOs are localized with +inactives MOs and so on. But deleted orbitals are not localized. + +WARNING: +- The user MUST SPECIFY THE MO CLASSES, otherwise if default mo class + is false the localization will be done for all the orbitals between + them, so the occupied and virtual MOs will be combined together + which is clearly not what we want to do. If default lpmo class is true + the localization will be done for the core, occupied and virtual + orbitals, but pay attention the mo_class are not deleted after... +- The mo class is not important (except "deleted") because it is not + link to the kind of MOs for CASSCF or CIPSI. It is just a way to + separate the MOs in order to localize them separetely, for example + to separate the core MOs, the occupied MOs and the virtuals MOs. +- The user MUST CHANGE THE MO CLASSES AFTER THE LOCALIZATION in order + to have the right mo class for his next calculation... + +For more information on the mo_class: +lpqp set_mo_class -h + +*** Foster-Boys localization +The Foster-Boys localization is a method to generate localized MOs +(LMOs) by minimizing the Foster-Boys criterion: +$$ C_{FB} = \sum_{i=1}^N \left[ < \phi_i | r^2 | \phi_i > - < \phi_i | r | +\phi_i >^2 \right] $$. +In fact it is equivalent to maximise +$$ C_2 = \sum_{i>j, \ i=1}^N \left[ < \phi_i | r | \phi_i > - < +\phi_j | r | \phi_j > \left]^2$$ +or +$$ C_3 = \sum_{i=1}^N \left[ < \phi_i | r | \phi_i > \right]^2.$$ + +Locality of the orbitals: +\begin{align*} +\sigma_i &= \sqrt{ - ^2} \\ +&= \sqrt{ - ^2 + - ^2 + - ^2} +\end{align*} + + +*** Pipek-Mezey localization +J. Pipek, P. G. Mezey, J. Chem. Phys. 90, 4916 (1989) +DOI: 10.1063/1.456588 + +Foster-Boys localization does not preserve the $\sigma - \pi$ separation of the +MOs, it leads to "banana" orbitals. The Pipek-Mezey localization +normally preserves this separation. + +** Localization procedure + +Localization procedure: + +To do the localization we compute the gradient and the +diagonal hessian of the Foster-Boys criterion with respect to the MO +rotations and we minimize it with the Newton method. + +In order to avoid the problem of starting on a saddle point, the +localization procedure starts by giving a little kick in the MOs, by +putting "kick in mos" true, in order to break the symmetry and escape +from a possible saddle point. + +In order to speed up the iteration we compute the gradient, the +diagonal hessian and the step in temporary matrices of the size +(number MOs in mo class by number MOs in mo class) + +** Remarks + +Variables: + +The indexes i and j refere to the positions of the elements in +the "full space", i.e., the arrays containing elements for all the MOs, +but the indexes tmp_i and tmp_j to the positions of the elements in +the "reduced space/subspace", i.e., the arrays containing elements for +a restricted number of MOs. +Example: +The gradient for the localization of the core MOs can be expressed +as a vector of length mo_num*(mo_num-1)/2 with only +n_core_orb*(n_core_orb-1)/2 non zero elements, so it is more relevant +to use a vector of size n_act_orb*(n_core_orb-1)/2. +So here the gradient is a vector of size +tmp_list_size*(tmp_list_size)/2 where tmp_list_size is the number of +MOs is the corresponding mo class. +The same thing happened for the hessian, the matrix containing the +step and the rotation matrix, which are tmp_list_size by tmp_list_size +matrices. + +Ex gradient for 4 core orbitales: +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +0 \\ +\vdots \\ +0 \\ +\end{pmatrix} +\end{align*} + +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The same thing can be done if indexes of the orbitales are not +consecutives since it's done with lists of MOs: + +\begin{align*} +\begin{pmatrix} +0 & -a & 0 & -b & -d & \hdots & 0 \\ +a & 0 & 0 & -c & -e & \hdots & 0 \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +b & c & 0 & 0 & -f & \hdots & 0 \\ +d & e & 0 & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The dipoles are updated using the "ao to mo" subroutine without the +"restore symmetry" which is actually in N^4 but can be rewrite in N^2 +log(N^2). +The bottleneck of the program is normally N^3 with the matrix +multiplications/diagonalizations. The use of the full hessian can be +an improvement but it will scale in N^4... + +** Program + +#+BEGIN_SRC f90 org :tangle localization.irp.f +program localization + call run_localization +end +#+END_SRC + + +Variables: +| pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +| R(mo_num,mo_num) | double precision | Rotation matrix | +| tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +| prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +| spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +| criterion | double precision | Localization criterion | +| prev_criterion | double precision | Previous criterion | +| criterion_model | double precision | Estimated next criterion | +| rho | double precision | Ratio to measure the agreement between the model | +| | | and the reality | +| delta | double precision | Radisu of the trust region | +| norm_grad | double precision | Norm of the gradient | +| info | integer | for dsyev from Lapack | +| max_elem | double precision | maximal element in the gradient | +| v_grad(:) | double precision | Gradient | +| H(:,:) | double precision | Hessian (diagonal) | +| e_val(:) | double precision | Eigenvalues of the hessian | +| W(:,:) | double precision | Eigenvectors of the hessian | +| tmp_x(:) | double precision | Step in 1D (in a subaspace) | +| tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +| tmp_list(:) | double precision | List of MOs in a mo_class | +| i,j,k | integer | Indexes in the full MO space | +| tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +| l | integer | Index for the mo_class | +| key(:) | integer | Key to sort the eigenvalues of the hessian | +| nb_iter | integer | Number of iterations | +| must_exit | logical | To exit the trust region loop | +| cancel_step | logical | To cancel a step | +| not_*converged | logical | To localize the different mo classes | +| t* | double precision | To measure the time | +| n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +| tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +| | | Number of dimension in the subspace | + +Variables in qp_edit for the localization: +| localization_method | +| localization_max_nb_iter | +| default_mo_class | +| thresh_loc_max_elem_grad | +| kick_in_mos | +| angle_pre_rot | + ++ all the variables for the trust region + +Cf. qp_edit orbital optimization + +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:,:), tmp_m_x(:,:), tmp_x(:),W(:,:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method (with qp_edit) + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (n_act_orb == mo_num .or. & + n_core_orb + n_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to localize all the orbitals:' + print*, 'qp set Orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif +#+END_SRC + +** Loc +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f + ! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n,tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + j = key(i) + W(j,i) = 1d0 + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + enddo + + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end +#+END_SRC + +** Gathering +Gradient/hessian/criterion for the localization: +They are chosen in function of the localization method + +Gradient: + +qp_edit : +| localization_method | method for the localization | + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + v_grad = 0d0 + max_elem = 0d0 + norm_grad = 0d0 + endif + +end +#+END_SRC + +Hessian: + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + H = 0d0 + endif + +end +#+END_SRC + +Criterion: + +Output: +| criterion | double precision | Criterion for the orbital localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + criterion = 0d0 + endif + +end +#+END_SRC + +Subroutine to update the datas needed for the localization +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + endif +end +#+END_SRC + +** Foster-Boys +*** Gradient +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'' + print*,'---End gradient_FB---' + print*,'' + +end subroutine +#+END_SRC + +*** Gradient (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'' + print*,'---End gradient_FB_omp---' + print*,'' + +end subroutine +#+END_SRC + +*** Hessian + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +Internal: +| beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + print*,'' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Min elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + ! Max elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + ! Near 0 + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'' + print*,'---End hessian_FB---' + print*,'' + +end subroutine +#+END_SRC + +*** Hessian (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do j = 1, tmp_n + do i = 1, tmp_n + H(i,j) = 0d0 + enddo + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Min elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + ! Max elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + ! Near 0 + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'' + print*,'---End hessian_FB_omp---' + print*,'' + +end subroutine +#+END_SRC + +** Pipek-Mezey +*** Gradient v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end +#+END_SRC + +*** Gradient + +The gradient is + +\begin{align*} +\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +\end{align*} +with +\begin{align*} +\gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +\end{align*} + +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +| tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +| tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +| | | product and compute tmp_int | +| CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +| tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the mo_class | +| tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the nuclei | +| tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +| | | values depending of the nuclei | +| a | | index to loop over the nuclei | +| b | | index to loop over the AOs which belongs to the nuclei a | +| mu | | index to refer to an AO which belongs to the nuclei a | +| rho | | index to loop over all the AOs | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'' + print*,'---End gradient_PM---' + print*,'' + +end +#+END_SRC + +*** Hessian v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + +! max_elem = 0d0 +! do tmp_k = 1, tmp_n +! if (H(tmp_k,tmp_k) < max_elem) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Min elem H:', max_elem +! +! max_elem = 0d0 +! do tmp_k = 1, tmp_n +! if (H(tmp_k,tmp_k) > max_elem) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Max elem H:', max_elem +! +! max_elem = 1d10 +! do tmp_k = 1, tmp_n +! if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta,tmp_int) + +end +#+END_SRC + +*** Hessian + +The hessian is +\begin{align*} +\left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +\end{align*} +\begin{align*} +\beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +\end{align*} + +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'' + print*,'---End hessian_PM---' + print*,'' + +end + +#+END_SRC + +** Criterion +*** Criterion PM (old) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end +#+END_SRC + +*** Criterion PM + +The criterion is computed as +\begin{align*} +\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +\end{align*} +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + print*,'' + +end +#+END_SRC + +*** Criterion PM v3 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + print*,'' + +end +#+END_SRC + +*** Criterion FB (old) + +The criterion is just computed as + +\begin{align*} +C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +\end{align*} + +The minus sign is here in order to minimize this criterion + +Output: +| criterion | double precision | criterion for the Foster-Boys localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +*** Criterion FB +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +** Spatial extent + +The spatial extent of an orbital $i$ is computed as +\begin{align*} +\sum_{\lambda=x,y,z}\sqrt{ - ^2} +\end{align*} + +From that we can also compute the average and the standard deviation + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end +#+END_SRC + +** Utils + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:), tmp_list(:) + +! allocate(iorder(mo_num), fock_energies_tmp(mo_num), new_mo_coef(ao_num, mo_num)) +! +! do i = 1, mo_num +! fock_energies_tmp(i) = Fock_matrix_diag_mo(i) +! print*,'fock_energies_tmp(i) = ',fock_energies_tmp(i) +! iorder(i) = i +! enddo +! +! print*,'' +! print*,'Sorting by Fock energies' +! print*,'' +! +! call dsort(fock_energies_tmp, iorder, mo_num) +! +! do i = 1, mo_num +! k = iorder(i) +! print*,'fock_energies_new(i) = ',fock_energies_tmp(i) +! do j = 1, ao_num +! new_mo_coef(j,i) = mo_coef(j,k) +! enddo +! enddo + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end + +#+END_SRC + From f77d52cbd4f8c573e2b25c5a066664d9425c4027 Mon Sep 17 00:00:00 2001 From: ydamour Date: Sat, 5 Nov 2022 16:08:36 +0100 Subject: [PATCH 43/70] mo_localization file.irp.f --- src/mo_localization/break_spatial_sym.irp.f | 42 + src/mo_localization/debug_gradient_loc.irp.f | 62 + src/mo_localization/debug_hessian_loc.irp.f | 62 + src/mo_localization/kick_the_mos.irp.f | 31 + src/mo_localization/localization.irp.f | 456 +++++ src/mo_localization/localization_sub.irp.f | 1787 ++++++++++++++++++ 6 files changed, 2440 insertions(+) create mode 100644 src/mo_localization/break_spatial_sym.irp.f create mode 100644 src/mo_localization/debug_gradient_loc.irp.f create mode 100644 src/mo_localization/debug_hessian_loc.irp.f create mode 100644 src/mo_localization/kick_the_mos.irp.f create mode 100644 src/mo_localization/localization.irp.f create mode 100644 src/mo_localization/localization_sub.irp.f diff --git a/src/mo_localization/break_spatial_sym.irp.f b/src/mo_localization/break_spatial_sym.irp.f new file mode 100644 index 00000000..6a1003df --- /dev/null +++ b/src/mo_localization/break_spatial_sym.irp.f @@ -0,0 +1,42 @@ +! ! A small program to break the spatial symmetry of the MOs. + +! ! You have to defined your MO classes or set security_mo_class to false +! ! with: +! ! qp set orbital_optimization security_mo_class false + +! ! The default angle for the rotations is too big for this kind of +! ! application, a value between 1e-3 and 1e-6 should break the spatial +! ! symmetry with just a small change in the energy. + + +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (dim_list_act_orb == mo_num .or. & + dim_list_core_orb + dim_list_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to kick all the orbitals:' + print*, 'qp set orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + + call apply_pre_rotation + +end diff --git a/src/mo_localization/debug_gradient_loc.irp.f b/src/mo_localization/debug_gradient_loc.irp.f new file mode 100644 index 00000000..a0e5432d --- /dev/null +++ b/src/mo_localization/debug_gradient_loc.irp.f @@ -0,0 +1,62 @@ +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list = list_act + list_size = dim_list_act_orb + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end diff --git a/src/mo_localization/debug_hessian_loc.irp.f b/src/mo_localization/debug_hessian_loc.irp.f new file mode 100644 index 00000000..bfb1cbbc --- /dev/null +++ b/src/mo_localization/debug_hessian_loc.irp.f @@ -0,0 +1,62 @@ +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:,:), H2(:,:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list = list_act + list_size = dim_list_act_orb + + n = list_size*(list_size-1)/2 + + allocate(H(n,n),H2(n,n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i,i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i,i)) > threshold) then + print*,H(i,i) + nb_error = nb_error + 1 + if (dabs(H(i,i)) > max_elem) then + max_elem = H(i,i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end diff --git a/src/mo_localization/kick_the_mos.irp.f b/src/mo_localization/kick_the_mos.irp.f new file mode 100644 index 00000000..3ca90fb5 --- /dev/null +++ b/src/mo_localization/kick_the_mos.irp.f @@ -0,0 +1,31 @@ +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (dim_list_act_orb == mo_num .or. & + dim_list_core_orb + dim_list_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to kick all the orbital:' + print*, 'qp set Orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + + call apply_pre_rotation + +end diff --git a/src/mo_localization/localization.irp.f b/src/mo_localization/localization.irp.f new file mode 100644 index 00000000..6364851d --- /dev/null +++ b/src/mo_localization/localization.irp.f @@ -0,0 +1,456 @@ +program localization + call run_localization +end + + + + +! Variables: +! | pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +! | R(mo_num,mo_num) | double precision | Rotation matrix | +! | tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +! | prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +! | spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +! | criterion | double precision | Localization criterion | +! | prev_criterion | double precision | Previous criterion | +! | criterion_model | double precision | Estimated next criterion | +! | rho | double precision | Ratio to measure the agreement between the model | +! | | | and the reality | +! | delta | double precision | Radisu of the trust region | +! | norm_grad | double precision | Norm of the gradient | +! | info | integer | for dsyev from Lapack | +! | max_elem | double precision | maximal element in the gradient | +! | v_grad(:) | double precision | Gradient | +! | H(:,:) | double precision | Hessian (diagonal) | +! | e_val(:) | double precision | Eigenvalues of the hessian | +! | W(:,:) | double precision | Eigenvectors of the hessian | +! | tmp_x(:) | double precision | Step in 1D (in a subaspace) | +! | tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +! | tmp_list(:) | double precision | List of MOs in a mo_class | +! | i,j,k | integer | Indexes in the full MO space | +! | tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +! | l | integer | Index for the mo_class | +! | key(:) | integer | Key to sort the eigenvalues of the hessian | +! | nb_iter | integer | Number of iterations | +! | must_exit | logical | To exit the trust region loop | +! | cancel_step | logical | To cancel a step | +! | not_*converged | logical | To localize the different mo classes | +! | t* | double precision | To measure the time | +! | n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +! | tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +! | | | Number of dimension in the subspace | + +! Variables in qp_edit for the localization: +! | localization_method | +! | localization_max_nb_iter | +! | default_mo_class | +! | thresh_loc_max_elem_grad | +! | kick_in_mos | +! | angle_pre_rot | + +! + all the variables for the trust region + +! Cf. qp_edit orbital optimization + + +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:,:), tmp_m_x(:,:), tmp_x(:),W(:,:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method (with qp_edit) + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (n_act_orb == mo_num .or. & + n_core_orb + n_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to localize all the orbitals:' + print*, 'qp set Orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + +! Loc + + ! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n,tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + j = key(i) + W(j,i) = 1d0 + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + enddo + + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end diff --git a/src/mo_localization/localization_sub.irp.f b/src/mo_localization/localization_sub.irp.f new file mode 100644 index 00000000..90a4bfb5 --- /dev/null +++ b/src/mo_localization/localization_sub.irp.f @@ -0,0 +1,1787 @@ +! Gathering +! Gradient/hessian/criterion for the localization: +! They are chosen in function of the localization method + +! Gradient: + +! qp_edit : +! | localization_method | method for the localization | + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + + +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + v_grad = 0d0 + max_elem = 0d0 + norm_grad = 0d0 + endif + +end + + + +! Hessian: + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + H = 0d0 + endif + +end + + + +! Criterion: + +! Output: +! | criterion | double precision | Criterion for the orbital localization | + + +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + criterion = 0d0 + endif + +end + + + +! Subroutine to update the datas needed for the localization + +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + endif +end + +! Gradient +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'' + print*,'---End gradient_FB---' + print*,'' + +end subroutine + +! Gradient (OMP) + +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'' + print*,'---End gradient_FB_omp---' + print*,'' + +end subroutine + +! Hessian + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! Internal: +! | beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + print*,'' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Min elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + ! Max elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + ! Near 0 + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'' + print*,'---End hessian_FB---' + print*,'' + +end subroutine + +! Hessian (OMP) + +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do j = 1, tmp_n + do i = 1, tmp_n + H(i,j) = 0d0 + enddo + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Min elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + ! Max elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + ! Near 0 + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'' + print*,'---End hessian_FB_omp---' + print*,'' + +end subroutine + +! Gradient v1 + +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end + +! Gradient + +! The gradient is + +! \begin{align*} +! \left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +! \end{align*} +! with +! \begin{align*} +! \gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +! \end{align*} + +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +! | tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +! | tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +! | | | product and compute tmp_int | +! | CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +! | tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the mo_class | +! | tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the nuclei | +! | tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +! | | | values depending of the nuclei | +! | a | | index to loop over the nuclei | +! | b | | index to loop over the AOs which belongs to the nuclei a | +! | mu | | index to refer to an AO which belongs to the nuclei a | +! | rho | | index to loop over all the AOs | + + +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'' + print*,'---End gradient_PM---' + print*,'' + +end + +! Hessian v1 + +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + +! max_elem = 0d0 +! do tmp_k = 1, tmp_n +! if (H(tmp_k,tmp_k) < max_elem) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Min elem H:', max_elem +! +! max_elem = 0d0 +! do tmp_k = 1, tmp_n +! if (H(tmp_k,tmp_k) > max_elem) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Max elem H:', max_elem +! +! max_elem = 1d10 +! do tmp_k = 1, tmp_n +! if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta,tmp_int) + +end + +! Hessian + +! The hessian is +! \begin{align*} +! \left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +! \end{align*} +! \begin{align*} +! \beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +! \end{align*} + +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + + +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'' + print*,'---End hessian_PM---' + print*,'' + +end + +! Criterion PM (old) + +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end + +! Criterion PM + +! The criterion is computed as +! \begin{align*} +! \mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +! \end{align*} +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} + + +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + print*,'' + +end + +! Criterion PM v3 + +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + print*,'' + +end + +! Criterion FB (old) + +! The criterion is just computed as + +! \begin{align*} +! C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +! \end{align*} + +! The minus sign is here in order to minimize this criterion + +! Output: +! | criterion | double precision | criterion for the Foster-Boys localization | + + +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +! Criterion FB + +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +! Spatial extent + +! The spatial extent of an orbital $i$ is computed as +! \begin{align*} +! \sum_{\lambda=x,y,z}\sqrt{ - ^2} +! \end{align*} + +! From that we can also compute the average and the standard deviation + + +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end + +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end + +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end + +! Utils + + +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end + +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine + +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end + +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:), tmp_list(:) + +! allocate(iorder(mo_num), fock_energies_tmp(mo_num), new_mo_coef(ao_num, mo_num)) +! +! do i = 1, mo_num +! fock_energies_tmp(i) = Fock_matrix_diag_mo(i) +! print*,'fock_energies_tmp(i) = ',fock_energies_tmp(i) +! iorder(i) = i +! enddo +! +! print*,'' +! print*,'Sorting by Fock energies' +! print*,'' +! +! call dsort(fock_energies_tmp, iorder, mo_num) +! +! do i = 1, mo_num +! k = iorder(i) +! print*,'fock_energies_new(i) = ',fock_energies_tmp(i) +! do j = 1, ao_num +! new_mo_coef(j,i) = mo_coef(j,k) +! enddo +! enddo + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end From 700041dfc3686a00f05409151212c8bdba1f790e Mon Sep 17 00:00:00 2001 From: ydamour Date: Sat, 5 Nov 2022 16:17:46 +0100 Subject: [PATCH 44/70] Doc --- src/mo_localization/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mo_localization/README.md b/src/mo_localization/README.md index 215c4ca9..e49dcd24 100644 --- a/src/mo_localization/README.md +++ b/src/mo_localization/README.md @@ -1,3 +1,5 @@ +# mo_localization + Please, use the ifort compiler Some parameters can be changed with qp edit in the utils_trust_region section From ea73b5de7ddf2c00dac1f19a7c9a7d06da62bc0d Mon Sep 17 00:00:00 2001 From: ydamour Date: Sat, 5 Nov 2022 16:47:00 +0100 Subject: [PATCH 45/70] update doc loc --- src/mo_localization/README.md | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/mo_localization/README.md b/src/mo_localization/README.md index e49dcd24..3d692987 100644 --- a/src/mo_localization/README.md +++ b/src/mo_localization/README.md @@ -1,22 +1,17 @@ # mo_localization -Please, use the ifort compiler - -Some parameters can be changed with qp edit in the utils_trust_region section - +Some parameters can be changed with qp edit in the mo_localization section +(cf below). Similarly for the trust region parameters in the +utils_trust_region section. The localization without the trust region +is not available for the moment. + +The irf.f files can be generated from the org ones using emacs. If you modify the .org files, don't forget to do (you need emacs): ``` ./TANGLE_org_mode.sh ninja ``` -The documentation can be read using: -Ctrl-C Ctrl-e l p -after opening the filename.org in emacs. It will produce a -filename.pdf. -(Not available for all the files) -!!! Warning: the documentation can contain some errors !!! - # Orbital localisation To localize the MOs: ``` @@ -24,7 +19,7 @@ qp run localization ``` After that the ezfio directory contains the localized MOs -But the mo_class must be defined before, run +But to do so the mo_class must be defined before, run ``` qp set_mo_class -q ``` @@ -44,7 +39,7 @@ qp set mo_localization security_mo_class false Before the localization, a kick is done for each mo class (except the deleted ones) to break the MOs. This is done by -doing a given rotation between the MOs. +doing a rotation between the MOs. This feature can be removed by setting: ``` qp set mo_localization kick_in_mos false From 65202688cca9618323fcc23086a2c741c64de3d8 Mon Sep 17 00:00:00 2001 From: ydamour Date: Sun, 6 Nov 2022 00:39:32 +0100 Subject: [PATCH 46/70] update doc 1 --- src/utils_trust_region/algo_trust.org | 8 ++-- src/utils_trust_region/apply_mo_rotation.org | 4 +- src/utils_trust_region/rotation_matrix.org | 4 +- .../sub_to_full_rotation_matrix.org | 4 +- .../trust_region_expected_e.org | 4 +- .../trust_region_optimal_lambda.org | 44 +++++++++---------- src/utils_trust_region/trust_region_rho.org | 4 +- src/utils_trust_region/trust_region_step.org | 4 +- src/utils_trust_region/vec_to_mat_index.org | 4 +- src/utils_trust_region/vec_to_mat_v2.org | 4 +- 10 files changed, 42 insertions(+), 42 deletions(-) diff --git a/src/utils_trust_region/algo_trust.org b/src/utils_trust_region/algo_trust.org index 40742b45..aa836f98 100644 --- a/src/utils_trust_region/algo_trust.org +++ b/src/utils_trust_region/algo_trust.org @@ -136,9 +136,9 @@ subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho, include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the step and the expected criterion/energy after the step - !END_DOC + END_DOC implicit none @@ -208,9 +208,9 @@ subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, cri include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute if the step should be cancelled - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/apply_mo_rotation.org b/src/utils_trust_region/apply_mo_rotation.org index 955997e9..79346119 100644 --- a/src/utils_trust_region/apply_mo_rotation.org +++ b/src/utils_trust_region/apply_mo_rotation.org @@ -25,9 +25,9 @@ subroutine apply_mo_rotation(R,prev_mos) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the new MOs knowing the rotation matrix - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/rotation_matrix.org b/src/utils_trust_region/rotation_matrix.org index 63a8ae44..73ba0298 100644 --- a/src/utils_trust_region/rotation_matrix.org +++ b/src/utils_trust_region/rotation_matrix.org @@ -61,10 +61,10 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) implicit none - !BEGIN_DOC + BEGIN_DOC ! Rotation matrix to rotate the molecular orbitals. ! If the rotation is too large the transformation is not unitary and must be cancelled. - !END_DOC + END_DOC include 'pi.h' diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.org b/src/utils_trust_region/sub_to_full_rotation_matrix.org index f0cf0bfc..16434dc8 100644 --- a/src/utils_trust_region/sub_to_full_rotation_matrix.org +++ b/src/utils_trust_region/sub_to_full_rotation_matrix.org @@ -32,9 +32,9 @@ Internal: #+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) - !BEGIN_DOC + BEGIN_DOC ! Compute the full rotation matrix from a smaller one - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_expected_e.org b/src/utils_trust_region/trust_region_expected_e.org index 2ada7768..58c8f804 100644 --- a/src/utils_trust_region/trust_region_expected_e.org +++ b/src/utils_trust_region/trust_region_expected_e.org @@ -33,9 +33,9 @@ subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the expected criterion/energy after the application of the step x - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_optimal_lambda.org b/src/utils_trust_region/trust_region_optimal_lambda.org index 6ed99d1d..39173f5d 100644 --- a/src/utils_trust_region/trust_region_optimal_lambda.org +++ b/src/utils_trust_region/trust_region_optimal_lambda.org @@ -153,9 +153,9 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Research the optimal lambda to constrain the step size in the trust region - !END_DOC + END_DOC implicit none @@ -508,9 +508,9 @@ function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -645,9 +645,9 @@ function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -794,9 +794,9 @@ function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute ||x(lambda)||^2 - !END_DOC + END_DOC implicit none @@ -906,9 +906,9 @@ function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -1004,9 +1004,9 @@ function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -1107,9 +1107,9 @@ function f_norm_trust_region(n,e_val,tmp_wtg,lambda) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute ||x(lambda)||^2 - !END_DOC + END_DOC implicit none @@ -1190,9 +1190,9 @@ function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -1347,9 +1347,9 @@ function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -1506,9 +1506,9 @@ function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -1601,9 +1601,9 @@ function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_rho.org b/src/utils_trust_region/trust_region_rho.org index 698e1fab..9b25ee29 100644 --- a/src/utils_trust_region/trust_region_rho.org +++ b/src/utils_trust_region/trust_region_rho.org @@ -47,9 +47,9 @@ subroutine trust_region_rho(prev_energy, energy,e_model,rho) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute rho, the agreement between the predicted criterion/energy and the real one - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_step.org b/src/utils_trust_region/trust_region_step.org index 85832672..331453a3 100644 --- a/src/utils_trust_region/trust_region_step.org +++ b/src/utils_trust_region/trust_region_step.org @@ -375,9 +375,9 @@ subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compuet the step in the trust region - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/vec_to_mat_index.org b/src/utils_trust_region/vec_to_mat_index.org index 13b1b5ee..0a09fa86 100644 --- a/src/utils_trust_region/vec_to_mat_index.org +++ b/src/utils_trust_region/vec_to_mat_index.org @@ -36,10 +36,10 @@ subroutine vec_to_mat_index(i,p,q) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing ! its index i a vector - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/vec_to_mat_v2.org b/src/utils_trust_region/vec_to_mat_v2.org index 4ce5f5e1..4e358a88 100644 --- a/src/utils_trust_region/vec_to_mat_v2.org +++ b/src/utils_trust_region/vec_to_mat_v2.org @@ -8,9 +8,9 @@ Can be done in OMP (for the first part and with omp critical for the second) #+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f subroutine vec_to_mat_v2(n,m,v_x,m_x) - !BEGIN_DOC + BEGIN_DOC ! Vector to antisymmetric matrix - !END_DOC + END_DOC implicit none From cada90b6750d5740f0029b95f826e158defd88fe Mon Sep 17 00:00:00 2001 From: ydamour Date: Sun, 6 Nov 2022 00:40:11 +0100 Subject: [PATCH 47/70] update doc 2 --- src/utils_trust_region/algo_trust.irp.f | 8 ++-- .../apply_mo_rotation.irp.f | 4 +- src/utils_trust_region/rotation_matrix.irp.f | 4 +- .../sub_to_full_rotation_matrix.irp.f | 4 +- .../trust_region_expected_e.irp.f | 4 +- .../trust_region_optimal_lambda.irp.f | 44 +++++++++---------- src/utils_trust_region/trust_region_rho.irp.f | 4 +- .../trust_region_step.irp.f | 4 +- src/utils_trust_region/vec_to_mat_v2.irp.f | 4 +- 9 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f index bfb3a0ff..eac17275 100644 --- a/src/utils_trust_region/algo_trust.irp.f +++ b/src/utils_trust_region/algo_trust.irp.f @@ -137,9 +137,9 @@ subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho, include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the step and the expected criterion/energy after the step - !END_DOC + END_DOC implicit none @@ -210,9 +210,9 @@ subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, cri include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute if the step should be cancelled - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f index a313769d..42aac5f7 100644 --- a/src/utils_trust_region/apply_mo_rotation.irp.f +++ b/src/utils_trust_region/apply_mo_rotation.irp.f @@ -25,9 +25,9 @@ subroutine apply_mo_rotation(R,prev_mos) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the new MOs knowing the rotation matrix - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f index 0cf47355..4738fd67 100644 --- a/src/utils_trust_region/rotation_matrix.irp.f +++ b/src/utils_trust_region/rotation_matrix.irp.f @@ -61,10 +61,10 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) implicit none - !BEGIN_DOC + BEGIN_DOC ! Rotation matrix to rotate the molecular orbitals. ! If the rotation is too large the transformation is not unitary and must be cancelled. - !END_DOC + END_DOC include 'pi.h' diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f index 75d04352..bdd1f6ba 100644 --- a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f +++ b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f @@ -32,9 +32,9 @@ subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) - !BEGIN_DOC + BEGIN_DOC ! Compute the full rotation matrix from a smaller one - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f index 1a18dfb3..b7d849d1 100644 --- a/src/utils_trust_region/trust_region_expected_e.irp.f +++ b/src/utils_trust_region/trust_region_expected_e.irp.f @@ -33,9 +33,9 @@ subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the expected criterion/energy after the application of the step x - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f index 71f6381d..f007a860 100644 --- a/src/utils_trust_region/trust_region_optimal_lambda.irp.f +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -153,9 +153,9 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Research the optimal lambda to constrain the step size in the trust region - !END_DOC + END_DOC implicit none @@ -508,9 +508,9 @@ function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -644,9 +644,9 @@ function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -792,9 +792,9 @@ function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute ||x(lambda)||^2 - !END_DOC + END_DOC implicit none @@ -903,9 +903,9 @@ function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -1000,9 +1000,9 @@ function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -1102,9 +1102,9 @@ function f_norm_trust_region(n,e_val,tmp_wtg,lambda) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute ||x(lambda)||^2 - !END_DOC + END_DOC implicit none @@ -1184,9 +1184,9 @@ function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -1340,9 +1340,9 @@ function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -1498,9 +1498,9 @@ function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - !END_DOC + END_DOC implicit none @@ -1592,9 +1592,9 @@ function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f index eb280ef0..45738736 100644 --- a/src/utils_trust_region/trust_region_rho.irp.f +++ b/src/utils_trust_region/trust_region_rho.irp.f @@ -47,9 +47,9 @@ subroutine trust_region_rho(prev_energy, energy,e_model,rho) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compute rho, the agreement between the predicted criterion/energy and the real one - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f index afa19dec..42aa6ed4 100644 --- a/src/utils_trust_region/trust_region_step.irp.f +++ b/src/utils_trust_region/trust_region_step.irp.f @@ -375,9 +375,9 @@ subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) include 'pi.h' - !BEGIN_DOC + BEGIN_DOC ! Compuet the step in the trust region - !END_DOC + END_DOC implicit none diff --git a/src/utils_trust_region/vec_to_mat_v2.irp.f b/src/utils_trust_region/vec_to_mat_v2.irp.f index e184d3ba..9140b8d3 100644 --- a/src/utils_trust_region/vec_to_mat_v2.irp.f +++ b/src/utils_trust_region/vec_to_mat_v2.irp.f @@ -8,9 +8,9 @@ subroutine vec_to_mat_v2(n,m,v_x,m_x) - !BEGIN_DOC + BEGIN_DOC ! Vector to antisymmetric matrix - !END_DOC + END_DOC implicit none From 2aca8aa166f3537e04b7cfd1bea0ed5b7163b131 Mon Sep 17 00:00:00 2001 From: ydamour Date: Mon, 7 Nov 2022 18:39:54 +0100 Subject: [PATCH 48/70] ezfio.cfg --- src/mo_localization/EZFIO.cfg | 48 +++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 src/mo_localization/EZFIO.cfg diff --git a/src/mo_localization/EZFIO.cfg b/src/mo_localization/EZFIO.cfg new file mode 100644 index 00000000..f59c3efd --- /dev/null +++ b/src/mo_localization/EZFIO.cfg @@ -0,0 +1,48 @@ +[localization_method] +type: character*(32) +doc: Method for the orbital localization. boys : Foster-Boys, pipek : Pipek-Mezey +interface: ezfio,provider,ocaml +default: boys + +[localization_max_nb_iter] +type: integer +doc: Maximal number of iterations for the orbital localization +interface: ezfio,provider,ocaml +default: 1000 + +[security_mo_class] +type: logical +doc: If true, call abort if the number of active orbital or the number of core + active orbitals is equal to the number of molecular orbitals, else uses the actual mo_class. It is a security if you forget to set the mo_class before the localization +interface: ezfio,provider,ocaml +default: true + +[thresh_loc_max_elem_grad] +type: double precision +doc: Threshold for the convergence, the localization exits when the largest element in the gradient is smaller than thresh_localization_max_elem_grad +interface: ezfio,provider,ocaml +default: 1.e-6 + +[kick_in_mos] +type: logical +doc: If True, apply a rotation of an angle angle_pre_rot between the MOs of a same mo_class before the localization +interface: ezfio,provider,ocaml +default: true + +[angle_pre_rot] +type: double precision +doc: Define the angle for the rotation of the MOs before the localization (in rad) +interface: ezfio,provider,ocaml +default: 0.1 + +[sort_mos_by_e] +type: logical +doc: If True, sorts the MOs using the diagonal elements of the Fock matrix +interface: ezfio,provider,ocaml +default: false + +[debug_hf] +type: logical +doc: If True, prints the HF energy before/after the different steps of the localization. Only for debugging. +interface: ezfio,provider,ocaml +default: false + From d827a909c4eca40a60f1b7ac6bbf7afa5c087346 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 8 Nov 2022 10:15:47 +0100 Subject: [PATCH 49/70] pi.h --- src/utils_trust_region/pi.h | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/utils_trust_region/pi.h diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h new file mode 100644 index 00000000..2c36a9f0 --- /dev/null +++ b/src/utils_trust_region/pi.h @@ -0,0 +1,2 @@ + logical, parameter :: debug=.False. + double precision, parameter :: pi = 3.1415926535897932d0 From 14841c5a6f84a0b12c45cd5716507fbf5e2b6a23 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 8 Nov 2022 10:20:59 +0100 Subject: [PATCH 50/70] remove debug --- src/utils_trust_region/apply_mo_rotation.irp.f | 12 ++++++------ src/utils_trust_region/apply_mo_rotation.org | 12 ++++++------ src/utils_trust_region/pi.h | 2 +- .../trust_region_optimal_lambda.irp.f | 8 ++++---- .../trust_region_optimal_lambda.org | 6 +++--- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f index 42aac5f7..e274ec11 100644 --- a/src/utils_trust_region/apply_mo_rotation.irp.f +++ b/src/utils_trust_region/apply_mo_rotation.irp.f @@ -60,12 +60,12 @@ subroutine apply_mo_rotation(R,prev_mos) prev_mos = mo_coef mo_coef = new_mos - if (debug) then - print*,'New mo_coef : ' - do i = 1, mo_num - write(*,'(100(F10.5))') mo_coef(i,:) - enddo - endif + !if (debug) then + ! print*,'New mo_coef : ' + ! do i = 1, mo_num + ! write(*,'(100(F10.5))') mo_coef(i,:) + ! enddo + !endif ! Save the new MOs and change the label mo_label = 'MCSCF' diff --git a/src/utils_trust_region/apply_mo_rotation.org b/src/utils_trust_region/apply_mo_rotation.org index 79346119..918581b7 100644 --- a/src/utils_trust_region/apply_mo_rotation.org +++ b/src/utils_trust_region/apply_mo_rotation.org @@ -60,12 +60,12 @@ subroutine apply_mo_rotation(R,prev_mos) prev_mos = mo_coef mo_coef = new_mos - if (debug) then - print*,'New mo_coef : ' - do i = 1, mo_num - write(*,'(100(F10.5))') mo_coef(i,:) - enddo - endif + !if (debug) then + ! print*,'New mo_coef : ' + ! do i = 1, mo_num + ! write(*,'(100(F10.5))') mo_coef(i,:) + ! enddo + !endif ! Save the new MOs and change the label mo_label = 'MCSCF' diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h index 2c36a9f0..bbfabfec 100644 --- a/src/utils_trust_region/pi.h +++ b/src/utils_trust_region/pi.h @@ -1,2 +1,2 @@ - logical, parameter :: debug=.False. + !logical, parameter :: debug=.False. double precision, parameter :: pi = 3.1415926535897932d0 diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f index f007a860..85e7bf2a 100644 --- a/src/utils_trust_region/trust_region_optimal_lambda.irp.f +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -214,7 +214,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! Resolution with the Newton method: -! Initialization + ! Initialization epsilon = 1d-4 lambda =MAX(0d0, -e_val(1)) @@ -295,9 +295,9 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) print*, 'w_1^T . g =', tmp_wtg(1) ! Debug - if (debug) then - print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' - endif + !if (debug) then + ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' + !endif ! Initialization i = 1 diff --git a/src/utils_trust_region/trust_region_optimal_lambda.org b/src/utils_trust_region/trust_region_optimal_lambda.org index 39173f5d..b39c9a10 100644 --- a/src/utils_trust_region/trust_region_optimal_lambda.org +++ b/src/utils_trust_region/trust_region_optimal_lambda.org @@ -294,9 +294,9 @@ Resolution with the Newton method: print*, 'w_1^T . g =', tmp_wtg(1) ! Debug - if (debug) then - print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' - endif + !if (debug) then + ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' + !endif ! Initialization i = 1 From ebd0cf1d9a1324bd69bc19d2e4aaf92465057fad Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 8 Nov 2022 11:59:02 +0100 Subject: [PATCH 51/70] vec_to_mat_index --- src/utils_trust_region/vec_to_mat_index.irp.f | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 src/utils_trust_region/vec_to_mat_index.irp.f diff --git a/src/utils_trust_region/vec_to_mat_index.irp.f b/src/utils_trust_region/vec_to_mat_index.irp.f new file mode 100644 index 00000000..5d68748b --- /dev/null +++ b/src/utils_trust_region/vec_to_mat_index.irp.f @@ -0,0 +1,71 @@ +! Vector to matrix indexes + +! *Compute the indexes p,q of a matrix element with the vector index i* + +! Vector (i) -> lower diagonal matrix (p,q), p > q + +! If a matrix is antisymmetric it can be reshaped as a vector. And the +! vector can be reshaped as an antisymmetric matrix + +! \begin{align*} +! \begin{pmatrix} +! 0 & -1 & -2 & -4 \\ +! 1 & 0 & -3 & -5 \\ +! 2 & 3 & 0 & -6 \\ +! 4 & 5 & 6 & 0 +! \end{pmatrix} +! \Leftrightarrow +! \begin{pmatrix} +! 1 & 2 & 3 & 4 & 5 & 6 +! \end{pmatrix} +! \end{align*} + +! !!! Here the algorithm only work for the lower diagonal !!! + +! Input: +! | i | integer | index in the vector | + +! Ouput: +! | p,q | integer | corresponding indexes in the lower diagonal of a matrix | +! | | | p > q, | +! | | | p -> row, | +! | | | q -> column | + + +subroutine vec_to_mat_index(i,p,q) + + include 'pi.h' + + BEGIN_DOC + ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing + ! its index i a vector + END_DOC + + implicit none + + ! Variables + + ! in + integer,intent(in) :: i + + ! out + integer, intent(out) :: p,q + + ! internal + integer :: a,b + double precision :: da + + da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) + a = INT(da) + if ((a*(a-1))/2==i) then + p = a-1 + else + p = a + endif + b = p*(p-1)/2 + + ! Matrix element indexes + p = p + 1 + q = i - b + +end subroutine From 0f6572bde1d752b9141baa76e48d5f0da24a4808 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 8 Nov 2022 15:34:10 +0100 Subject: [PATCH 52/70] mat to vec index --- src/bitmask/core_inact_act_virt.irp.f | 6 +- src/utils_trust_region/mat_to_vec_index.irp.f | 61 +++++++++++++++++++ 2 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 src/utils_trust_region/mat_to_vec_index.irp.f diff --git a/src/bitmask/core_inact_act_virt.irp.f b/src/bitmask/core_inact_act_virt.irp.f index d83d69e9..f3eaff32 100644 --- a/src/bitmask/core_inact_act_virt.irp.f +++ b/src/bitmask/core_inact_act_virt.irp.f @@ -173,10 +173,7 @@ BEGIN_PROVIDER [integer, n_core_inact_act_orb ] END_DOC n_core_inact_act_orb = (n_core_orb + n_inact_orb + n_act_orb) END_PROVIDER - - - BEGIN_PROVIDER [ integer(bit_kind), core_bitmask , (N_int,2) ] implicit none BEGIN_DOC @@ -443,5 +440,4 @@ BEGIN_PROVIDER [integer, list_all_but_del_orb, (n_all_but_del_orb)] endif enddo -END_PROVIDER - +END_PROVIDER diff --git a/src/utils_trust_region/mat_to_vec_index.irp.f b/src/utils_trust_region/mat_to_vec_index.irp.f new file mode 100644 index 00000000..35e12232 --- /dev/null +++ b/src/utils_trust_region/mat_to_vec_index.irp.f @@ -0,0 +1,61 @@ +! Matrix to vector index + +! *Compute the index i of a vector element from the indexes p,q of a +! matrix element* + +! Lower diagonal matrix (p,q), p > q -> vector (i) + +! If a matrix is antisymmetric it can be reshaped as a vector. And the +! vector can be reshaped as an antisymmetric matrix + +! \begin{align*} +! \begin{pmatrix} +! 0 & -1 & -2 & -4 \\ +! 1 & 0 & -3 & -5 \\ +! 2 & 3 & 0 & -6 \\ +! 4 & 5 & 6 & 0 +! \end{pmatrix} +! \Leftrightarrow +! \begin{pmatrix} +! 1 & 2 & 3 & 4 & 5 & 6 +! \end{pmatrix} +! \end{align*} + +! !!! Here the algorithm only work for the lower diagonal !!! + +! Input: +! | p,q | integer | indexes of a matrix element in the lower diagonal | +! | | | p > q, q -> column | +! | | | p -> row, | +! | | | q -> column | + +! Input: +! | i | integer | corresponding index in the vector | + + +subroutine mat_to_vec_index(p,q,i) + + include 'pi.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: p,q + + ! out + integer, intent(out) :: i + + ! internal + integer :: a,b + double precision :: da + + ! Calculation + + a = p-1 + b = a*(a-1)/2 + + i = q+b + +end subroutine From a119d5943b1432d5b52c2ec02e12bdbf60a5e227 Mon Sep 17 00:00:00 2001 From: ydamour Date: Thu, 10 Nov 2022 09:31:33 +0100 Subject: [PATCH 53/70] option to use or not trust region + hessian --- src/mo_localization/EZFIO.cfg | 20 +- src/mo_localization/README.md | 17 + src/mo_localization/localization.irp.f | 299 +++++---- src/mo_localization/localization.org | 697 ++++++++++++++++----- src/mo_localization/localization_sub.irp.f | 339 ++++++++-- 5 files changed, 1068 insertions(+), 304 deletions(-) diff --git a/src/mo_localization/EZFIO.cfg b/src/mo_localization/EZFIO.cfg index f59c3efd..129fb2ea 100644 --- a/src/mo_localization/EZFIO.cfg +++ b/src/mo_localization/EZFIO.cfg @@ -1,42 +1,48 @@ [localization_method] type: character*(32) -doc: Method for the orbital localization. boys : Foster-Boys, pipek : Pipek-Mezey +doc: Method for the orbital localization. boys : Foster-Boys, pipek : Pipek-Mezey. interface: ezfio,provider,ocaml default: boys [localization_max_nb_iter] type: integer -doc: Maximal number of iterations for the orbital localization +doc: Maximal number of iterations for the orbital localization. interface: ezfio,provider,ocaml default: 1000 +[localization_use_hessian] +type: logical +doc: If true, it uses the trust region algorithm with the gradient and the diagonal of the hessian. Else it computes the rotation between each pair of MOs that should be applied to maximize/minimize the localization criterion. The last option requieres a way smaller amount of memory but is not easy to converge. +interface: ezfio,provider,ocaml +default: true + [security_mo_class] type: logical -doc: If true, call abort if the number of active orbital or the number of core + active orbitals is equal to the number of molecular orbitals, else uses the actual mo_class. It is a security if you forget to set the mo_class before the localization +doc: If true, call abort if the number of active orbital or the number of core + active orbitals is equal to the number of molecular orbitals, else uses the actual mo_class. It is a security if you forget to set the mo_class before the localization. interface: ezfio,provider,ocaml default: true [thresh_loc_max_elem_grad] type: double precision -doc: Threshold for the convergence, the localization exits when the largest element in the gradient is smaller than thresh_localization_max_elem_grad +doc: Threshold for the convergence, the localization exits when the largest element in the gradient is smaller than thresh_localization_max_elem_grad. interface: ezfio,provider,ocaml default: 1.e-6 [kick_in_mos] type: logical -doc: If True, apply a rotation of an angle angle_pre_rot between the MOs of a same mo_class before the localization +doc: If True, it applies a rotation of an angle angle_pre_rot between the MOs of a same mo_class before the localization. interface: ezfio,provider,ocaml default: true [angle_pre_rot] type: double precision -doc: Define the angle for the rotation of the MOs before the localization (in rad) +doc: To define the angle for the rotation of the MOs before the localization (in rad). interface: ezfio,provider,ocaml default: 0.1 [sort_mos_by_e] type: logical -doc: If True, sorts the MOs using the diagonal elements of the Fock matrix +doc: If True, the MOs are sorted using the diagonal elements of the Fock matrix. interface: ezfio,provider,ocaml default: false diff --git a/src/mo_localization/README.md b/src/mo_localization/README.md index 3d692987..a142ec20 100644 --- a/src/mo_localization/README.md +++ b/src/mo_localization/README.md @@ -85,6 +85,23 @@ symmetry with just a small change in the energy: qp set mo_localization angle_pre_rot 1e-3 ``` +# With or without hessian + trust region +With hessian + trust region +``` +qp set mo_localization localisation_use_hessian true +``` +It uses the trust region algorithm with the diagonal of the hessian of the +localization criterion with respect to the MO rotations. + +Without the hessian and the trust region +``` +qp set mo_localization localisation_use_hessian false +``` +By doing so it does not require to store the hessian but the +convergence is not easy, in particular for virtual MOs. +It seems that it not possible to converge with Pipek-Mezey +localization with this approach. + # Further improvements: - Cleaner repo - Correction of the errors in the documentations diff --git a/src/mo_localization/localization.irp.f b/src/mo_localization/localization.irp.f index 6364851d..6d5cc876 100644 --- a/src/mo_localization/localization.irp.f +++ b/src/mo_localization/localization.irp.f @@ -295,148 +295,223 @@ subroutine run_localization ! Size for the 2D -> 1D transformation tmp_n = tmp_list_size * (tmp_list_size - 1)/2 - - ! Allocation of temporary arrays - allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) - allocate(tmp_R(tmp_list_size, tmp_list_size)) - allocate(tmp_x(tmp_n), W(tmp_n,tmp_n), e_val(tmp_n), key(tmp_n)) - ! ### Initialization ### - delta = 0d0 ! can be deleted (normally) - nb_iter = 0 ! Must start at 0 !!! - rho = 0.5d0 ! Must be 0.5 + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) - ! Compute the criterion before the loop - call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) - ! Loop until the convergence - do while (not_converged) + ! Init + nb_iter = 0 + delta = 1d0 - print*,'' - print*,'***********************' - print*,'Iteration', nb_iter - print*,'***********************' - print*,'' - - ! Gradient - call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) - ! Diagonal hessian - call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) - - ! Diagonalization of the diagonal hessian by hands - !call diagonalization_hessian(tmp_n,H,e_val,w) - do i = 1, tmp_n - e_val(i) = H(i,i) - enddo + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta - ! Key list for dsort - do i = 1, tmp_n - key(i) = i - enddo - - ! Sort of the eigenvalues - call dsort(e_val, key, tmp_n) - - ! Eigenvectors - W = 0d0 - do i = 1, tmp_n - j = key(i) - W(j,i) = 1d0 - enddo - - ! To enter in the loop just after - cancel_step = .True. - nb_sub_iter = 0 - - ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho - do while (cancel_step) - print*,'-----------------------------' - print*, mo_class(tmp_list(1)) - print*,'Iteration:', nb_iter - print*,'Sub iteration:', nb_sub_iter - print*,'-----------------------------' - - ! Hessian,gradient,Criterion -> x - call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, & - rho, nb_iter, delta, criterion_model, tmp_x, must_exit) - - ! Internal loop exit condition - if (must_exit) then - print*,'trust_region_step_w_expected_e sent: Exit' - exit - endif - - ! 1D tmp -> 2D tmp - call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) - - ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + ! Rotation submatrix call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & info, enforce_step_cancellation) + ! To ensure that the rotation matrix is unitary if (enforce_step_cancellation) then print*, 'Step cancellation, too large error in the rotation matrix' - rho = 0d0 + delta = delta * 0.5d0 cycle + else + delta = min(delta * 2d0, 1d0) endif - ! tmp_R to R, subspace to full space + ! Full rotation matrix and application of the rotation call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) - - ! Rotation of the MOs - call apply_mo_rotation(R, prev_mos) + call apply_mo_rotation(R, prev_mos) - ! Update the things related to mo_coef + ! Update the needed data call update_data_localization() - ! Update the criterion + ! New criterion call criterion_localization(tmp_list_size, tmp_list, criterion) print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 - ! Criterion -> step accepted or rejected - call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & - criterion_model, rho, cancel_step) - - ! Cancellation of the step, previous MOs - if (cancel_step) then - mo_coef = prev_mos + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. endif - - nb_sub_iter = nb_sub_iter + 1 enddo - !call save_mos() !### depend of the time for 1 iteration - ! To exit the external loop if must_exti = .True. - if (must_exit) then - exit - endif + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef - ! Step accepted, nb iteration + 1 - nb_iter = nb_iter + 1 + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) - ! External loop exit conditions - if (DABS(max_elem) < thresh_loc_max_elem_grad) then - not_converged = .False. - endif - if (nb_iter > localization_max_nb_iter) then - not_converged = .False. - endif - enddo - - ! Deallocation of temporary arrays - deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + ! Trust region + else - ! Save the MOs - call save_mos() - TOUCH mo_coef + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n,tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + j = key(i) + W(j,i) = 1d0 + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef - ! Debug - if (debug_hf) then - touch mo_coef - print*,'HF energy:', HF_energy + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + endif - enddo + TOUCH mo_coef ! To sort the MOs using the diagonal elements of the Fock matrix diff --git a/src/mo_localization/localization.org b/src/mo_localization/localization.org index 33b52e7d..293ea06b 100644 --- a/src/mo_localization/localization.org +++ b/src/mo_localization/localization.org @@ -29,9 +29,29 @@ WARNING: to have the right mo class for his next calculation... For more information on the mo_class: -lpqp set_mo_class -h +qp set_mo_class -h *** Foster-Boys localization +Boys, S. F., 1960, Rev. Mod. Phys. 32, 296. +DOI:https://doi.org/10.1103/RevModPhys.32.300 +Boys, S. F., 1966, in Quantum Theory of Atoms, Molecules, +and the Solid State, edited by P.-O. Löwdin (Academic +Press, New York), p. 253. +Daniel A. Kleier, Thomas A. Halgren, John H. Hall Jr., and William +N. Lipscomb, J. Chem. Phys. 61, 3905 (1974) +doi: 10.1063/1.1681683 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Comput. Chem. 2013, 34, +1456– 1462. DOI: 10.1002/jcc.23281 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Theory +Comput. 2012, 8, 9, 3137–3146 +DOI: https://doi.org/10.1021/ct300473g +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Phys. 137, 224114 +(2012) +DOI: https://doi.org/10.1063/1.4769866 +Nicola Marzari, Arash A. Mostofi, Jonathan R. Yates, Ivo Souza, and David Vanderbilt +Rev. Mod. Phys. 84, 1419 +https://doi.org/10.1103/RevModPhys.84.1419 + The Foster-Boys localization is a method to generate localized MOs (LMOs) by minimizing the Foster-Boys criterion: $$ C_{FB} = \sum_{i=1}^N \left[ < \phi_i | r^2 | \phi_i > - < \phi_i | r | @@ -481,148 +501,223 @@ subroutine run_localization ! Size for the 2D -> 1D transformation tmp_n = tmp_list_size * (tmp_list_size - 1)/2 - - ! Allocation of temporary arrays - allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) - allocate(tmp_R(tmp_list_size, tmp_list_size)) - allocate(tmp_x(tmp_n), W(tmp_n,tmp_n), e_val(tmp_n), key(tmp_n)) - ! ### Initialization ### - delta = 0d0 ! can be deleted (normally) - nb_iter = 0 ! Must start at 0 !!! - rho = 0.5d0 ! Must be 0.5 + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) - ! Compute the criterion before the loop - call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) - ! Loop until the convergence - do while (not_converged) + ! Init + nb_iter = 0 + delta = 1d0 - print*,'' - print*,'***********************' - print*,'Iteration', nb_iter - print*,'***********************' - print*,'' - - ! Gradient - call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) - ! Diagonal hessian - call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) - - ! Diagonalization of the diagonal hessian by hands - !call diagonalization_hessian(tmp_n,H,e_val,w) - do i = 1, tmp_n - e_val(i) = H(i,i) - enddo + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta - ! Key list for dsort - do i = 1, tmp_n - key(i) = i - enddo - - ! Sort of the eigenvalues - call dsort(e_val, key, tmp_n) - - ! Eigenvectors - W = 0d0 - do i = 1, tmp_n - j = key(i) - W(j,i) = 1d0 - enddo - - ! To enter in the loop just after - cancel_step = .True. - nb_sub_iter = 0 - - ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho - do while (cancel_step) - print*,'-----------------------------' - print*, mo_class(tmp_list(1)) - print*,'Iteration:', nb_iter - print*,'Sub iteration:', nb_sub_iter - print*,'-----------------------------' - - ! Hessian,gradient,Criterion -> x - call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, & - rho, nb_iter, delta, criterion_model, tmp_x, must_exit) - - ! Internal loop exit condition - if (must_exit) then - print*,'trust_region_step_w_expected_e sent: Exit' - exit - endif - - ! 1D tmp -> 2D tmp - call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) - - ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + ! Rotation submatrix call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & info, enforce_step_cancellation) + ! To ensure that the rotation matrix is unitary if (enforce_step_cancellation) then print*, 'Step cancellation, too large error in the rotation matrix' - rho = 0d0 + delta = delta * 0.5d0 cycle + else + delta = min(delta * 2d0, 1d0) endif - ! tmp_R to R, subspace to full space + ! Full rotation matrix and application of the rotation call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) - - ! Rotation of the MOs - call apply_mo_rotation(R, prev_mos) + call apply_mo_rotation(R, prev_mos) - ! Update the things related to mo_coef + ! Update the needed data call update_data_localization() - ! Update the criterion + ! New criterion call criterion_localization(tmp_list_size, tmp_list, criterion) print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 - ! Criterion -> step accepted or rejected - call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & - criterion_model, rho, cancel_step) - - ! Cancellation of the step, previous MOs - if (cancel_step) then - mo_coef = prev_mos + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. endif - - nb_sub_iter = nb_sub_iter + 1 enddo - !call save_mos() !### depend of the time for 1 iteration - ! To exit the external loop if must_exti = .True. - if (must_exit) then - exit - endif + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef - ! Step accepted, nb iteration + 1 - nb_iter = nb_iter + 1 + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) - ! External loop exit conditions - if (DABS(max_elem) < thresh_loc_max_elem_grad) then - not_converged = .False. - endif - if (nb_iter > localization_max_nb_iter) then - not_converged = .False. - endif - enddo - - ! Deallocation of temporary arrays - deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + ! Trust region + else - ! Save the MOs - call save_mos() - TOUCH mo_coef + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n,tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + j = key(i) + W(j,i) = 1d0 + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef - ! Debug - if (debug_hf) then - touch mo_coef - print*,'HF energy:', HF_energy + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + endif - enddo + TOUCH mo_coef ! To sort the MOs using the diagonal elements of the Fock matrix @@ -682,9 +777,8 @@ subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_ele elseif (localization_method== 'pipek') then call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) else - v_grad = 0d0 - max_elem = 0d0 - norm_grad = 0d0 + print*,'Unkown method:'//localization_method + call abort endif end @@ -717,7 +811,8 @@ subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) elseif (localization_method == 'pipek') then call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) else - H = 0d0 + print*,'Unkown method: '//localization_method + call abort endif end @@ -748,7 +843,8 @@ subroutine criterion_localization(tmp_list_size, tmp_list,criterion) !call criterion_PM(tmp_list_size, tmp_list,criterion) call criterion_PM_v3(tmp_list_size, tmp_list, criterion) else - criterion = 0d0 + print*,'Unkown method: '//localization_method + call abort endif end @@ -770,10 +866,45 @@ subroutine update_data_localization() elseif (localization_method == 'pipek') then ! Nothing required else + print*,'Unkown method: '//localization_method + call abort endif end #+END_SRC +Angles: + +Output: +| tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +| max_elem | double precision | Maximal angle | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + ** Foster-Boys *** Gradient Input: @@ -1174,7 +1305,7 @@ subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gra BEGIN_DOC ! Compute gradient for the Pipek-Mezey localization END_DOC - + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad double precision, allocatable :: m_grad(:,:), tmp_int(:,:) @@ -1187,54 +1318,54 @@ subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gra m_grad = 0d0 do a = 1, nucl_num ! loop over the nuclei - tmp_int = 0d0 ! Initialization for each nuclei + tmp_int = 0d0 ! Initialization for each nuclei - ! Loop over the MOs of the a given mo_class to compute - do tmp_j = 1, tmp_list_size - j = tmp_list(tmp_j) - do tmp_i = 1, tmp_list_size - i = tmp_list(tmp_i) - do rho = 1, ao_num ! loop over all the AOs - do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a - mu = nucl_aos(a,b) ! AO centered on atom a + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a - tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & - + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) - enddo - enddo - enddo - enddo + enddo + enddo + enddo + enddo - ! Gradient - do tmp_j = 1, tmp_list_size - do tmp_i = 1, tmp_list_size + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size - m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) - enddo - enddo + enddo + enddo enddo ! 2D -> 1D do tmp_k = 1, tmp_n - call vec_to_mat_index(tmp_k,tmp_i,tmp_j) - v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) enddo ! Maximum element in the gradient max_elem = 0d0 do tmp_k = 1, tmp_n - if (ABS(v_grad(tmp_k)) > max_elem) then - max_elem = ABS(v_grad(tmp_k)) - endif - enddo - + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + ! Norm of the gradient norm_grad = 0d0 do tmp_k = 1, tmp_n - norm_grad = norm_grad + v_grad(tmp_k)**2 + norm_grad = norm_grad + v_grad(tmp_k)**2 enddo norm_grad = dsqrt(norm_grad) @@ -1244,7 +1375,7 @@ subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gra ! Deallocation deallocate(m_grad,tmp_int) -end +end subroutine grad_pipek #+END_SRC *** Gradient @@ -1971,6 +2102,274 @@ subroutine criterion_FB(tmp_list_size, tmp_list, criterion) end subroutine #+END_SRC +** Theta + +In: +| n | integer | number of MOs in the considered MO class | +| l | integer | list of MOs of the considered class | + +Out: +| m_x(n,n) | double precision | Matrix containing the rotation angle between all the different | +| | | pairs of MOs to apply the rotations (need a minus sign) | +| max_elem | double precision | Maximal angle in absolute value | + +$$\cos(4 \theta) = \frac{-A{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\sin(4 \theta) = \frac{B{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\tan(4 \theta) = \frac{\sin(4 \theta)}{\cos(4 \theta)}$$ +where $\theta$ is in fact $\theta_{ij}$ + +For Foster-Boys localization: +$$A_{ij} = ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = ( - )$$ + + +For Pipek-Mezey localization: +$$A_{ij} = \sum_A ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = \sum_A ( - )$$ +with +$$ = \frac{1}{2} \sum_\rho \sum_{\mu \in A} ( c_\rho^{i*} S_{\rho +\mu} c_\mu^j + c_\mu^{i*} S_{\mu \rho} c_\rho^j)$$ +$i,j$ MOs +$\mu, \rho$ AOs +$A$ nucleus +$S$ overlap matrix +$c$ MO coefficient +$r$ position operator + +#+begin_src f90 :tangle localization_sub.irp.f +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end +#+end_src + +#+begin_src f90 :comments org :tangle localization_sub.irp.f +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end +#+end_src + ** Spatial extent The spatial extent of an orbital $i$ is computed as diff --git a/src/mo_localization/localization_sub.irp.f b/src/mo_localization/localization_sub.irp.f index 90a4bfb5..5ca89790 100644 --- a/src/mo_localization/localization_sub.irp.f +++ b/src/mo_localization/localization_sub.irp.f @@ -38,9 +38,8 @@ subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_ele elseif (localization_method== 'pipek') then call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) else - v_grad = 0d0 - max_elem = 0d0 - norm_grad = 0d0 + print*,'Unkown method:'//localization_method + call abort endif end @@ -74,7 +73,8 @@ subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) elseif (localization_method == 'pipek') then call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) else - H = 0d0 + print*,'Unkown method: '//localization_method + call abort endif end @@ -106,7 +106,8 @@ subroutine criterion_localization(tmp_list_size, tmp_list,criterion) !call criterion_PM(tmp_list_size, tmp_list,criterion) call criterion_PM_v3(tmp_list_size, tmp_list, criterion) else - criterion = 0d0 + print*,'Unkown method: '//localization_method + call abort endif end @@ -129,9 +130,45 @@ subroutine update_data_localization() elseif (localization_method == 'pipek') then ! Nothing required else + print*,'Unkown method: '//localization_method + call abort endif end + + +! Angles: + +! Output: +! | tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +! | max_elem | double precision | Maximal angle | + + + +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + ! Gradient ! Input: ! | tmp_n | integer | Number of parameters in the MO subspace | @@ -526,7 +563,7 @@ subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gra BEGIN_DOC ! Compute gradient for the Pipek-Mezey localization END_DOC - + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad double precision, allocatable :: m_grad(:,:), tmp_int(:,:) @@ -539,54 +576,54 @@ subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gra m_grad = 0d0 do a = 1, nucl_num ! loop over the nuclei - tmp_int = 0d0 ! Initialization for each nuclei + tmp_int = 0d0 ! Initialization for each nuclei - ! Loop over the MOs of the a given mo_class to compute - do tmp_j = 1, tmp_list_size - j = tmp_list(tmp_j) - do tmp_i = 1, tmp_list_size - i = tmp_list(tmp_i) - do rho = 1, ao_num ! loop over all the AOs - do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a - mu = nucl_aos(a,b) ! AO centered on atom a + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a - tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & - + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) - enddo - enddo - enddo - enddo + enddo + enddo + enddo + enddo - ! Gradient - do tmp_j = 1, tmp_list_size - do tmp_i = 1, tmp_list_size + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size - m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) - enddo - enddo + enddo + enddo enddo ! 2D -> 1D do tmp_k = 1, tmp_n - call vec_to_mat_index(tmp_k,tmp_i,tmp_j) - v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) enddo ! Maximum element in the gradient max_elem = 0d0 do tmp_k = 1, tmp_n - if (ABS(v_grad(tmp_k)) > max_elem) then - max_elem = ABS(v_grad(tmp_k)) - endif - enddo - + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + ! Norm of the gradient norm_grad = 0d0 do tmp_k = 1, tmp_n - norm_grad = norm_grad + v_grad(tmp_k)**2 + norm_grad = norm_grad + v_grad(tmp_k)**2 enddo norm_grad = dsqrt(norm_grad) @@ -596,7 +633,7 @@ subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gra ! Deallocation deallocate(m_grad,tmp_int) -end +end subroutine grad_pipek ! Gradient @@ -1312,6 +1349,236 @@ subroutine criterion_FB(tmp_list_size, tmp_list, criterion) end subroutine +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end + +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end + ! Spatial extent ! The spatial extent of an orbital $i$ is computed as From 21bbc6911c566c06528a95cd5999a663bcb96325 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 11 Nov 2022 09:09:51 +0100 Subject: [PATCH 54/70] configure --help in message --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index b3ed7758..7d063b1d 100755 --- a/configure +++ b/configure @@ -369,7 +369,7 @@ else echo "" echo "${QP_ROOT}/build.ninja does not exist," echo "you need to specify the COMPILATION configuration file." - echo "See ./configure --help for more details." + echo "See ./configure -h for more details." echo "" fi From 63a74c8afa4a24502dcf74cfadb405c2e87fd008 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 19 Nov 2022 14:50:54 +0100 Subject: [PATCH 55/70] fix old fortran --- src/ao_one_e_ints/pseudopot.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ao_one_e_ints/pseudopot.f90 b/src/ao_one_e_ints/pseudopot.f90 index 141292d8..67dda7b5 100644 --- a/src/ao_one_e_ints/pseudopot.f90 +++ b/src/ao_one_e_ints/pseudopot.f90 @@ -1097,7 +1097,7 @@ implicit none integer nptsgridmax,nptsgrid double precision coefs_pseudo,ptsgrid parameter(nptsgridmax=50) -common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) +double precision common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) !!!!! integer npower_orb(3),l,m,i double precision x,g_orb,two_pi,dx,dphi,term,orb_phi,ylm_real,sintheta,r_orb,phi,center_orb(3) @@ -1238,7 +1238,7 @@ end double precision coefs_pseudo,ptsgrid double precision p,q,r,s parameter(nptsgridmax=50) - common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) + double precision common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) p=1.d0/dsqrt(2.d0) q=1.d0/dsqrt(3.d0) From c1a5029de78f6ed4b0a73de3cc7f52888ed0ad9e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 19 Nov 2022 14:55:37 +0100 Subject: [PATCH 56/70] Removed files which sould not be in git repo --- external/Python/docopt.py | 579 ----------------------------------- include/f77_zmq.h | 617 -------------------------------------- 2 files changed, 1196 deletions(-) delete mode 100644 external/Python/docopt.py delete mode 100644 include/f77_zmq.h diff --git a/external/Python/docopt.py b/external/Python/docopt.py deleted file mode 100644 index 7b927e2f..00000000 --- a/external/Python/docopt.py +++ /dev/null @@ -1,579 +0,0 @@ -"""Pythonic command-line interface parser that will make you smile. - - * http://docopt.org - * Repository and issue-tracker: https://github.com/docopt/docopt - * Licensed under terms of MIT license (see LICENSE-MIT) - * Copyright (c) 2013 Vladimir Keleshev, vladimir@keleshev.com - -""" -import sys -import re - - -__all__ = ['docopt'] -__version__ = '0.6.2' - - -class DocoptLanguageError(Exception): - - """Error in construction of usage-message by developer.""" - - -class DocoptExit(SystemExit): - - """Exit in case user invoked program with incorrect arguments.""" - - usage = '' - - def __init__(self, message=''): - SystemExit.__init__(self, (message + '\n' + self.usage).strip()) - - -class Pattern(object): - - def __eq__(self, other): - return repr(self) == repr(other) - - def __hash__(self): - return hash(repr(self)) - - def fix(self): - self.fix_identities() - self.fix_repeating_arguments() - return self - - def fix_identities(self, uniq=None): - """Make pattern-tree tips point to same object if they are equal.""" - if not hasattr(self, 'children'): - return self - uniq = list(set(self.flat())) if uniq is None else uniq - for i, c in enumerate(self.children): - if not hasattr(c, 'children'): - assert c in uniq - self.children[i] = uniq[uniq.index(c)] - else: - c.fix_identities(uniq) - - def fix_repeating_arguments(self): - """Fix elements that should accumulate/increment values.""" - either = [list(c.children) for c in self.either.children] - for case in either: - for e in [c for c in case if case.count(c) > 1]: - if type(e) is Argument or type(e) is Option and e.argcount: - if e.value is None: - e.value = [] - elif type(e.value) is not list: - e.value = e.value.split() - if type(e) is Command or type(e) is Option and e.argcount == 0: - e.value = 0 - return self - - @property - def either(self): - """Transform pattern into an equivalent, with only top-level Either.""" - # Currently the pattern will not be equivalent, but more "narrow", - # although good enough to reason about list arguments. - ret = [] - groups = [[self]] - while groups: - children = groups.pop(0) - types = [type(c) for c in children] - if Either in types: - either = [c for c in children if type(c) is Either][0] - children.pop(children.index(either)) - for c in either.children: - groups.append([c] + children) - elif Required in types: - required = [c for c in children if type(c) is Required][0] - children.pop(children.index(required)) - groups.append(list(required.children) + children) - elif Optional in types: - optional = [c for c in children if type(c) is Optional][0] - children.pop(children.index(optional)) - groups.append(list(optional.children) + children) - elif AnyOptions in types: - optional = [c for c in children if type(c) is AnyOptions][0] - children.pop(children.index(optional)) - groups.append(list(optional.children) + children) - elif OneOrMore in types: - oneormore = [c for c in children if type(c) is OneOrMore][0] - children.pop(children.index(oneormore)) - groups.append(list(oneormore.children) * 2 + children) - else: - ret.append(children) - return Either(*[Required(*e) for e in ret]) - - -class ChildPattern(Pattern): - - def __init__(self, name, value=None): - self.name = name - self.value = value - - def __repr__(self): - return '%s(%r, %r)' % (self.__class__.__name__, self.name, self.value) - - def flat(self, *types): - return [self] if not types or type(self) in types else [] - - def match(self, left, collected=None): - collected = [] if collected is None else collected - pos, match = self.single_match(left) - if match is None: - return False, left, collected - left_ = left[:pos] + left[pos + 1:] - same_name = [a for a in collected if a.name == self.name] - if type(self.value) in (int, list): - if type(self.value) is int: - increment = 1 - else: - increment = ([match.value] if type(match.value) is str - else match.value) - if not same_name: - match.value = increment - return True, left_, collected + [match] - same_name[0].value += increment - return True, left_, collected - return True, left_, collected + [match] - - -class ParentPattern(Pattern): - - def __init__(self, *children): - self.children = list(children) - - def __repr__(self): - return '%s(%s)' % (self.__class__.__name__, - ', '.join(repr(a) for a in self.children)) - - def flat(self, *types): - if type(self) in types: - return [self] - return sum([c.flat(*types) for c in self.children], []) - - -class Argument(ChildPattern): - - def single_match(self, left): - for n, p in enumerate(left): - if type(p) is Argument: - return n, Argument(self.name, p.value) - return None, None - - @classmethod - def parse(class_, source): - name = re.findall('(<\S*?>)', source)[0] - value = re.findall('\[default: (.*)\]', source, flags=re.I) - return class_(name, value[0] if value else None) - - -class Command(Argument): - - def __init__(self, name, value=False): - self.name = name - self.value = value - - def single_match(self, left): - for n, p in enumerate(left): - if type(p) is Argument: - if p.value == self.name: - return n, Command(self.name, True) - else: - break - return None, None - - -class Option(ChildPattern): - - def __init__(self, short=None, long=None, argcount=0, value=False): - assert argcount in (0, 1) - self.short, self.long = short, long - self.argcount, self.value = argcount, value - self.value = None if value is False and argcount else value - - @classmethod - def parse(class_, option_description): - short, long, argcount, value = None, None, 0, False - options, _, description = option_description.strip().partition(' ') - options = options.replace(',', ' ').replace('=', ' ') - for s in options.split(): - if s.startswith('--'): - long = s - elif s.startswith('-'): - short = s - else: - argcount = 1 - if argcount: - matched = re.findall('\[default: (.*)\]', description, flags=re.I) - value = matched[0] if matched else None - return class_(short, long, argcount, value) - - def single_match(self, left): - for n, p in enumerate(left): - if self.name == p.name: - return n, p - return None, None - - @property - def name(self): - return self.long or self.short - - def __repr__(self): - return 'Option(%r, %r, %r, %r)' % (self.short, self.long, - self.argcount, self.value) - - -class Required(ParentPattern): - - def match(self, left, collected=None): - collected = [] if collected is None else collected - l = left - c = collected - for p in self.children: - matched, l, c = p.match(l, c) - if not matched: - return False, left, collected - return True, l, c - - -class Optional(ParentPattern): - - def match(self, left, collected=None): - collected = [] if collected is None else collected - for p in self.children: - m, left, collected = p.match(left, collected) - return True, left, collected - - -class AnyOptions(Optional): - - """Marker/placeholder for [options] shortcut.""" - - -class OneOrMore(ParentPattern): - - def match(self, left, collected=None): - assert len(self.children) == 1 - collected = [] if collected is None else collected - l = left - c = collected - l_ = None - matched = True - times = 0 - while matched: - # could it be that something didn't match but changed l or c? - matched, l, c = self.children[0].match(l, c) - times += 1 if matched else 0 - if l_ == l: - break - l_ = l - if times >= 1: - return True, l, c - return False, left, collected - - -class Either(ParentPattern): - - def match(self, left, collected=None): - collected = [] if collected is None else collected - outcomes = [] - for p in self.children: - matched, _, _ = outcome = p.match(left, collected) - if matched: - outcomes.append(outcome) - if outcomes: - return min(outcomes, key=lambda outcome: len(outcome[1])) - return False, left, collected - - -class TokenStream(list): - - def __init__(self, source, error): - self += source.split() if hasattr(source, 'split') else source - self.error = error - - def move(self): - return self.pop(0) if len(self) else None - - def current(self): - return self[0] if len(self) else None - - -def parse_long(tokens, options): - """long ::= '--' chars [ ( ' ' | '=' ) chars ] ;""" - long, eq, value = tokens.move().partition('=') - assert long.startswith('--') - value = None if eq == value == '' else value - similar = [o for o in options if o.long == long] - if tokens.error is DocoptExit and similar == []: # if no exact match - similar = [o for o in options if o.long and o.long.startswith(long)] - if len(similar) > 1: # might be simply specified ambiguously 2+ times? - raise tokens.error('%s is not a unique prefix: %s?' % - (long, ', '.join(o.long for o in similar))) - elif len(similar) < 1: - argcount = 1 if eq == '=' else 0 - o = Option(None, long, argcount) - options.append(o) - if tokens.error is DocoptExit: - o = Option(None, long, argcount, value if argcount else True) - else: - o = Option(similar[0].short, similar[0].long, - similar[0].argcount, similar[0].value) - if o.argcount == 0: - if value is not None: - raise tokens.error('%s must not have an argument' % o.long) - else: - if value is None: - if tokens.current() is None: - raise tokens.error('%s requires argument' % o.long) - value = tokens.move() - if tokens.error is DocoptExit: - o.value = value if value is not None else True - return [o] - - -def parse_shorts(tokens, options): - """shorts ::= '-' ( chars )* [ [ ' ' ] chars ] ;""" - token = tokens.move() - assert token.startswith('-') and not token.startswith('--') - left = token.lstrip('-') - parsed = [] - while left != '': - short, left = '-' + left[0], left[1:] - similar = [o for o in options if o.short == short] - if len(similar) > 1: - raise tokens.error('%s is specified ambiguously %d times' % - (short, len(similar))) - elif len(similar) < 1: - o = Option(short, None, 0) - options.append(o) - if tokens.error is DocoptExit: - o = Option(short, None, 0, True) - else: # why copying is necessary here? - o = Option(short, similar[0].long, - similar[0].argcount, similar[0].value) - value = None - if o.argcount != 0: - if left == '': - if tokens.current() is None: - raise tokens.error('%s requires argument' % short) - value = tokens.move() - else: - value = left - left = '' - if tokens.error is DocoptExit: - o.value = value if value is not None else True - parsed.append(o) - return parsed - - -def parse_pattern(source, options): - tokens = TokenStream(re.sub(r'([\[\]\(\)\|]|\.\.\.)', r' \1 ', source), - DocoptLanguageError) - result = parse_expr(tokens, options) - if tokens.current() is not None: - raise tokens.error('unexpected ending: %r' % ' '.join(tokens)) - return Required(*result) - - -def parse_expr(tokens, options): - """expr ::= seq ( '|' seq )* ;""" - seq = parse_seq(tokens, options) - if tokens.current() != '|': - return seq - result = [Required(*seq)] if len(seq) > 1 else seq - while tokens.current() == '|': - tokens.move() - seq = parse_seq(tokens, options) - result += [Required(*seq)] if len(seq) > 1 else seq - return [Either(*result)] if len(result) > 1 else result - - -def parse_seq(tokens, options): - """seq ::= ( atom [ '...' ] )* ;""" - result = [] - while tokens.current() not in [None, ']', ')', '|']: - atom = parse_atom(tokens, options) - if tokens.current() == '...': - atom = [OneOrMore(*atom)] - tokens.move() - result += atom - return result - - -def parse_atom(tokens, options): - """atom ::= '(' expr ')' | '[' expr ']' | 'options' - | long | shorts | argument | command ; - """ - token = tokens.current() - result = [] - if token in '([': - tokens.move() - matching, pattern = {'(': [')', Required], '[': [']', Optional]}[token] - result = pattern(*parse_expr(tokens, options)) - if tokens.move() != matching: - raise tokens.error("unmatched '%s'" % token) - return [result] - elif token == 'options': - tokens.move() - return [AnyOptions()] - elif token.startswith('--') and token != '--': - return parse_long(tokens, options) - elif token.startswith('-') and token not in ('-', '--'): - return parse_shorts(tokens, options) - elif token.startswith('<') and token.endswith('>') or token.isupper(): - return [Argument(tokens.move())] - else: - return [Command(tokens.move())] - - -def parse_argv(tokens, options, options_first=False): - """Parse command-line argument vector. - - If options_first: - argv ::= [ long | shorts ]* [ argument ]* [ '--' [ argument ]* ] ; - else: - argv ::= [ long | shorts | argument ]* [ '--' [ argument ]* ] ; - - """ - parsed = [] - while tokens.current() is not None: - if tokens.current() == '--': - return parsed + [Argument(None, v) for v in tokens] - elif tokens.current().startswith('--'): - parsed += parse_long(tokens, options) - elif tokens.current().startswith('-') and tokens.current() != '-': - parsed += parse_shorts(tokens, options) - elif options_first: - return parsed + [Argument(None, v) for v in tokens] - else: - parsed.append(Argument(None, tokens.move())) - return parsed - - -def parse_defaults(doc): - # in python < 2.7 you can't pass flags=re.MULTILINE - split = re.split('\n *(<\S+?>|-\S+?)', doc)[1:] - split = [s1 + s2 for s1, s2 in zip(split[::2], split[1::2])] - options = [Option.parse(s) for s in split if s.startswith('-')] - #arguments = [Argument.parse(s) for s in split if s.startswith('<')] - #return options, arguments - return options - - -def printable_usage(doc): - # in python < 2.7 you can't pass flags=re.IGNORECASE - usage_split = re.split(r'([Uu][Ss][Aa][Gg][Ee]:)', doc) - if len(usage_split) < 3: - raise DocoptLanguageError('"usage:" (case-insensitive) not found.') - if len(usage_split) > 3: - raise DocoptLanguageError('More than one "usage:" (case-insensitive).') - return re.split(r'\n\s*\n', ''.join(usage_split[1:]))[0].strip() - - -def formal_usage(printable_usage): - pu = printable_usage.split()[1:] # split and drop "usage:" - return '( ' + ' '.join(') | (' if s == pu[0] else s for s in pu[1:]) + ' )' - - -def extras(help, version, options, doc): - if help and any((o.name in ('-h', '--help')) and o.value for o in options): - print(doc.strip("\n")) - sys.exit() - if version and any(o.name == '--version' and o.value for o in options): - print(version) - sys.exit() - - -class Dict(dict): - def __repr__(self): - return '{%s}' % ',\n '.join('%r: %r' % i for i in sorted(self.items())) - - -def docopt(doc, argv=None, help=True, version=None, options_first=False): - """Parse `argv` based on command-line interface described in `doc`. - - `docopt` creates your command-line interface based on its - description that you pass as `doc`. Such description can contain - --options, , commands, which could be - [optional], (required), (mutually | exclusive) or repeated... - - Parameters - ---------- - doc : str - Description of your command-line interface. - argv : list of str, optional - Argument vector to be parsed. sys.argv[1:] is used if not - provided. - help : bool (default: True) - Set to False to disable automatic help on -h or --help - options. - version : any object - If passed, the object will be printed if --version is in - `argv`. - options_first : bool (default: False) - Set to True to require options preceed positional arguments, - i.e. to forbid options and positional arguments intermix. - - Returns - ------- - args : dict - A dictionary, where keys are names of command-line elements - such as e.g. "--verbose" and "", and values are the - parsed values of those elements. - - Example - ------- - >>> from docopt import docopt - >>> doc = ''' - Usage: - my_program tcp [--timeout=] - my_program serial [--baud=] [--timeout=] - my_program (-h | --help | --version) - - Options: - -h, --help Show this screen and exit. - --baud= Baudrate [default: 9600] - ''' - >>> argv = ['tcp', '127.0.0.1', '80', '--timeout', '30'] - >>> docopt(doc, argv) - {'--baud': '9600', - '--help': False, - '--timeout': '30', - '--version': False, - '': '127.0.0.1', - '': '80', - 'serial': False, - 'tcp': True} - - See also - -------- - * For video introduction see http://docopt.org - * Full documentation is available in README.rst as well as online - at https://github.com/docopt/docopt#readme - - """ - if argv is None: - argv = sys.argv[1:] - DocoptExit.usage = printable_usage(doc) - options = parse_defaults(doc) - pattern = parse_pattern(formal_usage(DocoptExit.usage), options) - # [default] syntax for argument is disabled - #for a in pattern.flat(Argument): - # same_name = [d for d in arguments if d.name == a.name] - # if same_name: - # a.value = same_name[0].value - argv = parse_argv(TokenStream(argv, DocoptExit), list(options), - options_first) - pattern_options = set(pattern.flat(Option)) - for ao in pattern.flat(AnyOptions): - doc_options = parse_defaults(doc) - ao.children = list(set(doc_options) - pattern_options) - #if any_options: - # ao.children += [Option(o.short, o.long, o.argcount) - # for o in argv if type(o) is Option] - extras(help, version, argv, doc) - matched, left, collected = pattern.fix().match(argv) - if matched and left == []: # better error message if left? - return Dict((a.name, a.value) for a in (pattern.flat() + collected)) - raise DocoptExit() diff --git a/include/f77_zmq.h b/include/f77_zmq.h deleted file mode 100644 index b19bb707..00000000 --- a/include/f77_zmq.h +++ /dev/null @@ -1,617 +0,0 @@ - integer EADDRINUSE - integer EADDRNOTAVAIL - integer EAFNOSUPPORT - integer ECONNABORTED - integer ECONNREFUSED - integer ECONNRESET - integer EFSM - integer EHOSTUNREACH - integer EINPROGRESS - integer EMSGSIZE - integer EMTHREAD - integer ENETDOWN - integer ENETRESET - integer ENETUNREACH - integer ENOBUFS - integer ENOCOMPATPROTO - integer ENOTCONN - integer ENOTSOCK - integer ENOTSUP - integer EPROTONOSUPPORT - integer ETERM - integer ETIMEDOUT - integer ZMQ_AFFINITY - integer ZMQ_BACKLOG - integer ZMQ_BINDTODEVICE - integer ZMQ_BLOCKY - integer ZMQ_CHANNEL - integer ZMQ_CLIENT - integer ZMQ_CONFLATE - integer ZMQ_CONNECT_RID - integer ZMQ_CONNECT_ROUTING_ID - integer ZMQ_CONNECT_TIMEOUT - integer ZMQ_CURRENT_EVENT_VERSION - integer ZMQ_CURRENT_EVENT_VERSION_DRAFT - integer ZMQ_CURVE - integer ZMQ_CURVE_PUBLICKEY - integer ZMQ_CURVE_SECRETKEY - integer ZMQ_CURVE_SERVER - integer ZMQ_CURVE_SERVERKEY - integer ZMQ_DEALER - integer ZMQ_DEFINED_STDINT - integer ZMQ_DELAY_ATTACH_ON_CONNECT - integer ZMQ_DGRAM - integer ZMQ_DISCONNECT_MSG - integer ZMQ_DISH - integer ZMQ_DONTWAIT - integer ZMQ_EVENTS - integer ZMQ_EVENT_ACCEPTED - integer ZMQ_EVENT_ACCEPT_FAILED - integer ZMQ_EVENT_ALL - integer ZMQ_EVENT_ALL_V1 - integer ZMQ_EVENT_ALL_V2 - integer ZMQ_EVENT_BIND_FAILED - integer ZMQ_EVENT_CLOSED - integer ZMQ_EVENT_CLOSE_FAILED - integer ZMQ_EVENT_CONNECTED - integer ZMQ_EVENT_CONNECT_DELAYED - integer ZMQ_EVENT_CONNECT_RETRIED - integer ZMQ_EVENT_DISCONNECTED - integer ZMQ_EVENT_HANDSHAKE_FAILED_AUTH - integer ZMQ_EVENT_HANDSHAKE_FAILED_NO_DETAIL - integer ZMQ_EVENT_HANDSHAKE_FAILED_PROTOCOL - integer ZMQ_EVENT_HANDSHAKE_SUCCEEDED - integer ZMQ_EVENT_LISTENING - integer ZMQ_EVENT_MONITOR_STOPPED - integer ZMQ_EVENT_PIPES_STATS - integer ZMQ_FAIL_UNROUTABLE - integer ZMQ_FD - integer ZMQ_FORWARDER - integer ZMQ_GATHER - integer ZMQ_GROUP_MAX_LENGTH - integer ZMQ_GSSAPI - integer ZMQ_GSSAPI_NT_HOSTBASED - integer ZMQ_GSSAPI_NT_KRB5_PRINCIPAL - integer ZMQ_GSSAPI_NT_USER_NAME - integer ZMQ_GSSAPI_PLAINTEXT - integer ZMQ_GSSAPI_PRINCIPAL - integer ZMQ_GSSAPI_PRINCIPAL_NAMETYPE - integer ZMQ_GSSAPI_SERVER - integer ZMQ_GSSAPI_SERVICE_PRINCIPAL - integer ZMQ_GSSAPI_SERVICE_PRINCIPAL_NAMETYPE - integer ZMQ_HANDSHAKE_IVL - integer ZMQ_HAS_CAPABILITIES - integer ZMQ_HAUSNUMERO - integer ZMQ_HEARTBEAT_IVL - integer ZMQ_HEARTBEAT_TIMEOUT - integer ZMQ_HEARTBEAT_TTL - integer ZMQ_HELLO_MSG - integer ZMQ_IDENTITY - integer ZMQ_IMMEDIATE - integer ZMQ_INVERT_MATCHING - integer ZMQ_IN_BATCH_SIZE - integer ZMQ_IO_THREADS - integer ZMQ_IO_THREADS_DFLT - integer ZMQ_IPC_FILTER_GID - integer ZMQ_IPC_FILTER_PID - integer ZMQ_IPC_FILTER_UID - integer ZMQ_IPV4ONLY - integer ZMQ_IPV6 - integer ZMQ_LAST_ENDPOINT - integer ZMQ_LINGER - integer ZMQ_LOOPBACK_FASTPATH - integer ZMQ_MAXMSGSIZE - integer ZMQ_MAX_MSGSZ - integer ZMQ_MAX_SOCKETS - integer ZMQ_MAX_SOCKETS_DFLT - integer ZMQ_MECHANISM - integer ZMQ_METADATA - integer ZMQ_MORE - integer ZMQ_MSG_T_SIZE - integer ZMQ_MULTICAST_HOPS - integer ZMQ_MULTICAST_LOOP - integer ZMQ_MULTICAST_MAXTPDU - integer ZMQ_NOBLOCK - integer ZMQ_NOTIFY_CONNECT - integer ZMQ_NOTIFY_DISCONNECT - integer ZMQ_NULL - integer ZMQ_ONLY_FIRST_SUBSCRIBE - integer ZMQ_OUT_BATCH_SIZE - integer ZMQ_PAIR - integer ZMQ_PEER - integer ZMQ_PLAIN - integer ZMQ_PLAIN_PASSWORD - integer ZMQ_PLAIN_SERVER - integer ZMQ_PLAIN_USERNAME - integer ZMQ_POLLERR - integer ZMQ_POLLIN - integer ZMQ_POLLITEMS_DFLT - integer ZMQ_POLLOUT - integer ZMQ_POLLPRI - integer ZMQ_PRIORITY - integer ZMQ_PROBE_ROUTER - integer ZMQ_PROTOCOL_ERROR_WS_UNSPECIFIED - integer ZMQ_PROTOCOL_ERROR_ZAP_BAD_REQUEST_ID - integer ZMQ_PROTOCOL_ERROR_ZAP_BAD_VERSION - integer ZMQ_PROTOCOL_ERROR_ZAP_INVALID_METADATA - integer ZMQ_PROTOCOL_ERROR_ZAP_INVALID_STATUS_CODE - integer ZMQ_PROTOCOL_ERROR_ZAP_MALFORMED_REPLY - integer ZMQ_PROTOCOL_ERROR_ZAP_UNSPECIFIED - integer ZMQ_PROTOCOL_ERROR_ZMTP_CRYPTOGRAPHIC - integer ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_METADATA - integer ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_SEQUENCE - integer ZMQ_PROTOCOL_ERROR_ZMTP_KEY_EXCHANGE - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_ERROR - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_HELLO - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_INITIATE - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_MESSAGE - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_READY - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_UNSPECIFIED - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_WELCOME - integer ZMQ_PROTOCOL_ERROR_ZMTP_MECHANISM_MISMATCH - integer ZMQ_PROTOCOL_ERROR_ZMTP_UNEXPECTED_COMMAND - integer ZMQ_PROTOCOL_ERROR_ZMTP_UNSPECIFIED - integer ZMQ_PTR - integer ZMQ_PUB - integer ZMQ_PULL - integer ZMQ_PUSH - integer ZMQ_QUEUE - integer ZMQ_RADIO - integer ZMQ_RATE - integer ZMQ_RCVBUF - integer ZMQ_RCVHWM - integer ZMQ_RCVMORE - integer ZMQ_RCVTIMEO - integer ZMQ_RECONNECT_IVL - integer ZMQ_RECONNECT_IVL_MAX - integer ZMQ_RECONNECT_STOP - integer ZMQ_RECONNECT_STOP_AFTER_DISCONNECT - integer ZMQ_RECONNECT_STOP_CONN_REFUSED - integer ZMQ_RECONNECT_STOP_HANDSHAKE_FAILED - integer ZMQ_RECOVERY_IVL - integer ZMQ_REP - integer ZMQ_REQ - integer ZMQ_REQ_CORRELATE - integer ZMQ_REQ_RELAXED - integer ZMQ_ROUTER - integer ZMQ_ROUTER_BEHAVIOR - integer ZMQ_ROUTER_HANDOVER - integer ZMQ_ROUTER_MANDATORY - integer ZMQ_ROUTER_NOTIFY - integer ZMQ_ROUTER_RAW - integer ZMQ_ROUTING_ID - integer ZMQ_SCATTER - integer ZMQ_SERVER - integer ZMQ_SHARED - integer ZMQ_SNDBUF - integer ZMQ_SNDHWM - integer ZMQ_SNDMORE - integer ZMQ_SNDTIMEO - integer ZMQ_SOCKET_LIMIT - integer ZMQ_SOCKS_PASSWORD - integer ZMQ_SOCKS_PROXY - integer ZMQ_SOCKS_USERNAME - integer ZMQ_SRCFD - integer ZMQ_STREAM - integer ZMQ_STREAMER - integer ZMQ_STREAM_NOTIFY - integer ZMQ_SUB - integer ZMQ_SUBSCRIBE - integer ZMQ_TCP_ACCEPT_FILTER - integer ZMQ_TCP_KEEPALIVE - integer ZMQ_TCP_KEEPALIVE_CNT - integer ZMQ_TCP_KEEPALIVE_IDLE - integer ZMQ_TCP_KEEPALIVE_INTVL - integer ZMQ_TCP_MAXRT - integer ZMQ_THREAD_AFFINITY_CPU_ADD - integer ZMQ_THREAD_AFFINITY_CPU_REMOVE - integer ZMQ_THREAD_NAME_PREFIX - integer ZMQ_THREAD_PRIORITY - integer ZMQ_THREAD_PRIORITY_DFLT - integer ZMQ_THREAD_SAFE - integer ZMQ_THREAD_SCHED_POLICY - integer ZMQ_THREAD_SCHED_POLICY_DFLT - integer ZMQ_TOS - integer ZMQ_TYPE - integer ZMQ_UNSUBSCRIBE - integer ZMQ_USE_FD - integer ZMQ_VERSION - integer ZMQ_VERSION_MAJOR - integer ZMQ_VERSION_MINOR - integer ZMQ_VERSION_PATCH - integer ZMQ_VMCI_BUFFER_MAX_SIZE - integer ZMQ_VMCI_BUFFER_MIN_SIZE - integer ZMQ_VMCI_BUFFER_SIZE - integer ZMQ_VMCI_CONNECT_TIMEOUT - integer ZMQ_WSS_CERT_PEM - integer ZMQ_WSS_HOSTNAME - integer ZMQ_WSS_KEY_PEM - integer ZMQ_WSS_TRUST_PEM - integer ZMQ_WSS_TRUST_SYSTEM - integer ZMQ_XPUB - integer ZMQ_XPUB_MANUAL - integer ZMQ_XPUB_MANUAL_LAST_VALUE - integer ZMQ_XPUB_NODROP - integer ZMQ_XPUB_VERBOSE - integer ZMQ_XPUB_VERBOSER - integer ZMQ_XPUB_WELCOME_MSG - integer ZMQ_XREP - integer ZMQ_XREQ - integer ZMQ_XSUB - integer ZMQ_ZAP_DOMAIN - integer ZMQ_ZAP_ENFORCE_DOMAIN - integer ZMQ_ZERO_COPY_RECV - parameter(EADDRINUSE=156384717) - parameter(EADDRNOTAVAIL=156384718) - parameter(EAFNOSUPPORT=156384723) - parameter(ECONNABORTED=156384725) - parameter(ECONNREFUSED=156384719) - parameter(ECONNRESET=156384726) - parameter(EFSM=156384763) - parameter(EHOSTUNREACH=156384729) - parameter(EINPROGRESS=156384720) - parameter(EMSGSIZE=156384722) - parameter(EMTHREAD=156384766) - parameter(ENETDOWN=156384716) - parameter(ENETRESET=156384730) - parameter(ENETUNREACH=156384724) - parameter(ENOBUFS=156384715) - parameter(ENOCOMPATPROTO=156384764) - parameter(ENOTCONN=156384727) - parameter(ENOTSOCK=156384721) - parameter(ENOTSUP=156384713) - parameter(EPROTONOSUPPORT=156384714) - parameter(ETERM=156384765) - parameter(ETIMEDOUT=156384728) - parameter(ZMQ_AFFINITY=4) - parameter(ZMQ_BACKLOG=19) - parameter(ZMQ_BINDTODEVICE=92) - parameter(ZMQ_BLOCKY=70) - parameter(ZMQ_CHANNEL=20) - parameter(ZMQ_CLIENT=13) - parameter(ZMQ_CONFLATE=54) - parameter(ZMQ_CONNECT_RID=61) - parameter(ZMQ_CONNECT_ROUTING_ID=61) - parameter(ZMQ_CONNECT_TIMEOUT=79) - parameter(ZMQ_CURRENT_EVENT_VERSION=1) - parameter(ZMQ_CURRENT_EVENT_VERSION_DRAFT=2) - parameter(ZMQ_CURVE=2) - parameter(ZMQ_CURVE_PUBLICKEY=48) - parameter(ZMQ_CURVE_SECRETKEY=49) - parameter(ZMQ_CURVE_SERVER=47) - parameter(ZMQ_CURVE_SERVERKEY=50) - parameter(ZMQ_DEALER=5) - parameter(ZMQ_DEFINED_STDINT=1) - parameter(ZMQ_DELAY_ATTACH_ON_CONNECT=39) - parameter(ZMQ_DGRAM=18) - parameter(ZMQ_DISCONNECT_MSG=111) - parameter(ZMQ_DISH=15) - parameter(ZMQ_DONTWAIT=1) - parameter(ZMQ_EVENTS=15) - parameter(ZMQ_EVENT_ACCEPTED=32) - parameter(ZMQ_EVENT_ACCEPT_FAILED=64) - parameter(ZMQ_EVENT_ALL=65535) - parameter(ZMQ_EVENT_ALL_V1=65535) - parameter(ZMQ_EVENT_ALL_V2=131071) - parameter(ZMQ_EVENT_BIND_FAILED=16) - parameter(ZMQ_EVENT_CLOSED=128) - parameter(ZMQ_EVENT_CLOSE_FAILED=256) - parameter(ZMQ_EVENT_CONNECTED=1) - parameter(ZMQ_EVENT_CONNECT_DELAYED=2) - parameter(ZMQ_EVENT_CONNECT_RETRIED=4) - parameter(ZMQ_EVENT_DISCONNECTED=512) - parameter(ZMQ_EVENT_HANDSHAKE_FAILED_AUTH=16384) - parameter(ZMQ_EVENT_HANDSHAKE_FAILED_NO_DETAIL=2048) - parameter(ZMQ_EVENT_HANDSHAKE_FAILED_PROTOCOL=8192) - parameter(ZMQ_EVENT_HANDSHAKE_SUCCEEDED=4096) - parameter(ZMQ_EVENT_LISTENING=8) - parameter(ZMQ_EVENT_MONITOR_STOPPED=1024) - parameter(ZMQ_EVENT_PIPES_STATS=65536) - parameter(ZMQ_FAIL_UNROUTABLE=33) - parameter(ZMQ_FD=14) - parameter(ZMQ_FORWARDER=2) - parameter(ZMQ_GATHER=16) - parameter(ZMQ_GROUP_MAX_LENGTH=255) - parameter(ZMQ_GSSAPI=3) - parameter(ZMQ_GSSAPI_NT_HOSTBASED=0) - parameter(ZMQ_GSSAPI_NT_KRB5_PRINCIPAL=2) - parameter(ZMQ_GSSAPI_NT_USER_NAME=1) - parameter(ZMQ_GSSAPI_PLAINTEXT=65) - parameter(ZMQ_GSSAPI_PRINCIPAL=63) - parameter(ZMQ_GSSAPI_PRINCIPAL_NAMETYPE=90) - parameter(ZMQ_GSSAPI_SERVER=62) - parameter(ZMQ_GSSAPI_SERVICE_PRINCIPAL=64) - parameter(ZMQ_GSSAPI_SERVICE_PRINCIPAL_NAMETYPE=91) - parameter(ZMQ_HANDSHAKE_IVL=66) - parameter(ZMQ_HAS_CAPABILITIES=1) - parameter(ZMQ_HAUSNUMERO=156384712) - parameter(ZMQ_HEARTBEAT_IVL=75) - parameter(ZMQ_HEARTBEAT_TIMEOUT=77) - parameter(ZMQ_HEARTBEAT_TTL=76) - parameter(ZMQ_HELLO_MSG=110) - parameter(ZMQ_IDENTITY=5) - parameter(ZMQ_IMMEDIATE=39) - parameter(ZMQ_INVERT_MATCHING=74) - parameter(ZMQ_IN_BATCH_SIZE=101) - parameter(ZMQ_IO_THREADS=1) - parameter(ZMQ_IO_THREADS_DFLT=1) - parameter(ZMQ_IPC_FILTER_GID=60) - parameter(ZMQ_IPC_FILTER_PID=58) - parameter(ZMQ_IPC_FILTER_UID=59) - parameter(ZMQ_IPV4ONLY=31) - parameter(ZMQ_IPV6=42) - parameter(ZMQ_LAST_ENDPOINT=32) - parameter(ZMQ_LINGER=17) - parameter(ZMQ_LOOPBACK_FASTPATH=94) - parameter(ZMQ_MAXMSGSIZE=22) - parameter(ZMQ_MAX_MSGSZ=5) - parameter(ZMQ_MAX_SOCKETS=2) - parameter(ZMQ_MAX_SOCKETS_DFLT=1023) - parameter(ZMQ_MECHANISM=43) - parameter(ZMQ_METADATA=95) - parameter(ZMQ_MORE=1) - parameter(ZMQ_MSG_T_SIZE=6) - parameter(ZMQ_MULTICAST_HOPS=25) - parameter(ZMQ_MULTICAST_LOOP=96) - parameter(ZMQ_MULTICAST_MAXTPDU=84) - parameter(ZMQ_NOBLOCK=1) - parameter(ZMQ_NOTIFY_CONNECT=1) - parameter(ZMQ_NOTIFY_DISCONNECT=2) - parameter(ZMQ_NULL=0) - parameter(ZMQ_ONLY_FIRST_SUBSCRIBE=108) - parameter(ZMQ_OUT_BATCH_SIZE=102) - parameter(ZMQ_PAIR=0) - parameter(ZMQ_PEER=19) - parameter(ZMQ_PLAIN=1) - parameter(ZMQ_PLAIN_PASSWORD=46) - parameter(ZMQ_PLAIN_SERVER=44) - parameter(ZMQ_PLAIN_USERNAME=45) - parameter(ZMQ_POLLERR=4) - parameter(ZMQ_POLLIN=1) - parameter(ZMQ_POLLITEMS_DFLT=16) - parameter(ZMQ_POLLOUT=2) - parameter(ZMQ_POLLPRI=8) - parameter(ZMQ_PRIORITY=112) - parameter(ZMQ_PROBE_ROUTER=51) - parameter(ZMQ_PROTOCOL_ERROR_WS_UNSPECIFIED=805306368) - parameter(ZMQ_PROTOCOL_ERROR_ZAP_BAD_REQUEST_ID=536870914) - parameter(ZMQ_PROTOCOL_ERROR_ZAP_BAD_VERSION=536870915) - parameter(ZMQ_PROTOCOL_ERROR_ZAP_INVALID_METADATA=536870917) - parameter(ZMQ_PROTOCOL_ERROR_ZAP_INVALID_STATUS_CODE=536870916) - parameter(ZMQ_PROTOCOL_ERROR_ZAP_MALFORMED_REPLY=536870913) - parameter(ZMQ_PROTOCOL_ERROR_ZAP_UNSPECIFIED=536870912) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_CRYPTOGRAPHIC=285212673) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_METADATA=268435480) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_SEQUENCE=268435458) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_KEY_EXCHANGE=268435459) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_ERROR=268435477) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_HELLO=268435475) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_INITIATE=268435476) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_MESSAGE=268435474) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_READY=268435478) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_UNSPECIFIED=268435473) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_WELCOME=268435479) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_MECHANISM_MISMATCH=285212674) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_UNEXPECTED_COMMAND=268435457) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_UNSPECIFIED=268435456) - parameter(ZMQ_PTR=8) - parameter(ZMQ_PUB=1) - parameter(ZMQ_PULL=7) - parameter(ZMQ_PUSH=8) - parameter(ZMQ_QUEUE=3) - parameter(ZMQ_RADIO=14) - parameter(ZMQ_RATE=8) - parameter(ZMQ_RCVBUF=12) - parameter(ZMQ_RCVHWM=24) - parameter(ZMQ_RCVMORE=13) - parameter(ZMQ_RCVTIMEO=27) - parameter(ZMQ_RECONNECT_IVL=18) - parameter(ZMQ_RECONNECT_IVL_MAX=21) - parameter(ZMQ_RECONNECT_STOP=109) - parameter(ZMQ_RECONNECT_STOP_AFTER_DISCONNECT=3) - parameter(ZMQ_RECONNECT_STOP_CONN_REFUSED=1) - parameter(ZMQ_RECONNECT_STOP_HANDSHAKE_FAILED=2) - parameter(ZMQ_RECOVERY_IVL=9) - parameter(ZMQ_REP=4) - parameter(ZMQ_REQ=3) - parameter(ZMQ_REQ_CORRELATE=52) - parameter(ZMQ_REQ_RELAXED=53) - parameter(ZMQ_ROUTER=6) - parameter(ZMQ_ROUTER_BEHAVIOR=33) - parameter(ZMQ_ROUTER_HANDOVER=56) - parameter(ZMQ_ROUTER_MANDATORY=33) - parameter(ZMQ_ROUTER_NOTIFY=97) - parameter(ZMQ_ROUTER_RAW=41) - parameter(ZMQ_ROUTING_ID=5) - parameter(ZMQ_SCATTER=17) - parameter(ZMQ_SERVER=12) - parameter(ZMQ_SHARED=3) - parameter(ZMQ_SNDBUF=11) - parameter(ZMQ_SNDHWM=23) - parameter(ZMQ_SNDMORE=2) - parameter(ZMQ_SNDTIMEO=28) - parameter(ZMQ_SOCKET_LIMIT=3) - parameter(ZMQ_SOCKS_PASSWORD=100) - parameter(ZMQ_SOCKS_PROXY=68) - parameter(ZMQ_SOCKS_USERNAME=99) - parameter(ZMQ_SRCFD=2) - parameter(ZMQ_STREAM=11) - parameter(ZMQ_STREAMER=1) - parameter(ZMQ_STREAM_NOTIFY=73) - parameter(ZMQ_SUB=2) - parameter(ZMQ_SUBSCRIBE=6) - parameter(ZMQ_TCP_ACCEPT_FILTER=38) - parameter(ZMQ_TCP_KEEPALIVE=34) - parameter(ZMQ_TCP_KEEPALIVE_CNT=35) - parameter(ZMQ_TCP_KEEPALIVE_IDLE=36) - parameter(ZMQ_TCP_KEEPALIVE_INTVL=37) - parameter(ZMQ_TCP_MAXRT=80) - parameter(ZMQ_THREAD_AFFINITY_CPU_ADD=7) - parameter(ZMQ_THREAD_AFFINITY_CPU_REMOVE=8) - parameter(ZMQ_THREAD_NAME_PREFIX=9) - parameter(ZMQ_THREAD_PRIORITY=3) - parameter(ZMQ_THREAD_PRIORITY_DFLT=-1) - parameter(ZMQ_THREAD_SAFE=81) - parameter(ZMQ_THREAD_SCHED_POLICY=4) - parameter(ZMQ_THREAD_SCHED_POLICY_DFLT=-1) - parameter(ZMQ_TOS=57) - parameter(ZMQ_TYPE=16) - parameter(ZMQ_UNSUBSCRIBE=7) - parameter(ZMQ_USE_FD=89) - parameter(ZMQ_VERSION=40304) - parameter(ZMQ_VERSION_MAJOR=4) - parameter(ZMQ_VERSION_MINOR=3) - parameter(ZMQ_VERSION_PATCH=4) - parameter(ZMQ_VMCI_BUFFER_MAX_SIZE=87) - parameter(ZMQ_VMCI_BUFFER_MIN_SIZE=86) - parameter(ZMQ_VMCI_BUFFER_SIZE=85) - parameter(ZMQ_VMCI_CONNECT_TIMEOUT=88) - parameter(ZMQ_WSS_CERT_PEM=104) - parameter(ZMQ_WSS_HOSTNAME=106) - parameter(ZMQ_WSS_KEY_PEM=103) - parameter(ZMQ_WSS_TRUST_PEM=105) - parameter(ZMQ_WSS_TRUST_SYSTEM=107) - parameter(ZMQ_XPUB=9) - parameter(ZMQ_XPUB_MANUAL=71) - parameter(ZMQ_XPUB_MANUAL_LAST_VALUE=98) - parameter(ZMQ_XPUB_NODROP=69) - parameter(ZMQ_XPUB_VERBOSE=40) - parameter(ZMQ_XPUB_VERBOSER=78) - parameter(ZMQ_XPUB_WELCOME_MSG=72) - parameter(ZMQ_XREP=6) - parameter(ZMQ_XREQ=5) - parameter(ZMQ_XSUB=10) - parameter(ZMQ_ZAP_DOMAIN=55) - parameter(ZMQ_ZAP_ENFORCE_DOMAIN=93) - parameter(ZMQ_ZERO_COPY_RECV=10) - integer f77_zmq_bind - external f77_zmq_bind - integer f77_zmq_close - external f77_zmq_close - integer f77_zmq_connect - external f77_zmq_connect - integer f77_zmq_ctx_destroy - external f77_zmq_ctx_destroy - integer f77_zmq_ctx_get - external f77_zmq_ctx_get - integer*8 f77_zmq_ctx_new - external f77_zmq_ctx_new - integer f77_zmq_ctx_set - external f77_zmq_ctx_set - integer f77_zmq_ctx_shutdown - external f77_zmq_ctx_shutdown - integer f77_zmq_ctx_term - external f77_zmq_ctx_term - integer f77_zmq_disconnect - external f77_zmq_disconnect - integer f77_zmq_errno - external f77_zmq_errno - integer f77_zmq_getsockopt - external f77_zmq_getsockopt - integer f77_zmq_microsleep - external f77_zmq_microsleep - integer f77_zmq_msg_close - external f77_zmq_msg_close - integer f77_zmq_msg_copy - external f77_zmq_msg_copy - integer f77_zmq_msg_copy_from_data - external f77_zmq_msg_copy_from_data - integer f77_zmq_msg_copy_to_data - external f77_zmq_msg_copy_to_data - integer f77_zmq_msg_copy_to_data8 - external f77_zmq_msg_copy_to_data8 - integer*8 f77_zmq_msg_data - external f77_zmq_msg_data - integer*8 f77_zmq_msg_data_new - external f77_zmq_msg_data_new - integer f77_zmq_msg_destroy - external f77_zmq_msg_destroy - integer f77_zmq_msg_destroy_data - external f77_zmq_msg_destroy_data - integer f77_zmq_msg_get - external f77_zmq_msg_get - character*(64) f77_zmq_msg_gets - external f77_zmq_msg_gets - integer f77_zmq_msg_init - external f77_zmq_msg_init - integer f77_zmq_msg_init_data - external f77_zmq_msg_init_data - integer f77_zmq_msg_init_size - external f77_zmq_msg_init_size - integer f77_zmq_msg_more - external f77_zmq_msg_more - integer f77_zmq_msg_move - external f77_zmq_msg_move - integer*8 f77_zmq_msg_new - external f77_zmq_msg_new - integer f77_zmq_msg_recv - external f77_zmq_msg_recv - integer*8 f77_zmq_msg_recv8 - external f77_zmq_msg_recv8 - integer f77_zmq_msg_send - external f77_zmq_msg_send - integer*8 f77_zmq_msg_send8 - external f77_zmq_msg_send8 - integer f77_zmq_msg_set - external f77_zmq_msg_set - integer f77_zmq_msg_size - external f77_zmq_msg_size - integer*8 f77_zmq_msg_size8 - external f77_zmq_msg_size8 - integer f77_zmq_poll - external f77_zmq_poll - integer f77_zmq_pollitem_destroy - external f77_zmq_pollitem_destroy - integer*8 f77_zmq_pollitem_new - external f77_zmq_pollitem_new - integer f77_zmq_pollitem_revents - external f77_zmq_pollitem_revents - integer f77_zmq_pollitem_set_events - external f77_zmq_pollitem_set_events - integer f77_zmq_pollitem_set_socket - external f77_zmq_pollitem_set_socket - integer f77_zmq_proxy - external f77_zmq_proxy - integer f77_zmq_proxy_steerable - external f77_zmq_proxy_steerable - integer f77_zmq_recv - external f77_zmq_recv - integer*8 f77_zmq_recv8 - external f77_zmq_recv8 - integer f77_zmq_send - external f77_zmq_send - integer*8 f77_zmq_send8 - external f77_zmq_send8 - integer f77_zmq_send_const - external f77_zmq_send_const - integer*8 f77_zmq_send_const8 - external f77_zmq_send_const8 - integer f77_zmq_setsockopt - external f77_zmq_setsockopt - integer*8 f77_zmq_socket - external f77_zmq_socket - integer f77_zmq_socket_monitor - external f77_zmq_socket_monitor - character*(64) f77_zmq_strerror - external f77_zmq_strerror - integer f77_zmq_term - external f77_zmq_term - integer f77_zmq_unbind - external f77_zmq_unbind - integer f77_zmq_version - external f77_zmq_version - integer pthread_create - external pthread_create - integer pthread_create_arg - external pthread_create_arg - integer pthread_detach - external pthread_detach - integer pthread_join - external pthread_join From d134cc8602231fd41aa7573a216118655169d9d4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 19 Nov 2022 14:57:23 +0100 Subject: [PATCH 57/70] gitignore --- external/Python/.gitignore | 1 + include/.gitignore | 1 + 2 files changed, 2 insertions(+) diff --git a/external/Python/.gitignore b/external/Python/.gitignore index e69de29b..9d09b447 100644 --- a/external/Python/.gitignore +++ b/external/Python/.gitignore @@ -0,0 +1 @@ +docopt.py diff --git a/include/.gitignore b/include/.gitignore index d52be2d2..afcb2de6 100644 --- a/include/.gitignore +++ b/include/.gitignore @@ -5,3 +5,4 @@ zconf.h zlib.h zmq_utils.h f77_zmq_free.h +f77_zmq.h From 8245c7b3f7d304de1f2a83ed6158bd302688d134 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 19 Nov 2022 15:46:35 +0100 Subject: [PATCH 58/70] Revert 63a74c8 --- scripts/compilation/qp_create_ninja | 1 + src/ao_one_e_ints/pseudopot.f90 | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja index c0ba8c6a..0f70f4c4 100755 --- a/scripts/compilation/qp_create_ninja +++ b/scripts/compilation/qp_create_ninja @@ -121,6 +121,7 @@ def ninja_create_env_variable(pwd_config_file): l_string.append("LIB = {0} ".format(str_lib)) + l_string.append("CONFIG_FILE = {0}".format(pwd_config_file)) l_string.append("") return l_string diff --git a/src/ao_one_e_ints/pseudopot.f90 b/src/ao_one_e_ints/pseudopot.f90 index 67dda7b5..141292d8 100644 --- a/src/ao_one_e_ints/pseudopot.f90 +++ b/src/ao_one_e_ints/pseudopot.f90 @@ -1097,7 +1097,7 @@ implicit none integer nptsgridmax,nptsgrid double precision coefs_pseudo,ptsgrid parameter(nptsgridmax=50) -double precision common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) +common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) !!!!! integer npower_orb(3),l,m,i double precision x,g_orb,two_pi,dx,dphi,term,orb_phi,ylm_real,sintheta,r_orb,phi,center_orb(3) @@ -1238,7 +1238,7 @@ end double precision coefs_pseudo,ptsgrid double precision p,q,r,s parameter(nptsgridmax=50) - double precision common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) + common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) p=1.d0/dsqrt(2.d0) q=1.d0/dsqrt(3.d0) From 2798183c6d704e78b451cb72e00503d71e448a85 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 19 Nov 2022 16:16:04 +0100 Subject: [PATCH 59/70] Removed labels in loc.f --- src/ao_one_e_ints/pseudopot.f90 | 8 +- src/utils/loc.f | 297 +++++++++++++++++--------------- 2 files changed, 163 insertions(+), 142 deletions(-) diff --git a/src/ao_one_e_ints/pseudopot.f90 b/src/ao_one_e_ints/pseudopot.f90 index 141292d8..bad641ab 100644 --- a/src/ao_one_e_ints/pseudopot.f90 +++ b/src/ao_one_e_ints/pseudopot.f90 @@ -1095,9 +1095,9 @@ double precision function overlap_orb_ylm_grid(nptsgrid,r_orb,npower_orb,center_ implicit none !! PSEUDOS integer nptsgridmax,nptsgrid -double precision coefs_pseudo,ptsgrid parameter(nptsgridmax=50) -common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) +double precision coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) +common/pseudos/coefs_pseudo,ptsgrid !!!!! integer npower_orb(3),l,m,i double precision x,g_orb,two_pi,dx,dphi,term,orb_phi,ylm_real,sintheta,r_orb,phi,center_orb(3) @@ -1235,10 +1235,10 @@ end subroutine initpseudos(nptsgrid) implicit none integer nptsgridmax,nptsgrid,ik - double precision coefs_pseudo,ptsgrid double precision p,q,r,s parameter(nptsgridmax=50) - common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) + double precision :: coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) + common/pseudos/coefs_pseudo,ptsgrid p=1.d0/dsqrt(2.d0) q=1.d0/dsqrt(3.d0) diff --git a/src/utils/loc.f b/src/utils/loc.f index 6e5d7345..02693281 100644 --- a/src/utils/loc.f +++ b/src/utils/loc.f @@ -1,6 +1,6 @@ c************************************************************************ subroutine maxovl(n,m,s,t,w) -C +C C This subprogram contains an iterative procedure to find the C unitary transformation of a set of n vectors which maximizes C the sum of their square overlaps with a set of m reference @@ -10,7 +10,7 @@ C S: overlap matrix C T: rotation matrix C W: new overlap matrix C -C +C implicit real*8(a-h,o-y),logical*1(z) ! parameter (id1=700) ! dimension s(id1,id1),t(id1,id1),w(id1,id1) @@ -29,23 +29,26 @@ C conv=1.d-6 * 5x,'following the principle of maximum overlap with a set of', * i3,' reference vectors'/5x,'required convergence on rotation ', * 'angle =',f13.10///5x,'Starting overlap matrix'/) - do 6 i=1,m - write (6,145) i - 6 write (6,150) (s(i,j),j=1,n) + do i=1,m + write (6,145) i + write (6,150) (s(i,j),j=1,n) + end do 8 mm=m-1 if (m.lt.n) mm=m iter=0 - do 20 j=1,n - do 16 i=1,n - t(i,j)=0.d0 - 16 continue - do 18 i=1,m - 18 w(i,j)=s(i,j) - 20 t(j,j)=1.d0 + do j=1,n + do i=1,n + t(i,j)=0.d0 + end do + do i=1,m + w(i,j)=s(i,j) + enddo + t(j,j)=1.d0 + enddo sum=0.d0 - do 10 i=1,m - sum=sum+s(i,i)*s(i,i) - 10 continue + do i=1,m + sum=sum+s(i,i)*s(i,i) + end do sum=sum/m if (zprt) write (6,12) sum 12 format (//5x,'Average square overlap =',f10.6) @@ -54,18 +57,18 @@ C conv=1.d-6 j=1 21 if (j.ge.last) goto 30 sum=0.d0 - - do 22 i=1,n - 22 sum=sum+s(i,j)*s(i,j) + do i=1,n + sum=sum+s(i,j)*s(i,j) + enddo if (sum.gt.small) goto 28 - do 24 i=1,n - sij=s(i,j) - s(i,j)=-s(i,last) - s(i,last)=sij - tij=t(i,j) - t(i,j)=-t(i,last) - t(i,last)=tij - 24 continue + do i=1,n + sij=s(i,j) + s(i,j)=-s(i,last) + s(i,last)=sij + tij=t(i,j) + t(i,j)=-t(i,last) + t(i,last)=tij + end do last=last-1 goto 21 28 j=j+1 @@ -101,17 +104,18 @@ C conv=1.d-6 sine=1.d0 34 delta=sine*(a*sine+b*cosine) if (zprt.and.delta.lt.0.d0) write (6,71) i,j,a,b,sine,cosine,delta - do 35 k=1,m - p=s(k,i)*cosine-s(k,j)*sine - q=s(k,i)*sine+s(k,j)*cosine - s(k,i)=p - 35 s(k,j)=q - do 40 k=1,n - p=t(k,i)*cosine-t(k,j)*sine - q=t(k,i)*sine+t(k,j)*cosine - t(k,i)=p - t(k,j)=q - 40 continue + do k=1,m + p=s(k,i)*cosine-s(k,j)*sine + q=s(k,i)*sine+s(k,j)*cosine + s(k,i)=p + s(k,j)=q + enddo + do k=1,n + p=t(k,i)*cosine-t(k,j)*sine + q=t(k,i)*sine+t(k,j)*cosine + t(k,i)=p + t(k,j)=q + enddo 45 d=dabs(sine) if (d.le.amax) goto 50 imax=i @@ -132,43 +136,50 @@ C conv=1.d-6 * 'in subroutine maxovl ***'//) stop 100 continue - do 120 j=1,n - if (s(j,j).gt.0.d0) goto 120 - do 105 i=1,m - 105 s(i,j)=-s(i,j) - do 110 i=1,n - 110 t(i,j)=-t(i,j) - 120 continue + do j=1,n + if (s(j,j).gt.0.d0) cycle + do i=1,m + s(i,j)=-s(i,j) + enddo + do i=1,n + t(i,j)=-t(i,j) + enddo + enddo sum=0.d0 - do 125 i=1,m - 125 sum=sum+s(i,i)*s(i,i) + do i=1,m + sum=sum+s(i,i)*s(i,i) + enddo sum=sum/m - do 122 i=1,m - do 122 j=1,n - sw=s(i,j) - s(i,j)=w(i,j) - 122 w(i,j)=sw + do i=1,m + do j=1,n + sw=s(i,j) + s(i,j)=w(i,j) + w(i,j)=sw + enddo + enddo if (.not.zprt) return write (6,12) sum write (6,130) 130 format (//5x,'transformation matrix') - do 140 i=1,n - write (6,145) i - 140 write (6,150) (t(i,j),j=1,n) + do i=1,n + write (6,145) i + write (6,150) (t(i,j),j=1,n) + enddo 145 format (i8) 150 format (2x,10f12.8) write (6,160) 160 format (//5x,'new overlap matrix'/) - do 170 i=1,m - write (6,145) i - 170 write (6,150) (w(i,j),j=1,n) + do i=1,m + write (6,145) i + write (6,150) (w(i,j),j=1,n) + enddo return end c************************************************************************ subroutine maxovl_no_print(n,m,s,t,w) -C +C C This subprogram contains an iterative procedure to find the C unitary transformation of a set of n vectors which maximizes C the sum of their square overlaps with a set of m reference @@ -178,7 +189,7 @@ C S: overlap matrix C T: rotation matrix C W: new overlap matrix C -C +C implicit real*8(a-h,o-y),logical*1(z) parameter (id1=300) dimension s(id1,id1),t(id1,id1),w(id1,id1) @@ -193,17 +204,19 @@ C conv=1.d-6 8 mm=m-1 if (m.lt.n) mm=m iter=0 - do 20 j=1,n - do 16 i=1,n - t(i,j)=0.d0 - 16 continue - do 18 i=1,m - 18 w(i,j)=s(i,j) - 20 t(j,j)=1.d0 + do j=1,n + do i=1,n + t(i,j)=0.d0 + enddo + do i=1,m + w(i,j)=s(i,j) + enddo + t(j,j)=1.d0 + enddo sum=0.d0 - do 10 i=1,m - sum=sum+s(i,i)*s(i,i) - 10 continue + do i=1,m + sum=sum+s(i,i)*s(i,i) + enddo sum=sum/m 12 format (//5x,'Average square overlap =',f10.6) if (n.eq.1) goto 100 @@ -211,18 +224,19 @@ C conv=1.d-6 j=1 21 if (j.ge.last) goto 30 sum=0.d0 - - do 22 i=1,n - 22 sum=sum+s(i,j)*s(i,j) + + do i=1,n + sum=sum+s(i,j)*s(i,j) + enddo if (sum.gt.small) goto 28 - do 24 i=1,n - sij=s(i,j) - s(i,j)=-s(i,last) - s(i,last)=sij - tij=t(i,j) - t(i,j)=-t(i,last) - t(i,last)=tij - 24 continue + do i=1,n + sij=s(i,j) + s(i,j)=-s(i,last) + s(i,last)=sij + tij=t(i,j) + t(i,j)=-t(i,last) + t(i,last)=tij + end do last=last-1 goto 21 28 j=j+1 @@ -232,50 +246,52 @@ C conv=1.d-6 jmax=0 dmax=0.d0 amax=0.d0 - do 60 i=1,mm - ip=i+1 - do 50 j=ip,n - a=s(i,j)*s(i,j)-s(i,i)*s(i,i) - b=-s(i,i)*s(i,j) - if (j.gt.m) goto 31 - a=a+s(j,i)*s(j,i)-s(j,j)*s(j,j) - b=b+s(j,i)*s(j,j) - 31 b=b+b - if (a.eq.0.d0) goto 32 - ba=b/a - if (dabs(ba).gt.small) goto 32 - if (a.gt.0.d0) goto 33 - tang=-0.5d0*ba - cosine=1.d0/dsqrt(1.d0+tang*tang) - sine=tang*cosine - goto 34 - 32 tang=0.d0 - if (b.ne.0.d0) tang=(a+dsqrt(a*a+b*b))/b - cosine=1.d0/dsqrt(1.d0+tang*tang) - sine=tang*cosine - goto 34 - 33 cosine=0.d0 - sine=1.d0 - 34 delta=sine*(a*sine+b*cosine) - do 35 k=1,m - p=s(k,i)*cosine-s(k,j)*sine - q=s(k,i)*sine+s(k,j)*cosine - s(k,i)=p - 35 s(k,j)=q - do 40 k=1,n - p=t(k,i)*cosine-t(k,j)*sine - q=t(k,i)*sine+t(k,j)*cosine - t(k,i)=p - t(k,j)=q - 40 continue - 45 d=dabs(sine) - if (d.le.amax) goto 50 - imax=i - jmax=j - amax=d - dmax=delta - 50 continue - 60 continue + do i=1,mm + ip=i+1 + do j=ip,n + a=s(i,j)*s(i,j)-s(i,i)*s(i,i) + b=-s(i,i)*s(i,j) + if (j.gt.m) goto 31 + a=a+s(j,i)*s(j,i)-s(j,j)*s(j,j) + b=b+s(j,i)*s(j,j) + 31 b=b+b + if (a.eq.0.d0) goto 32 + ba=b/a + if (dabs(ba).gt.small) goto 32 + if (a.gt.0.d0) goto 33 + tang=-0.5d0*ba + cosine=1.d0/dsqrt(1.d0+tang*tang) + sine=tang*cosine + goto 34 + 32 tang=0.d0 + if (b.ne.0.d0) tang=(a+dsqrt(a*a+b*b))/b + cosine=1.d0/dsqrt(1.d0+tang*tang) + sine=tang*cosine + goto 34 + 33 cosine=0.d0 + sine=1.d0 + 34 delta=sine*(a*sine+b*cosine) + do k=1,m + p=s(k,i)*cosine-s(k,j)*sine + q=s(k,i)*sine+s(k,j)*cosine + s(k,i)=p + s(k,j)=q + enddo + do k=1,n + p=t(k,i)*cosine-t(k,j)*sine + q=t(k,i)*sine+t(k,j)*cosine + t(k,i)=p + t(k,j)=q + enddo + 45 d=dabs(sine) + if (d.le.amax) goto 50 + imax=i + jmax=j + amax=d + dmax=delta + 50 continue + end do + end do 70 format (' iter=',i4,' largest rotation=',f12.8, * ', vectors',i3,' and',i3,', incr. of diag. squares=',g12.5) 71 format (' i,j,a,b,sin,cos,delta =',2i3,5f10.5) @@ -285,22 +301,27 @@ C conv=1.d-6 * 'in subroutine maxovl ***'//) stop 100 continue - do 120 j=1,n - if (s(j,j).gt.0.d0) goto 120 - do 105 i=1,m - 105 s(i,j)=-s(i,j) - do 110 i=1,n - 110 t(i,j)=-t(i,j) - 120 continue + do j=1,n + if (s(j,j).gt.0.d0) cycle + do i=1,m + s(i,j)=-s(i,j) + enddo + do i=1,n + t(i,j)=-t(i,j) + enddo + enddo sum=0.d0 - do 125 i=1,m - 125 sum=sum+s(i,i)*s(i,i) + do i=1,m + sum=sum+s(i,i)*s(i,i) + enddo sum=sum/m - do 122 i=1,m - do 122 j=1,n - sw=s(i,j) - s(i,j)=w(i,j) - 122 w(i,j)=sw + do i=1,m + do j=1,n + sw=s(i,j) + s(i,j)=w(i,j) + w(i,j)=sw + enddo + enddo return end From 5da97f2fbb0f00dcfbdff85e1cb9b5e8c3c839fa Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 19 Nov 2022 16:16:47 +0100 Subject: [PATCH 60/70] Fixed merge --- src/csf/sigma_vector.irp.f | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 5c3c8fe1..833fa7b0 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -26,8 +26,6 @@ NSOMOMin = max(0,cfg_nsomo_min-2) else NSOMOMin = max(1,cfg_nsomo_min-2) - else - NSOMOMin = max(1,cfg_nsomo_min-2) endif ! Note that here we need NSOMOMax + 2 sizes ialpha = (NSOMOMax + MS)/2 From 50057251a79daaa9ec47c6233dc5376b01fc6fc7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 19 Nov 2022 16:41:44 +0100 Subject: [PATCH 61/70] Fixed type error in localization --- ocaml/qp_run.ml | 2 +- src/cis/cis.irp.f | 2 +- src/determinants/EZFIO.cfg | 7 +- src/mo_localization/localization.org | 385 +++++++++++---------- src/mo_localization/localization_sub.irp.f | 271 +++++++-------- 5 files changed, 334 insertions(+), 333 deletions(-) diff --git a/ocaml/qp_run.ml b/ocaml/qp_run.ml index d096b15b..dfbab167 100644 --- a/ocaml/qp_run.ml +++ b/ocaml/qp_run.ml @@ -110,7 +110,7 @@ let run slave ?prefix exe ezfio_file = let task_thread = let thread = Thread.create ( fun () -> - TaskServer.run port_number ) + TaskServer.run ~port:port_number ) in thread (); in diff --git a/src/cis/cis.irp.f b/src/cis/cis.irp.f index cc047622..ab2294ad 100644 --- a/src/cis/cis.irp.f +++ b/src/cis/cis.irp.f @@ -79,6 +79,6 @@ subroutine run call ezfio_set_cis_energy(CI_energy) psi_coef = ci_eigenvectors SOFT_TOUCH psi_coef - call save_wavefunction_truncated(thresh_save_wf) + call save_wavefunction_truncated(save_threshold) end diff --git a/src/determinants/EZFIO.cfg b/src/determinants/EZFIO.cfg index b9c736a9..c6323cd0 100644 --- a/src/determinants/EZFIO.cfg +++ b/src/determinants/EZFIO.cfg @@ -136,9 +136,8 @@ doc: If |true|, discard any Slater determinants with an interaction smaller than interface: ezfio,provider,ocaml default: False - -[thresh_save_wf] +[save_threshold] type: Threshold -doc: Thresholds to save wave function +doc: Cut-off to apply to the CI coefficients when the wave function is stored interface: ezfio,provider,ocaml -default: 1.e-15 +default: 1.e-14 diff --git a/src/mo_localization/localization.org b/src/mo_localization/localization.org index 293ea06b..ad42ef74 100644 --- a/src/mo_localization/localization.org +++ b/src/mo_localization/localization.org @@ -14,7 +14,7 @@ The program localizes the orbitals in function of their mo_class: Core MOs are localized with core MOs, inactives MOs are localized with inactives MOs and so on. But deleted orbitals are not localized. -WARNING: +WARNING: - The user MUST SPECIFY THE MO CLASSES, otherwise if default mo class is false the localization will be done for all the orbitals between them, so the occupied and virtual MOs will be combined together @@ -41,15 +41,15 @@ Daniel A. Kleier, Thomas A. Halgren, John H. Hall Jr., and William N. Lipscomb, J. Chem. Phys. 61, 3905 (1974) doi: 10.1063/1.1681683 Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Comput. Chem. 2013, 34, -1456– 1462. DOI: 10.1002/jcc.23281 +1456– 1462. DOI: 10.1002/jcc.23281 Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Theory Comput. 2012, 8, 9, 3137–3146 DOI: https://doi.org/10.1021/ct300473g Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Phys. 137, 224114 (2012) -DOI: https://doi.org/10.1063/1.4769866 +DOI: https://doi.org/10.1063/1.4769866 Nicola Marzari, Arash A. Mostofi, Jonathan R. Yates, Ivo Souza, and David Vanderbilt -Rev. Mod. Phys. 84, 1419 +Rev. Mod. Phys. 84, 1419 https://doi.org/10.1103/RevModPhys.84.1419 The Foster-Boys localization is a method to generate localized MOs @@ -58,7 +58,7 @@ $$ C_{FB} = \sum_{i=1}^N \left[ < \phi_i | r^2 | \phi_i > - < \phi_i | r | \phi_i >^2 \right] $$. In fact it is equivalent to maximise $$ C_2 = \sum_{i>j, \ i=1}^N \left[ < \phi_i | r | \phi_i > - < -\phi_j | r | \phi_j > \left]^2$$ +\phi_j | r | \phi_j > \left]^2$$ or $$ C_3 = \sum_{i=1}^N \left[ < \phi_i | r | \phi_i > \right]^2.$$ @@ -66,7 +66,7 @@ Locality of the orbitals: \begin{align*} \sigma_i &= \sqrt{ - ^2} \\ &= \sqrt{ - ^2 + - ^2 + - ^2} -\end{align*} +\end{align*} *** Pipek-Mezey localization @@ -102,9 +102,9 @@ The indexes i and j refere to the positions of the elements in the "full space", i.e., the arrays containing elements for all the MOs, but the indexes tmp_i and tmp_j to the positions of the elements in the "reduced space/subspace", i.e., the arrays containing elements for -a restricted number of MOs. +a restricted number of MOs. Example: -The gradient for the localization of the core MOs can be expressed +The gradient for the localization of the core MOs can be expressed as a vector of length mo_num*(mo_num-1)/2 with only n_core_orb*(n_core_orb-1)/2 non zero elements, so it is more relevant to use a vector of size n_act_orb*(n_core_orb-1)/2. @@ -121,7 +121,7 @@ Ex gradient for 4 core orbitales: 0 & -a & -b & -d & \hdots & 0 \\ a & 0 & -c & -e & \hdots & 0 \\ b & c & 0 & -f & \hdots & 0 \\ -d & e & f & 0 & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ \vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & 0 & 0 & \hdots & 0 \\ \end{pmatrix} @@ -136,14 +136,14 @@ f \\ \vdots \\ 0 \\ \end{pmatrix} -\end{align*} +\end{align*} \begin{align*} \begin{pmatrix} 0 & -a & -b & -d & \hdots & 0 \\ a & 0 & -c & -e & \hdots & 0 \\ b & c & 0 & -f & \hdots & 0 \\ -d & e & f & 0 & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ \vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & 0 & 0 & \hdots & 0 \\ \end{pmatrix} @@ -152,7 +152,7 @@ d & e & f & 0 & \hdots & 0 \\ 0 & -a & -b & -d \\ a & 0 & -c & -e \\ b & c & 0 & -f \\ -d & e & f & 0 \\ +d & e & f & 0 \\ \end{pmatrix} \Rightarrow \begin{pmatrix} @@ -173,7 +173,7 @@ consecutives since it's done with lists of MOs: a & 0 & 0 & -c & -e & \hdots & 0 \\ 0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ b & c & 0 & 0 & -f & \hdots & 0 \\ -d & e & 0 & f & 0 & \hdots & 0 \\ +d & e & 0 & f & 0 & \hdots & 0 \\ \vdots & \vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ \end{pmatrix} @@ -182,7 +182,7 @@ d & e & 0 & f & 0 & \hdots & 0 \\ 0 & -a & -b & -d \\ a & 0 & -c & -e \\ b & c & 0 & -f \\ -d & e & f & 0 \\ +d & e & f & 0 \\ \end{pmatrix} \Rightarrow \begin{pmatrix} @@ -199,7 +199,7 @@ The dipoles are updated using the "ao to mo" subroutine without the log(N^2). The bottleneck of the program is normally N^3 with the matrix multiplications/diagonalizations. The use of the full hessian can be -an improvement but it will scale in N^4... +an improvement but it will scale in N^4... ** Program @@ -231,7 +231,7 @@ Variables: | W(:,:) | double precision | Eigenvectors of the hessian | | tmp_x(:) | double precision | Step in 1D (in a subaspace) | | tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | -| tmp_list(:) | double precision | List of MOs in a mo_class | +| tmp_list(:) | integer | List of MOs in a mo_class | | i,j,k | integer | Indexes in the full MO space | | tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | | l | integer | Index for the mo_class | @@ -309,12 +309,12 @@ subroutine run_localization ! Localization criterion (FB, PM, ...) for each mo_class print*,'### Before the pre rotation' - + ! Debug if (debug_hf) then print*,'HF energy:', HF_energy endif - + do l = 1, 4 if (l==1) then ! core tmp_list_size = dim_list_core_orb @@ -326,7 +326,7 @@ subroutine run_localization tmp_list_size = dim_list_virt_orb endif - ! Allocation tmp array + ! Allocation tmp array allocate(tmp_list(tmp_list_size)) ! To give the list of MOs in a mo_class @@ -345,7 +345,7 @@ subroutine run_localization print*,'Criterion:', criterion, mo_class(tmp_list(1)) endif - deallocate(tmp_list) + deallocate(tmp_list) enddo @@ -366,7 +366,7 @@ subroutine run_localization print*, 'abort' call abort - + endif #+END_SRC @@ -378,13 +378,13 @@ subroutine run_localization ! Criterion after the pre rotation ! Localization criterion (FB, PM, ...) for each mo_class print*,'### After the pre rotation' - + ! Debug if (debug_hf) then touch mo_coef print*,'HF energy:', HF_energy endif - + do l = 1, 4 if (l==1) then ! core tmp_list_size = dim_list_core_orb @@ -395,9 +395,9 @@ subroutine run_localization else ! virt tmp_list_size = dim_list_virt_orb endif - + if (tmp_list_size >= 2) then - ! Allocation tmp array + ! Allocation tmp array allocate(tmp_list(tmp_list_size)) ! To give the list of MOs in a mo_class @@ -413,10 +413,10 @@ subroutine run_localization call criterion_localization(tmp_list_size, tmp_list,criterion) print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) - - deallocate(tmp_list) + + deallocate(tmp_list) endif - + enddo ! Debug @@ -426,7 +426,7 @@ subroutine run_localization print*,'========================' print*,' Orbital localization' print*,'========================' - print*,'' + print*,'' !Initialization not_converged = .TRUE. @@ -435,9 +435,9 @@ subroutine run_localization if (dim_list_core_orb >= 2) then not_core_converged = .TRUE. else - not_core_converged = .FALSE. + not_core_converged = .FALSE. endif - + if (dim_list_act_orb >= 2) then not_act_converged = .TRUE. else @@ -455,7 +455,7 @@ subroutine run_localization else not_virt_converged = .FALSE. endif - + ! Loop over the mo_classes do l = 1, 4 @@ -473,12 +473,12 @@ subroutine run_localization tmp_list_size = dim_list_virt_orb endif - ! Next iteration if converged = true + ! Next iteration if converged = true if (.not. not_converged) then cycle endif - - ! Allocation tmp array + + ! Allocation tmp array allocate(tmp_list(tmp_list_size)) ! To give the list of MOs in a mo_class @@ -499,12 +499,12 @@ subroutine run_localization print*,'' endif - ! Size for the 2D -> 1D transformation + ! Size for the 2D -> 1D transformation tmp_n = tmp_list_size * (tmp_list_size - 1)/2 - ! Without hessian + trust region + ! Without hessian + trust region if (.not. localization_use_hessian) then - + ! Allocation of temporary arrays allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) @@ -518,13 +518,13 @@ subroutine run_localization !Loop do while (not_converged) - + print*,'' print*,'***********************' print*,'Iteration', nb_iter print*,'***********************' print*,'' - + ! Angles of rotation call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) tmp_m_x = - tmp_m_x * delta @@ -554,7 +554,7 @@ subroutine run_localization print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion print*,'Max elem :', max_elem print*,'Delta :', delta - + nb_iter = nb_iter + 1 ! Exit @@ -574,7 +574,7 @@ subroutine run_localization ! Trust region else - + ! Allocation of temporary arrays allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) allocate(tmp_R(tmp_list_size, tmp_list_size)) @@ -596,12 +596,12 @@ subroutine run_localization print*,'Iteration', nb_iter print*,'***********************' print*,'' - + ! Gradient call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) ! Diagonal hessian call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) - + ! Diagonalization of the diagonal hessian by hands !call diagonalization_hessian(tmp_n,H,e_val,w) do i = 1, tmp_n @@ -609,7 +609,7 @@ subroutine run_localization enddo ! Key list for dsort - do i = 1, tmp_n + do i = 1, tmp_n key(i) = i enddo @@ -625,7 +625,7 @@ subroutine run_localization ! To enter in the loop just after cancel_step = .True. - nb_sub_iter = 0 + nb_sub_iter = 0 ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho do while (cancel_step) @@ -635,17 +635,17 @@ subroutine run_localization print*,'Sub iteration:', nb_sub_iter print*,'-----------------------------' - ! Hessian,gradient,Criterion -> x + ! Hessian,gradient,Criterion -> x call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, & rho, nb_iter, delta, criterion_model, tmp_x, must_exit) ! Internal loop exit condition if (must_exit) then print*,'trust_region_step_w_expected_e sent: Exit' - exit + exit endif - ! 1D tmp -> 2D tmp + ! 1D tmp -> 2D tmp call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) @@ -660,9 +660,9 @@ subroutine run_localization ! tmp_R to R, subspace to full space call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) - + ! Rotation of the MOs - call apply_mo_rotation(R, prev_mos) + call apply_mo_rotation(R, prev_mos) ! Update the things related to mo_coef call update_data_localization() @@ -671,7 +671,7 @@ subroutine run_localization call criterion_localization(tmp_list_size, tmp_list, criterion) print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion - ! Criterion -> step accepted or rejected + ! Criterion -> step accepted or rejected call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & criterion_model, rho, cancel_step) @@ -687,7 +687,7 @@ subroutine run_localization ! To exit the external loop if must_exti = .True. if (must_exit) then exit - endif + endif ! Step accepted, nb iteration + 1 nb_iter = nb_iter + 1 @@ -703,22 +703,22 @@ subroutine run_localization ! Deallocation of temporary arrays deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) - + ! Save the MOs call save_mos() TOUCH mo_coef - + ! Debug if (debug_hf) then touch mo_coef print*,'HF energy:', HF_energy endif - + endif enddo - TOUCH mo_coef + TOUCH mo_coef ! To sort the MOs using the diagonal elements of the Fock matrix if (sort_mos_by_e) then @@ -734,7 +734,7 @@ subroutine run_localization ! Locality after the localization call compute_spatial_extent(spatial_extent) -end +end #+END_SRC ** Gathering @@ -743,10 +743,10 @@ They are chosen in function of the localization method Gradient: -qp_edit : +qp_edit : | localization_method | method for the localization | -Input: +Input: | tmp_n | integer | Number of parameters in the MO subspace | | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | | tmp_list(tmp_list_size) | integer | MOs in the mo_class | @@ -759,7 +759,7 @@ Output: #+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) - + include 'pi.h' implicit none @@ -767,7 +767,7 @@ subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_ele BEGIN_DOC ! Compute the gradient of the chosen localization method END_DOC - + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad @@ -827,7 +827,7 @@ Output: subroutine criterion_localization(tmp_list_size, tmp_list,criterion) include 'pi.h' - + implicit none BEGIN_DOC @@ -881,7 +881,7 @@ Output: #+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) - + include 'pi.h' implicit none @@ -889,7 +889,7 @@ subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) BEGIN_DOC ! Compute the rotation angles between the MOs for the chosen localization method END_DOC - + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem @@ -907,7 +907,7 @@ end ** Foster-Boys *** Gradient -Input: +Input: | tmp_n | integer | Number of parameters in the MO subspace | | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | | tmp_list(tmp_list_size) | integer | MOs in the mo_class | @@ -925,13 +925,13 @@ Internal: #+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) - + implicit none - + BEGIN_DOC ! Compute the gradient for the Foster-Boys localization END_DOC - + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad double precision, allocatable :: m_grad(:,:) @@ -957,11 +957,11 @@ subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) enddo enddo - + ! 2D -> 1D do tmp_k = 1, tmp_n call vec_to_mat_index(tmp_k,tmp_i,tmp_j) - v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) enddo ! Maximum element in the gradient @@ -970,8 +970,8 @@ subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr if (ABS(v_grad(tmp_k)) > max_elem) then max_elem = ABS(v_grad(tmp_k)) endif - enddo - + enddo + ! Norm of the gradient norm_grad = 0d0 do tmp_k = 1, tmp_n @@ -980,7 +980,7 @@ subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr norm_grad = dsqrt(norm_grad) print*, 'Maximal element in the gradient:', max_elem - print*, 'Norm of the gradient:', norm_grad + print*, 'Norm of the gradient:', norm_grad ! Deallocation deallocate(m_grad) @@ -999,7 +999,7 @@ end subroutine *** Gradient (OMP) #+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) - + use omp_lib implicit none @@ -1007,7 +1007,7 @@ subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, nor BEGIN_DOC ! Compute the gradient for the Foster-Boys localization END_DOC - + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad double precision, allocatable :: m_grad(:,:) @@ -1048,8 +1048,8 @@ subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, nor !$OMP DO do tmp_k = 1, tmp_n call vec_to_mat_index(tmp_k,tmp_i,tmp_j) - v_grad(tmp_k) = m_grad(tmp_i,tmp_j) - enddo + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo !$OMP END DO !$OMP END PARALLEL @@ -1062,7 +1062,7 @@ subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, nor if (ABS(v_grad(tmp_k)) > max_elem) then max_elem = ABS(v_grad(tmp_k)) endif - enddo + enddo ! Norm of the gradient norm_grad = 0d0 @@ -1072,7 +1072,7 @@ subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, nor norm_grad = dsqrt(norm_grad) print*, 'Maximal element in the gradient:', max_elem - print*, 'Norm of the gradient:', norm_grad + print*, 'Norm of the gradient:', norm_grad ! Deallocation deallocate(m_grad) @@ -1088,7 +1088,7 @@ subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, nor end subroutine #+END_SRC -*** Hessian +*** Hessian Output: | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | @@ -1106,7 +1106,7 @@ Internal: subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) implicit none - + BEGIN_DOC ! Compute the diagonal hessian for the Foster-Boys localization END_DOC @@ -1116,7 +1116,7 @@ subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) double precision, allocatable :: beta(:,:) integer :: i,j,tmp_k,tmp_i, tmp_j double precision :: max_elem, t1,t2,t3 - + print*,'' print*,'---hessian_FB---' print*,'' @@ -1126,7 +1126,7 @@ subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! Allocation allocate(beta(tmp_list_size,tmp_list_size)) - + ! Calculation do tmp_j = 1, tmp_list_size j = tmp_list(tmp_j) @@ -1144,7 +1144,7 @@ subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) call vec_to_mat_index(tmp_k,tmp_i,tmp_j) H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) enddo - + ! Min elem max_elem = 0d0 do tmp_k = 1, tmp_n @@ -1162,7 +1162,7 @@ subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) endif enddo print*, 'Max elem H:', max_elem - + ! Near 0 max_elem = 1d10 do tmp_k = 1, tmp_n @@ -1174,7 +1174,7 @@ subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! Deallocation deallocate(beta) - + call wall_time(t2) t3 = t2 - t1 print*,'Time in hessian_FB:', t3 @@ -1201,7 +1201,7 @@ subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) double precision, allocatable :: beta(:,:) integer :: i,j,tmp_k,tmp_i,tmp_j double precision :: max_elem, t1,t2,t3 - + print*,'' print*,'---hessian_FB_omp---' print*,'' @@ -1219,7 +1219,7 @@ subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & !$OMP DEFAULT(NONE) - + ! Calculation !$OMP DO do tmp_j = 1, tmp_list_size @@ -1237,11 +1237,11 @@ subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) !$OMP DO do j = 1, tmp_n do i = 1, tmp_n - H(i,j) = 0d0 + H(i,j) = 0d0 enddo enddo !$OMP END DO - + ! Diagonalm of the hessian !$OMP DO do tmp_k = 1, tmp_n @@ -1249,7 +1249,7 @@ subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) enddo !$OMP END DO - + !$OMP END PARALLEL call omp_set_max_active_levels(4) @@ -1271,7 +1271,7 @@ subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) endif enddo print*, 'Max elem H:', max_elem - + ! Near 0 max_elem = 1d10 do tmp_k = 1, tmp_n @@ -1283,7 +1283,7 @@ subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) ! Deallocation deallocate(beta) - + call wall_time(t2) t3 = t2 - t1 print*,'Time in hessian_FB_omp:', t3 @@ -1309,12 +1309,12 @@ subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gra integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad double precision, allocatable :: m_grad(:,:), tmp_int(:,:) - integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho ! Allocation allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) - ! Initialization + ! Initialization m_grad = 0d0 do a = 1, nucl_num ! loop over the nuclei @@ -1322,12 +1322,12 @@ subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gra ! Loop over the MOs of the a given mo_class to compute do tmp_j = 1, tmp_list_size - j = tmp_list(tmp_j) + j = tmp_list(tmp_j) do tmp_i = 1, tmp_list_size i = tmp_list(tmp_i) do rho = 1, ao_num ! loop over all the AOs do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a - mu = nucl_aos(a,b) ! AO centered on atom a + mu = nucl_aos(a,b) ! AO centered on atom a tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) @@ -1351,7 +1351,7 @@ subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gra ! 2D -> 1D do tmp_k = 1, tmp_n call vec_to_mat_index(tmp_k,tmp_i,tmp_j) - v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) enddo ! Maximum element in the gradient @@ -1397,7 +1397,7 @@ $\sum_{\rho}$ -> sum over all the AOs $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A $c^t$ -> expansion coefficient of orbital |t> -Input: +Input: | tmp_n | integer | Number of parameters in the MO subspace | | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | | tmp_list(tmp_list_size) | integer | MOs in the mo_class | @@ -1432,7 +1432,7 @@ subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr BEGIN_DOC ! Compute gradient for the Pipek-Mezey localization END_DOC - + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) @@ -1456,12 +1456,12 @@ subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr do j = 1, ao_num tmp_mo_coef(j,tmp_i) = mo_coef(j,i) - + enddo enddo call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) - + m_grad = 0d0 do a = 1, nucl_num ! loop over the nuclei @@ -1491,7 +1491,7 @@ subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr enddo enddo - + do b = 1, nucl_n_aos(a) mu = nucl_aos(a,b) do tmp_i = 1, tmp_list_size @@ -1499,14 +1499,14 @@ subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr tmp_CS(tmp_i,b) = CS(tmp_i,mu) enddo - enddo + enddo call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) - + do tmp_j = 1, tmp_list_size do tmp_i = 1, tmp_list_size - tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) enddo enddo @@ -1526,7 +1526,7 @@ subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr ! 2D -> 1D do tmp_k = 1, tmp_n call vec_to_mat_index(tmp_k,tmp_i,tmp_j) - v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) enddo ! Maximum element in the gradient @@ -1535,7 +1535,7 @@ subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr if (ABS(v_grad(tmp_k)) > max_elem) then max_elem = ABS(v_grad(tmp_k)) endif - enddo + enddo ! Norm of the gradient norm_grad = 0d0 @@ -1576,7 +1576,7 @@ subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) double precision, allocatable :: beta(:,:),tmp_int(:,:) integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu double precision :: max_elem - + ! Allocation allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) @@ -1597,7 +1597,7 @@ subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) enddo - enddo + enddo enddo enddo @@ -1609,7 +1609,7 @@ subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) enddo enddo - + enddo H = 0d0 @@ -1617,7 +1617,7 @@ subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) call vec_to_mat_index(tmp_k,tmp_i,tmp_j) H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) enddo - + ! max_elem = 0d0 ! do tmp_k = 1, tmp_n ! if (H(tmp_k,tmp_k) < max_elem) then @@ -1633,7 +1633,7 @@ subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) ! endif ! enddo ! print*, 'Max elem H:', max_elem -! +! ! max_elem = 1d10 ! do tmp_k = 1, tmp_n ! if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then @@ -1670,7 +1670,7 @@ $c^t$ -> expansion coefficient of orbital |t> subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) implicit none - + BEGIN_DOC ! Compute diagonal hessian for the Pipek-Mezey localization END_DOC @@ -1680,7 +1680,7 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu double precision :: max_elem, t1,t2,t3 - + print*,'' print*,'---hessian_PM---' print*,'' @@ -1698,12 +1698,12 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) do j = 1, ao_num tmp_mo_coef(j,tmp_i) = mo_coef(j,i) - + enddo enddo call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) - + do a = 1, nucl_num ! loop over the nuclei tmp_int = 0d0 @@ -1720,7 +1720,7 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) ! enddo ! enddo !enddo - + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) do tmp_i = 1, tmp_list_size @@ -1731,7 +1731,7 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) enddo enddo - + do b = 1, nucl_n_aos(a) mu = nucl_aos(a,b) do tmp_i = 1, tmp_list_size @@ -1739,14 +1739,14 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) tmp_CS(tmp_i,b) = CS(tmp_i,mu) enddo - enddo + enddo call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) - + do tmp_j = 1, tmp_list_size do tmp_i = 1, tmp_list_size - tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) enddo enddo @@ -1761,7 +1761,7 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) enddo enddo - + enddo H = 0d0 @@ -1769,7 +1769,7 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) call vec_to_mat_index(tmp_k,tmp_i,tmp_j) H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) enddo - + max_elem = 0d0 do tmp_k = 1, tmp_n if (H(tmp_k,tmp_k) < max_elem) then @@ -1785,7 +1785,7 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) endif enddo print*, 'Max elem H:', max_elem - + max_elem = 1d10 do tmp_k = 1, tmp_n if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then @@ -1815,18 +1815,18 @@ end subroutine compute_crit_pipek(criterion) implicit none - + BEGIN_DOC ! Compute the Pipek-Mezey localization criterion END_DOC double precision, intent(out) :: criterion double precision, allocatable :: tmp_int(:,:) - integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho ! Allocation allocate(tmp_int(mo_num, mo_num)) - + criterion = 0d0 do a = 1, nucl_num ! loop over the nuclei @@ -1841,16 +1841,16 @@ subroutine compute_crit_pipek(criterion) + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) enddo - enddo + enddo enddo - do i = 1, mo_num + do i = 1, mo_num criterion = criterion + tmp_int(i,i)**2 enddo enddo - - criterion = - criterion + + criterion = - criterion deallocate(tmp_int) @@ -1863,7 +1863,7 @@ The criterion is computed as \begin{align*} \mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 \end{align*} -with +with \begin{align*} = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] \end{align*} @@ -1872,7 +1872,7 @@ with subroutine criterion_PM(tmp_list_size,tmp_list,criterion) implicit none - + BEGIN_DOC ! Compute the Pipek-Mezey localization criterion END_DOC @@ -1881,18 +1881,18 @@ subroutine criterion_PM(tmp_list_size,tmp_list,criterion) double precision, intent(out) :: criterion double precision, allocatable :: tmp_int(:,:),CS(:,:) integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho - + print*,'' print*,'---criterion_PM---' - + ! Allocation allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) - + ! Initialization criterion = 0d0 call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) - + do a = 1, nucl_num ! loop over the nuclei tmp_int = 0d0 @@ -1900,7 +1900,7 @@ subroutine criterion_PM(tmp_list_size,tmp_list,criterion) i = tmp_list(tmp_i) do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a mu = nucl_aos(a,b) - + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & @@ -1914,8 +1914,8 @@ subroutine criterion_PM(tmp_list_size,tmp_list,criterion) enddo enddo - - criterion = - criterion + + criterion = - criterion deallocate(tmp_int,CS) @@ -1930,11 +1930,11 @@ end subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) implicit none - + BEGIN_DOC ! Compute the Pipek-Mezey localization criterion END_DOC - + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: criterion double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) @@ -1943,7 +1943,7 @@ subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) print*,'' print*,'---criterion_PM_v3---' - + call wall_time(t1) ! Allocation @@ -1958,16 +1958,16 @@ subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) do j = 1, ao_num tmp_mo_coef(j,tmp_i) = mo_coef(j,i) - + enddo enddo ! ao_overlap(ao_num,ao_num) ! mo_coef(ao_num,mo_num) call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) - + do a = 1, nucl_num ! loop over the nuclei - + do j = 1, tmp_list_size do i = 1, tmp_list_size tmp_int(i,j) = 0d0 @@ -1989,7 +1989,7 @@ subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) !enddo allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) - + do tmp_i = 1, tmp_list_size do b = 1, nucl_n_aos(a) mu = nucl_aos(a,b) @@ -1998,7 +1998,7 @@ subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) enddo enddo - + do b = 1, nucl_n_aos(a) mu = nucl_aos(a,b) do tmp_i = 1, tmp_list_size @@ -2006,15 +2006,15 @@ subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) tmp_CS(tmp_i,b) = CS(tmp_i,mu) enddo - enddo + enddo call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) - + ! Integrals do tmp_j = 1, tmp_list_size do tmp_i = 1, tmp_list_size - tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) enddo enddo @@ -2028,7 +2028,7 @@ subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) enddo - criterion = - criterion + criterion = - criterion deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) @@ -2265,16 +2265,16 @@ subroutine theta_FB(l, n, m_x, max_elem) !print*,idx_i,idx_j,max_elem max_elem = dabs(max_elem) - + deallocate(cos4theta, sin4theta) deallocate(A,B,beta,gamma) - + end #+end_src #+begin_src f90 :comments org :tangle localization_sub.irp.f subroutine theta_PM(l, n, m_x, max_elem) - + include 'pi.h' BEGIN_DOC @@ -2297,18 +2297,18 @@ subroutine theta_PM(l, n, m_x, max_elem) ! Loop over the MOs of the a given mo_class to compute do tmp_j = 1, n - j = l(tmp_j) + j = l(tmp_j) do tmp_i = 1, n i = l(tmp_i) do rho = 1, ao_num ! loop over all the AOs do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a - mu = nucl_aos(a,b) ! AO centered on atom a + mu = nucl_aos(a,b) ! AO centered on atom a Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) enddo - enddo + enddo enddo enddo @@ -2318,7 +2318,7 @@ subroutine theta_PM(l, n, m_x, max_elem) Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 enddo enddo - + ! B do j = 1, n do i = 1, n @@ -2372,7 +2372,7 @@ end ** Spatial extent -The spatial extent of an orbital $i$ is computed as +The spatial extent of an orbital $i$ is computed as \begin{align*} \sum_{\lambda=x,y,z}\sqrt{ - ^2} \end{align*} @@ -2387,14 +2387,14 @@ subroutine compute_spatial_extent(spatial_extent) BEGIN_DOC ! Compute the spatial extent of the MOs END_DOC - + double precision, intent(out) :: spatial_extent(mo_num) double precision :: average_core, average_act, average_inact, average_virt double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt integer :: i,j,k,l spatial_extent = 0d0 - + do i = 1, mo_num spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 enddo @@ -2422,7 +2422,7 @@ subroutine compute_spatial_extent(spatial_extent) call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) endif - + average_inact = 0d0 std_var_inact = 0d0 if (dim_list_inact_orb >= 2) then @@ -2452,12 +2452,12 @@ subroutine compute_spatial_extent(spatial_extent) print*, 'virt:', dim_list_virt_orb print*, 'mo_num:', mo_num print*,'' - + print*,'-- Core MOs --' print*,'Average:', average_core print*,'Std var:', std_var_core print*,'' - + print*,'-- Active MOs --' print*,'Average:', average_act print*,'Std var:', std_var_act @@ -2485,7 +2485,7 @@ end subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) implicit none - + BEGIN_DOC ! Compute the average spatial extent of the MOs END_DOC @@ -2494,7 +2494,7 @@ subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) double precision, intent(in) :: spatial_extent(mo_num) double precision, intent(out) :: average integer :: i, tmp_i - + average = 0d0 do tmp_i = 1, list_size i = list(tmp_i) @@ -2527,7 +2527,7 @@ subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_ i = list(tmp_i) std_var = std_var + (spatial_extent(i) - average)**2 enddo - + std_var = dsqrt(1d0/DBLE(list_size) * std_var) end @@ -2575,7 +2575,7 @@ subroutine apply_pre_rotation() enddo enddo endif - + ! Pre rotation for active MOs if (dim_list_act_orb >= 2) then do tmp_j = 1, dim_list_act_orb @@ -2592,7 +2592,7 @@ subroutine apply_pre_rotation() enddo enddo endif - + ! Pre rotation for inactive MOs if (dim_list_inact_orb >= 2) then do tmp_j = 1, dim_list_inact_orb @@ -2609,7 +2609,7 @@ subroutine apply_pre_rotation() enddo enddo endif - + ! Pre rotation for virtual MOs if (dim_list_virt_orb >= 2) then do tmp_j = 1, dim_list_virt_orb @@ -2626,21 +2626,21 @@ subroutine apply_pre_rotation() enddo enddo endif - + ! Nothing for deleted ones - + ! Compute pre rotation matrix from pre_rot call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) - + if (enforce_step_cancellation) then print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' print*, 'Reduce the angle for the pre rotation, abort' call abort endif - + ! New Mos (we don't car eabout the previous MOs prev_mos) call apply_mo_rotation(R,prev_mos) - + ! Update the things related to mo_coef TOUCH mo_coef call save_mos @@ -2683,19 +2683,19 @@ subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp ! min element in the hessian if (lambda < 0d0) then lambda = -lambda + 1d-6 - endif - + endif + print*, 'lambda', lambda - + ! Good do tmp_k = 1, tmp_n if (ABS(H(tmp_k,tmp_k)) > 1d-6) then tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) - !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) endif enddo - ! 1D tmp -> 2D tmp + ! 1D tmp -> 2D tmp tmp_m_x = 0d0 do tmp_j = 1, tmp_list_size - 1 do tmp_i = tmp_j + 1, tmp_list_size @@ -2707,7 +2707,7 @@ subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp ! Antisym do tmp_i = 1, tmp_list_size - 1 do tmp_j = tmp_i + 1, tmp_list_size - tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) enddo enddo @@ -2749,16 +2749,17 @@ end #+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f subroutine run_sort_by_fock_energies() - + implicit none - + BEGIN_DOC ! Saves the current MOs ordered by diagonal element of the Fock operator. END_DOC - + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size integer, allocatable :: iorder(:) - double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:), tmp_list(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + integer, allocatable :: tmp_list(:) ! allocate(iorder(mo_num), fock_energies_tmp(mo_num), new_mo_coef(ao_num, mo_num)) ! @@ -2818,9 +2819,9 @@ subroutine run_sort_by_fock_energies() iorder(i) = i !print*, tmp_i, fock_energies_tmp(i) enddo - + call dsort(fock_energies_tmp, iorder, tmp_list_size) - + print*,'MOs after sorting them by f_p^p energies:' do i = 1, tmp_list_size k = iorder(i) @@ -2844,7 +2845,7 @@ subroutine run_sort_by_fock_energies() print*,'HF energy:', HF_energy endif print*,'' - + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) endif @@ -2852,7 +2853,7 @@ subroutine run_sort_by_fock_energies() touch mo_coef call save_mos - + end #+END_SRC diff --git a/src/mo_localization/localization_sub.irp.f b/src/mo_localization/localization_sub.irp.f index 5ca89790..39442a12 100644 --- a/src/mo_localization/localization_sub.irp.f +++ b/src/mo_localization/localization_sub.irp.f @@ -4,10 +4,10 @@ ! Gradient: -! qp_edit : +! qp_edit : ! | localization_method | method for the localization | -! Input: +! Input: ! | tmp_n | integer | Number of parameters in the MO subspace | ! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | ! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | @@ -20,7 +20,7 @@ subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) - + include 'pi.h' implicit none @@ -28,7 +28,7 @@ subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_ele BEGIN_DOC ! Compute the gradient of the chosen localization method END_DOC - + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad @@ -90,7 +90,7 @@ end subroutine criterion_localization(tmp_list_size, tmp_list,criterion) include 'pi.h' - + implicit none BEGIN_DOC @@ -146,7 +146,7 @@ end subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) - + include 'pi.h' implicit none @@ -154,7 +154,7 @@ subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) BEGIN_DOC ! Compute the rotation angles between the MOs for the chosen localization method END_DOC - + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem @@ -170,7 +170,7 @@ subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) end ! Gradient -! Input: +! Input: ! | tmp_n | integer | Number of parameters in the MO subspace | ! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | ! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | @@ -188,13 +188,13 @@ end subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) - + implicit none - + BEGIN_DOC ! Compute the gradient for the Foster-Boys localization END_DOC - + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad double precision, allocatable :: m_grad(:,:) @@ -220,11 +220,11 @@ subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) enddo enddo - + ! 2D -> 1D do tmp_k = 1, tmp_n call vec_to_mat_index(tmp_k,tmp_i,tmp_j) - v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) enddo ! Maximum element in the gradient @@ -233,8 +233,8 @@ subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr if (ABS(v_grad(tmp_k)) > max_elem) then max_elem = ABS(v_grad(tmp_k)) endif - enddo - + enddo + ! Norm of the gradient norm_grad = 0d0 do tmp_k = 1, tmp_n @@ -243,7 +243,7 @@ subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr norm_grad = dsqrt(norm_grad) print*, 'Maximal element in the gradient:', max_elem - print*, 'Norm of the gradient:', norm_grad + print*, 'Norm of the gradient:', norm_grad ! Deallocation deallocate(m_grad) @@ -261,7 +261,7 @@ end subroutine ! Gradient (OMP) subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) - + use omp_lib implicit none @@ -269,7 +269,7 @@ subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, nor BEGIN_DOC ! Compute the gradient for the Foster-Boys localization END_DOC - + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad double precision, allocatable :: m_grad(:,:) @@ -310,8 +310,8 @@ subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, nor !$OMP DO do tmp_k = 1, tmp_n call vec_to_mat_index(tmp_k,tmp_i,tmp_j) - v_grad(tmp_k) = m_grad(tmp_i,tmp_j) - enddo + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo !$OMP END DO !$OMP END PARALLEL @@ -324,7 +324,7 @@ subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, nor if (ABS(v_grad(tmp_k)) > max_elem) then max_elem = ABS(v_grad(tmp_k)) endif - enddo + enddo ! Norm of the gradient norm_grad = 0d0 @@ -334,7 +334,7 @@ subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, nor norm_grad = dsqrt(norm_grad) print*, 'Maximal element in the gradient:', max_elem - print*, 'Norm of the gradient:', norm_grad + print*, 'Norm of the gradient:', norm_grad ! Deallocation deallocate(m_grad) @@ -349,7 +349,7 @@ subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, nor end subroutine -! Hessian +! Hessian ! Output: ! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | @@ -367,7 +367,7 @@ end subroutine subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) implicit none - + BEGIN_DOC ! Compute the diagonal hessian for the Foster-Boys localization END_DOC @@ -377,7 +377,7 @@ subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) double precision, allocatable :: beta(:,:) integer :: i,j,tmp_k,tmp_i, tmp_j double precision :: max_elem, t1,t2,t3 - + print*,'' print*,'---hessian_FB---' print*,'' @@ -387,7 +387,7 @@ subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! Allocation allocate(beta(tmp_list_size,tmp_list_size)) - + ! Calculation do tmp_j = 1, tmp_list_size j = tmp_list(tmp_j) @@ -405,7 +405,7 @@ subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) call vec_to_mat_index(tmp_k,tmp_i,tmp_j) H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) enddo - + ! Min elem max_elem = 0d0 do tmp_k = 1, tmp_n @@ -423,7 +423,7 @@ subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) endif enddo print*, 'Max elem H:', max_elem - + ! Near 0 max_elem = 1d10 do tmp_k = 1, tmp_n @@ -435,7 +435,7 @@ subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! Deallocation deallocate(beta) - + call wall_time(t2) t3 = t2 - t1 print*,'Time in hessian_FB:', t3 @@ -461,7 +461,7 @@ subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) double precision, allocatable :: beta(:,:) integer :: i,j,tmp_k,tmp_i,tmp_j double precision :: max_elem, t1,t2,t3 - + print*,'' print*,'---hessian_FB_omp---' print*,'' @@ -479,7 +479,7 @@ subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & !$OMP DEFAULT(NONE) - + ! Calculation !$OMP DO do tmp_j = 1, tmp_list_size @@ -497,11 +497,11 @@ subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) !$OMP DO do j = 1, tmp_n do i = 1, tmp_n - H(i,j) = 0d0 + H(i,j) = 0d0 enddo enddo !$OMP END DO - + ! Diagonalm of the hessian !$OMP DO do tmp_k = 1, tmp_n @@ -509,7 +509,7 @@ subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) enddo !$OMP END DO - + !$OMP END PARALLEL call omp_set_max_active_levels(4) @@ -531,7 +531,7 @@ subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) endif enddo print*, 'Max elem H:', max_elem - + ! Near 0 max_elem = 1d10 do tmp_k = 1, tmp_n @@ -543,7 +543,7 @@ subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) ! Deallocation deallocate(beta) - + call wall_time(t2) t3 = t2 - t1 print*,'Time in hessian_FB_omp:', t3 @@ -567,12 +567,12 @@ subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gra integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad double precision, allocatable :: m_grad(:,:), tmp_int(:,:) - integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho ! Allocation allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) - ! Initialization + ! Initialization m_grad = 0d0 do a = 1, nucl_num ! loop over the nuclei @@ -580,12 +580,12 @@ subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gra ! Loop over the MOs of the a given mo_class to compute do tmp_j = 1, tmp_list_size - j = tmp_list(tmp_j) + j = tmp_list(tmp_j) do tmp_i = 1, tmp_list_size i = tmp_list(tmp_i) do rho = 1, ao_num ! loop over all the AOs do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a - mu = nucl_aos(a,b) ! AO centered on atom a + mu = nucl_aos(a,b) ! AO centered on atom a tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) @@ -609,7 +609,7 @@ subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gra ! 2D -> 1D do tmp_k = 1, tmp_n call vec_to_mat_index(tmp_k,tmp_i,tmp_j) - v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) enddo ! Maximum element in the gradient @@ -654,7 +654,7 @@ end subroutine grad_pipek ! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A ! $c^t$ -> expansion coefficient of orbital |t> -! Input: +! Input: ! | tmp_n | integer | Number of parameters in the MO subspace | ! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | ! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | @@ -689,7 +689,7 @@ subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr BEGIN_DOC ! Compute gradient for the Pipek-Mezey localization END_DOC - + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) @@ -713,12 +713,12 @@ subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr do j = 1, ao_num tmp_mo_coef(j,tmp_i) = mo_coef(j,i) - + enddo enddo call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) - + m_grad = 0d0 do a = 1, nucl_num ! loop over the nuclei @@ -748,7 +748,7 @@ subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr enddo enddo - + do b = 1, nucl_n_aos(a) mu = nucl_aos(a,b) do tmp_i = 1, tmp_list_size @@ -756,14 +756,14 @@ subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr tmp_CS(tmp_i,b) = CS(tmp_i,mu) enddo - enddo + enddo call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) - + do tmp_j = 1, tmp_list_size do tmp_i = 1, tmp_list_size - tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) enddo enddo @@ -783,7 +783,7 @@ subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr ! 2D -> 1D do tmp_k = 1, tmp_n call vec_to_mat_index(tmp_k,tmp_i,tmp_j) - v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) enddo ! Maximum element in the gradient @@ -792,7 +792,7 @@ subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_gr if (ABS(v_grad(tmp_k)) > max_elem) then max_elem = ABS(v_grad(tmp_k)) endif - enddo + enddo ! Norm of the gradient norm_grad = 0d0 @@ -832,7 +832,7 @@ subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) double precision, allocatable :: beta(:,:),tmp_int(:,:) integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu double precision :: max_elem - + ! Allocation allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) @@ -853,7 +853,7 @@ subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) enddo - enddo + enddo enddo enddo @@ -865,7 +865,7 @@ subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) enddo enddo - + enddo H = 0d0 @@ -873,7 +873,7 @@ subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) call vec_to_mat_index(tmp_k,tmp_i,tmp_j) H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) enddo - + ! max_elem = 0d0 ! do tmp_k = 1, tmp_n ! if (H(tmp_k,tmp_k) < max_elem) then @@ -889,7 +889,7 @@ subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) ! endif ! enddo ! print*, 'Max elem H:', max_elem -! +! ! max_elem = 1d10 ! do tmp_k = 1, tmp_n ! if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then @@ -925,7 +925,7 @@ end subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) implicit none - + BEGIN_DOC ! Compute diagonal hessian for the Pipek-Mezey localization END_DOC @@ -935,7 +935,7 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu double precision :: max_elem, t1,t2,t3 - + print*,'' print*,'---hessian_PM---' print*,'' @@ -953,12 +953,12 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) do j = 1, ao_num tmp_mo_coef(j,tmp_i) = mo_coef(j,i) - + enddo enddo call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) - + do a = 1, nucl_num ! loop over the nuclei tmp_int = 0d0 @@ -975,7 +975,7 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) ! enddo ! enddo !enddo - + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) do tmp_i = 1, tmp_list_size @@ -986,7 +986,7 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) enddo enddo - + do b = 1, nucl_n_aos(a) mu = nucl_aos(a,b) do tmp_i = 1, tmp_list_size @@ -994,14 +994,14 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) tmp_CS(tmp_i,b) = CS(tmp_i,mu) enddo - enddo + enddo call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) - + do tmp_j = 1, tmp_list_size do tmp_i = 1, tmp_list_size - tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) enddo enddo @@ -1016,7 +1016,7 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) enddo enddo - + enddo H = 0d0 @@ -1024,7 +1024,7 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) call vec_to_mat_index(tmp_k,tmp_i,tmp_j) H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) enddo - + max_elem = 0d0 do tmp_k = 1, tmp_n if (H(tmp_k,tmp_k) < max_elem) then @@ -1040,7 +1040,7 @@ subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) endif enddo print*, 'Max elem H:', max_elem - + max_elem = 1d10 do tmp_k = 1, tmp_n if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then @@ -1067,18 +1067,18 @@ end subroutine compute_crit_pipek(criterion) implicit none - + BEGIN_DOC ! Compute the Pipek-Mezey localization criterion END_DOC double precision, intent(out) :: criterion double precision, allocatable :: tmp_int(:,:) - integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho ! Allocation allocate(tmp_int(mo_num, mo_num)) - + criterion = 0d0 do a = 1, nucl_num ! loop over the nuclei @@ -1093,16 +1093,16 @@ subroutine compute_crit_pipek(criterion) + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) enddo - enddo + enddo enddo - do i = 1, mo_num + do i = 1, mo_num criterion = criterion + tmp_int(i,i)**2 enddo enddo - - criterion = - criterion + + criterion = - criterion deallocate(tmp_int) @@ -1114,7 +1114,7 @@ end ! \begin{align*} ! \mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 ! \end{align*} -! with +! with ! \begin{align*} ! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] ! \end{align*} @@ -1123,7 +1123,7 @@ end subroutine criterion_PM(tmp_list_size,tmp_list,criterion) implicit none - + BEGIN_DOC ! Compute the Pipek-Mezey localization criterion END_DOC @@ -1132,18 +1132,18 @@ subroutine criterion_PM(tmp_list_size,tmp_list,criterion) double precision, intent(out) :: criterion double precision, allocatable :: tmp_int(:,:),CS(:,:) integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho - + print*,'' print*,'---criterion_PM---' - + ! Allocation allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) - + ! Initialization criterion = 0d0 call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) - + do a = 1, nucl_num ! loop over the nuclei tmp_int = 0d0 @@ -1151,7 +1151,7 @@ subroutine criterion_PM(tmp_list_size,tmp_list,criterion) i = tmp_list(tmp_i) do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a mu = nucl_aos(a,b) - + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & @@ -1165,8 +1165,8 @@ subroutine criterion_PM(tmp_list_size,tmp_list,criterion) enddo enddo - - criterion = - criterion + + criterion = - criterion deallocate(tmp_int,CS) @@ -1180,11 +1180,11 @@ end subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) implicit none - + BEGIN_DOC ! Compute the Pipek-Mezey localization criterion END_DOC - + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) double precision, intent(out) :: criterion double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) @@ -1193,7 +1193,7 @@ subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) print*,'' print*,'---criterion_PM_v3---' - + call wall_time(t1) ! Allocation @@ -1208,16 +1208,16 @@ subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) do j = 1, ao_num tmp_mo_coef(j,tmp_i) = mo_coef(j,i) - + enddo enddo ! ao_overlap(ao_num,ao_num) ! mo_coef(ao_num,mo_num) call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) - + do a = 1, nucl_num ! loop over the nuclei - + do j = 1, tmp_list_size do i = 1, tmp_list_size tmp_int(i,j) = 0d0 @@ -1239,7 +1239,7 @@ subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) !enddo allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) - + do tmp_i = 1, tmp_list_size do b = 1, nucl_n_aos(a) mu = nucl_aos(a,b) @@ -1248,7 +1248,7 @@ subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) enddo enddo - + do b = 1, nucl_n_aos(a) mu = nucl_aos(a,b) do tmp_i = 1, tmp_list_size @@ -1256,15 +1256,15 @@ subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) tmp_CS(tmp_i,b) = CS(tmp_i,mu) enddo - enddo + enddo call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) - + ! Integrals do tmp_j = 1, tmp_list_size do tmp_i = 1, tmp_list_size - tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) enddo enddo @@ -1278,7 +1278,7 @@ subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) enddo - criterion = - criterion + criterion = - criterion deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) @@ -1477,14 +1477,14 @@ subroutine theta_FB(l, n, m_x, max_elem) !print*,idx_i,idx_j,max_elem max_elem = dabs(max_elem) - + deallocate(cos4theta, sin4theta) deallocate(A,B,beta,gamma) - + end subroutine theta_PM(l, n, m_x, max_elem) - + include 'pi.h' BEGIN_DOC @@ -1507,18 +1507,18 @@ subroutine theta_PM(l, n, m_x, max_elem) ! Loop over the MOs of the a given mo_class to compute do tmp_j = 1, n - j = l(tmp_j) + j = l(tmp_j) do tmp_i = 1, n i = l(tmp_i) do rho = 1, ao_num ! loop over all the AOs do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a - mu = nucl_aos(a,b) ! AO centered on atom a + mu = nucl_aos(a,b) ! AO centered on atom a Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) enddo - enddo + enddo enddo enddo @@ -1528,7 +1528,7 @@ subroutine theta_PM(l, n, m_x, max_elem) Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 enddo enddo - + ! B do j = 1, n do i = 1, n @@ -1581,7 +1581,7 @@ end ! Spatial extent -! The spatial extent of an orbital $i$ is computed as +! The spatial extent of an orbital $i$ is computed as ! \begin{align*} ! \sum_{\lambda=x,y,z}\sqrt{ - ^2} ! \end{align*} @@ -1596,14 +1596,14 @@ subroutine compute_spatial_extent(spatial_extent) BEGIN_DOC ! Compute the spatial extent of the MOs END_DOC - + double precision, intent(out) :: spatial_extent(mo_num) double precision :: average_core, average_act, average_inact, average_virt double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt integer :: i,j,k,l spatial_extent = 0d0 - + do i = 1, mo_num spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 enddo @@ -1631,7 +1631,7 @@ subroutine compute_spatial_extent(spatial_extent) call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) endif - + average_inact = 0d0 std_var_inact = 0d0 if (dim_list_inact_orb >= 2) then @@ -1661,12 +1661,12 @@ subroutine compute_spatial_extent(spatial_extent) print*, 'virt:', dim_list_virt_orb print*, 'mo_num:', mo_num print*,'' - + print*,'-- Core MOs --' print*,'Average:', average_core print*,'Std var:', std_var_core print*,'' - + print*,'-- Active MOs --' print*,'Average:', average_act print*,'Std var:', std_var_act @@ -1692,7 +1692,7 @@ end subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) implicit none - + BEGIN_DOC ! Compute the average spatial extent of the MOs END_DOC @@ -1701,7 +1701,7 @@ subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) double precision, intent(in) :: spatial_extent(mo_num) double precision, intent(out) :: average integer :: i, tmp_i - + average = 0d0 do tmp_i = 1, list_size i = list(tmp_i) @@ -1732,7 +1732,7 @@ subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_ i = list(tmp_i) std_var = std_var + (spatial_extent(i) - average)**2 enddo - + std_var = dsqrt(1d0/DBLE(list_size) * std_var) end @@ -1779,7 +1779,7 @@ subroutine apply_pre_rotation() enddo enddo endif - + ! Pre rotation for active MOs if (dim_list_act_orb >= 2) then do tmp_j = 1, dim_list_act_orb @@ -1796,7 +1796,7 @@ subroutine apply_pre_rotation() enddo enddo endif - + ! Pre rotation for inactive MOs if (dim_list_inact_orb >= 2) then do tmp_j = 1, dim_list_inact_orb @@ -1813,7 +1813,7 @@ subroutine apply_pre_rotation() enddo enddo endif - + ! Pre rotation for virtual MOs if (dim_list_virt_orb >= 2) then do tmp_j = 1, dim_list_virt_orb @@ -1830,21 +1830,21 @@ subroutine apply_pre_rotation() enddo enddo endif - + ! Nothing for deleted ones - + ! Compute pre rotation matrix from pre_rot call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) - + if (enforce_step_cancellation) then print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' print*, 'Reduce the angle for the pre rotation, abort' call abort endif - + ! New Mos (we don't car eabout the previous MOs prev_mos) call apply_mo_rotation(R,prev_mos) - + ! Update the things related to mo_coef TOUCH mo_coef call save_mos @@ -1885,19 +1885,19 @@ subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp ! min element in the hessian if (lambda < 0d0) then lambda = -lambda + 1d-6 - endif - + endif + print*, 'lambda', lambda - + ! Good do tmp_k = 1, tmp_n if (ABS(H(tmp_k,tmp_k)) > 1d-6) then tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) - !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) endif enddo - ! 1D tmp -> 2D tmp + ! 1D tmp -> 2D tmp tmp_m_x = 0d0 do tmp_j = 1, tmp_list_size - 1 do tmp_i = tmp_j + 1, tmp_list_size @@ -1909,7 +1909,7 @@ subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp ! Antisym do tmp_i = 1, tmp_list_size - 1 do tmp_j = tmp_i + 1, tmp_list_size - tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) enddo enddo @@ -1947,16 +1947,17 @@ subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) end subroutine run_sort_by_fock_energies() - + implicit none - + BEGIN_DOC ! Saves the current MOs ordered by diagonal element of the Fock operator. END_DOC - + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size integer, allocatable :: iorder(:) - double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:), tmp_list(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + integer, allocatable :: tmp_list(:) ! allocate(iorder(mo_num), fock_energies_tmp(mo_num), new_mo_coef(ao_num, mo_num)) ! @@ -2006,7 +2007,7 @@ subroutine run_sort_by_fock_energies() else tmp_list = list_virt endif - print*,'MO class: ',trim(mo_class(tmp_list(1))) + print*,'MO class: ', trim(mo_class(tmp_list(1))) allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) !print*,'MOs before sorting them by f_p^p energies:' @@ -2016,9 +2017,9 @@ subroutine run_sort_by_fock_energies() iorder(i) = i !print*, tmp_i, fock_energies_tmp(i) enddo - + call dsort(fock_energies_tmp, iorder, tmp_list_size) - + print*,'MOs after sorting them by f_p^p energies:' do i = 1, tmp_list_size k = iorder(i) @@ -2042,7 +2043,7 @@ subroutine run_sort_by_fock_energies() print*,'HF energy:', HF_energy endif print*,'' - + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) endif @@ -2050,5 +2051,5 @@ subroutine run_sort_by_fock_energies() touch mo_coef call save_mos - + end From b2f79e2581610fb55aace4d180b68006140c0d0b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 20 Nov 2022 03:01:06 +0100 Subject: [PATCH 62/70] Vectorizing numerical integrals --- src/ao_many_one_e_ints/ao_gaus_gauss.irp.f | 141 +++- src/ao_many_one_e_ints/grad2_jmu_modif.irp.f | 111 +-- src/ao_many_one_e_ints/listj1b.irp.f | 1 - .../prim_int_gauss_gauss.irp.f | 186 +++-- src/utils/integration.irp.f | 296 +++++++- src/utils/map_module.f90 | 6 +- src/utils/qsort.c | 373 ++++++++++ src/utils/qsort.org | 169 +++++ src/utils/qsort_module.f90 | 347 +++++++++ src/utils/sort.irp.f | 693 ------------------ 10 files changed, 1476 insertions(+), 847 deletions(-) create mode 100644 src/utils/qsort.c create mode 100644 src/utils/qsort.org create mode 100644 src/utils/qsort_module.f90 diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f index d515b0c1..19cfe4cd 100644 --- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f @@ -22,15 +22,15 @@ subroutine overlap_gauss_xyz_r12_ao(D_center,delta,i,j,gauss_ints) power_B(1:3)= ao_power(j,1:3) B_center(1:3) = nucl_coord(num_B,1:3) do l=1,ao_prim_num(i) - alpha = ao_expo_ordered_transp(l,i) + alpha = ao_expo_ordered_transp(l,i) do k=1,ao_prim_num(j) - beta = ao_expo_ordered_transp(k,j) + beta = ao_expo_ordered_transp(k,j) call overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints_tmp) do m = 1, 3 gauss_ints(m) += gauss_ints_tmp(m) * ao_coef_normalized_ordered_transp(l,i) & - * ao_coef_normalized_ordered_transp(k,j) + * ao_coef_normalized_ordered_transp(k,j) enddo - enddo + enddo enddo end @@ -61,13 +61,13 @@ double precision function overlap_gauss_xyz_r12_ao_specific(D_center,delta,i,j,m power_B(1:3)= ao_power(j,1:3) B_center(1:3) = nucl_coord(num_B,1:3) do l=1,ao_prim_num(i) - alpha = ao_expo_ordered_transp(l,i) + alpha = ao_expo_ordered_transp(l,i) do k=1,ao_prim_num(j) - beta = ao_expo_ordered_transp(k,j) + beta = ao_expo_ordered_transp(k,j) gauss_int = overlap_gauss_xyz_r12_specific(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,mx) overlap_gauss_xyz_r12_ao_specific = gauss_int * ao_coef_normalized_ordered_transp(l,i) & - * ao_coef_normalized_ordered_transp(k,j) - enddo + * ao_coef_normalized_ordered_transp(k,j) + enddo enddo end @@ -90,13 +90,13 @@ subroutine overlap_gauss_r12_all_ao(D_center,delta,aos_ints) power_B(1:3)= ao_power(j,1:3) B_center(1:3) = nucl_coord(num_B,1:3) do l=1,ao_prim_num(i) - alpha = ao_expo_ordered_transp(l,i) + alpha = ao_expo_ordered_transp(l,i) do k=1,ao_prim_num(j) - beta = ao_expo_ordered_transp(k,j) + beta = ao_expo_ordered_transp(k,j) analytical_j = overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta) aos_ints(j,i) += analytical_j * ao_coef_normalized_ordered_transp(l,i) & - * ao_coef_normalized_ordered_transp(k,j) - enddo + * ao_coef_normalized_ordered_transp(k,j) + enddo enddo enddo enddo @@ -114,7 +114,7 @@ double precision function overlap_gauss_r12_ao(D_center, delta, i, j) implicit none integer, intent(in) :: i, j double precision, intent(in) :: D_center(3), delta - + integer :: power_A(3), power_B(3), l, k double precision :: A_center(3), B_center(3), alpha, beta, coef, coef1, analytical_j @@ -133,19 +133,19 @@ double precision function overlap_gauss_r12_ao(D_center, delta, i, j) B_center(1:3) = nucl_coord(ao_nucl(j),1:3) do l = 1, ao_prim_num(i) - alpha = ao_expo_ordered_transp (l,i) - coef1 = ao_coef_normalized_ordered_transp(l,i) + alpha = ao_expo_ordered_transp (l,i) + coef1 = ao_coef_normalized_ordered_transp(l,i) do k = 1, ao_prim_num(j) beta = ao_expo_ordered_transp(k,j) - coef = coef1 * ao_coef_normalized_ordered_transp(k,j) + coef = coef1 * ao_coef_normalized_ordered_transp(k,j) if(dabs(coef) .lt. 1d-12) cycle analytical_j = overlap_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta) overlap_gauss_r12_ao += coef * analytical_j - enddo + enddo enddo end function overlap_gauss_r12_ao @@ -163,14 +163,13 @@ double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, implicit none integer, intent(in) :: i, j double precision, intent(in) :: B_center(3), beta, D_center(3), delta - + integer :: power_A1(3), power_A2(3), l, k double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef1, coef12, analytical_j double precision :: G_center(3), gama, fact_g, gama_inv double precision, external :: overlap_gauss_r12, overlap_gauss_r12_ao - ASSERT(beta .gt. 0.d0) if(beta .lt. 1d-10) then overlap_gauss_r12_ao_with1s = overlap_gauss_r12_ao(D_center, delta, i, j) return @@ -204,22 +203,120 @@ double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, A2_center(1:3) = nucl_coord(ao_nucl(j),1:3) do l = 1, ao_prim_num(i) - alpha1 = ao_expo_ordered_transp (l,i) + alpha1 = ao_expo_ordered_transp (l,i) coef1 = fact_g * ao_coef_normalized_ordered_transp(l,i) if(dabs(coef1) .lt. 1d-12) cycle do k = 1, ao_prim_num(j) alpha2 = ao_expo_ordered_transp (k,j) - coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) + coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) if(dabs(coef12) .lt. 1d-12) cycle analytical_j = overlap_gauss_r12(G_center, gama, A1_center, A2_center, power_A1, power_A2, alpha1, alpha2) overlap_gauss_r12_ao_with1s += coef12 * analytical_j - enddo + enddo enddo end function overlap_gauss_r12_ao_with1s ! --- +subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, resv, n_points) + BEGIN_DOC + ! + ! \int dr AO_i(r) AO_j(r) e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} + ! using an array of D_centers. + ! + END_DOC + + implicit none + integer, intent(in) :: i, j, n_points + double precision, intent(in) :: B_center(3), beta, D_center(3,n_points), delta + double precision, intent(out) :: resv(n_points) + + integer :: power_A1(3), power_A2(3), l, k + double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef1 + double precision :: coef12, coef12f + double precision :: gama, gama_inv + double precision :: bg, dg, bdg + + double precision, external :: overlap_gauss_r12, overlap_gauss_r12_ao + + double precision, external :: overlap_gauss_r12_ao_with1s + integer :: ipoint + + double precision, allocatable :: fact_g(:), G_center(:,:), analytical_j(:) + + if(ao_overlap_abs(j,i) .lt. 1.d-12) then + return + endif + + ASSERT(beta .gt. 0.d0) + + if(beta .lt. 1d-10) then + do ipoint=1,n_points + resv(ipoint) = overlap_gauss_r12_ao(D_center(1,ipoint), delta, i, j) + enddo + return + endif + + resv(:) = 0.d0 + + ! e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} = fact_g e^{-gama |r - G|^2} + + gama = beta + delta + gama_inv = 1.d0 / gama + + power_A1(1:3) = ao_power(i,1:3) + power_A2(1:3) = ao_power(j,1:3) + + A1_center(1:3) = nucl_coord(ao_nucl(i),1:3) + A2_center(1:3) = nucl_coord(ao_nucl(j),1:3) + + allocate (fact_g(n_points), G_center(3,n_points), analytical_j(n_points) ) + + bg = beta * gama_inv + dg = delta * gama_inv + bdg = bg * delta + do ipoint=1,n_points + G_center(1,ipoint) = bg * B_center(1) + dg * D_center(1,ipoint) + G_center(2,ipoint) = bg * B_center(2) + dg * D_center(2,ipoint) + G_center(3,ipoint) = bg * B_center(3) + dg * D_center(3,ipoint) + fact_g(ipoint) = bdg * ( & + (B_center(1) - D_center(1,ipoint)) * (B_center(1) - D_center(1,ipoint)) & + + (B_center(2) - D_center(2,ipoint)) * (B_center(2) - D_center(2,ipoint)) & + + (B_center(3) - D_center(3,ipoint)) * (B_center(3) - D_center(3,ipoint)) ) + + if(fact_g(ipoint) < 10d0) then + fact_g(ipoint) = dexp(-fact_g(ipoint)) + else + fact_g(ipoint) = 0.d0 + endif + + enddo + + ! --- + + do l = 1, ao_prim_num(i) + alpha1 = ao_expo_ordered_transp (l,i) + coef1 = ao_coef_normalized_ordered_transp(l,i) + + do k = 1, ao_prim_num(j) + alpha2 = ao_expo_ordered_transp (k,j) + coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) + if(dabs(coef12) .lt. 1d-12) cycle + + call overlap_gauss_r12_v(G_center, gama, A1_center,& + A2_center, power_A1, power_A2, alpha1, alpha2, analytical_j, n_points) + + do ipoint=1,n_points + coef12f = coef12 * fact_g(ipoint) + resv(ipoint) += coef12f * analytical_j(ipoint) + end do + + enddo + enddo + + +end diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index 50b4bf96..6875bc7a 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -16,54 +16,55 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n double precision :: tmp double precision :: wall0, wall1 + double precision, allocatable :: int_fit_v(:) double precision, external :: overlap_gauss_r12_ao_with1s provide mu_erf final_grid_points j1b_pen call wall_time(wall0) - int2_grad1u2_grad2u2_j1b2 = 0.d0 + allocate(int_fit_v(n_points_final_grid)) + int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points, n_max_fit_slat, & - !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2) - !$OMP DO - !do ipoint = 1, 10 - do ipoint = 1, n_points_final_grid - r(1) = final_grid_points(1,ipoint) - r(2) = final_grid_points(2,ipoint) - r(3) = final_grid_points(3,ipoint) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& + !$OMP coef_fit, expo_fit, int_fit_v, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& + !$OMP final_grid_points, n_max_fit_slat, & + !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,& + !$OMP ao_overlap_abs) + !$OMP DO SCHEDULE(dynamic) + do i = 1, ao_num + do j = i, ao_num - do i = 1, ao_num - do j = i, ao_num + if(ao_overlap_abs(j,i) .lt. 1.d-12) then + cycle + endif - tmp = 0.d0 - do i_1s = 1, List_all_comb_b3_size + do i_1s = 1, List_all_comb_b3_size - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) - do i_fit = 1, n_max_fit_slat + do i_fit = 1, n_max_fit_slat - expo_fit = expo_gauss_1_erf_x_2(i_fit) - coef_fit = coef_gauss_1_erf_x_2(i_fit) - int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) + expo_fit = expo_gauss_1_erf_x_2(i_fit) + coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef - tmp += -0.25d0 * coef * coef_fit * int_fit - enddo - enddo + call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points, expo_fit, i, j, int_fit_v, n_points_final_grid) - int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = tmp - enddo - enddo - enddo + do ipoint = 1, n_points_final_grid + int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) + enddo + enddo + + enddo + enddo + enddo !$OMP END DO !$OMP END PARALLEL @@ -78,7 +79,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n call wall_time(wall1) print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -105,11 +106,11 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & !$OMP final_grid_points, n_max_fit_slat, & !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_u2_j1b2) !$OMP DO !do ipoint = 1, 10 @@ -158,7 +159,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final call wall_time(wall1) print*, ' wall time for int2_u2_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -186,12 +187,12 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & !$OMP coef_fit, expo_fit, int_fit, alpha_1s, dist, & - !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & - !$OMP tmp_x, tmp_y, tmp_z) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & + !$OMP tmp_x, tmp_y, tmp_z) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & !$OMP final_grid_points, n_max_fit_slat, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) !$OMP DO do ipoint = 1, n_points_final_grid @@ -214,7 +215,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p B_center(3) = List_all_comb_b3_cent(3,i_1s) dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) & + (B_center(2) - r(2)) * (B_center(2) - r(2)) & - + (B_center(3) - r(3)) * (B_center(3) - r(3)) + + (B_center(3) - r(3)) * (B_center(3) - r(3)) do i_fit = 1, n_max_fit_slat @@ -222,17 +223,17 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p coef_fit = coef_gauss_j_mu_1_erf(i_fit) alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s + alpha_1s_inv = 1.d0 / alpha_1s centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) - expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist + expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist !if(expo_coef_1s .gt. 80.d0) cycle coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) !if(dabs(coef_tmp) .lt. 1d-10) cycle - + call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) tmp_x += coef_tmp * int_fit(1) @@ -263,7 +264,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p call wall_time(wall1) print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -291,11 +292,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & !$OMP coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist, & - !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & !$OMP final_grid_points, n_max_fit_slat, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_u_grad1u_j1b2) !$OMP DO do ipoint = 1, n_points_final_grid @@ -323,7 +324,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points coef_fit = coef_gauss_j_mu_1_erf(i_fit) alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s + alpha_1s_inv = 1.d0 / alpha_1s centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) @@ -332,7 +333,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points !if(expo_coef_1s .gt. 80.d0) cycle coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) !if(dabs(coef_tmp) .lt. 1d-10) cycle - + int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r) tmp += coef_tmp * int_fit @@ -357,7 +358,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points call wall_time(wall1) print*, ' wall time for int2_u_grad1u_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index 42d37069..ced6f4e9 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -63,7 +63,6 @@ END_PROVIDER tmp_cent_z += tmp_alphaj * nucl_coord(j,3) enddo - ASSERT(List_all_comb_b2_expo(i) .gt. 0d0) if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle List_all_comb_b2_cent(1,i) = tmp_cent_x / List_all_comb_b2_expo(i) diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f index 749227ea..0da39561 100644 --- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f +++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f @@ -1,67 +1,139 @@ - double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta) BEGIN_DOC ! Computes the following integral : ! - ! .. math:: - ! + ! .. math :: + ! ! \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) ! END_DOC - implicit none + implicit none include 'constants.include.F' - double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" - double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" - integer, intent(in) :: power_A(3),power_B(3) + double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" + double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" + integer, intent(in) :: power_A(3),power_B(3) - double precision :: overlap_x,overlap_y,overlap_z,overlap - ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 ) - double precision :: A_new(0:max_dim,3)! new polynom - double precision :: A_center_new(3) ! new center - integer :: iorder_a_new(3) ! i_order(i) = order of the new polynom ==> should be equal to power_A - double precision :: alpha_new ! new exponent - double precision :: fact_a_new ! constant factor - double precision :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr - integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1 - dim1=100 - thr = 1.d-10 - d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 ) == 0 + double precision :: overlap_x,overlap_y,overlap_z,overlap + ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 ) + double precision :: A_new(0:max_dim,3)! new polynom + double precision :: A_center_new(3) ! new center + integer :: iorder_a_new(3) ! i_order(i) = order of the new polynom ==> should be equal to power_A + double precision :: alpha_new ! new exponent + double precision :: fact_a_new ! constant factor + double precision :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr + integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1 + dim1=100 + thr = 1.d-10 + d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 ) == 0 - ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order - call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & - delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals) - ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 - accu = 0.d0 - do lx = 0, iorder_a_new(1) - coefx = A_new(lx,1) - if(dabs(coefx).lt.thr)cycle - iorder_tmp(1) = lx - do ly = 0, iorder_a_new(2) - coefy = A_new(ly,2) - coefxy = coefx * coefy - if(dabs(coefxy).lt.thr)cycle - iorder_tmp(2) = ly - do lz = 0, iorder_a_new(3) - coefz = A_new(lz,3) - coefxyz = coefxy * coefz - if(dabs(coefxyz).lt.thr)cycle - iorder_tmp(3) = lz - call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) - accu += coefxyz * overlap - enddo + ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order + call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,& + delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals) + ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 + accu = 0.d0 + do lx = 0, iorder_a_new(1) + coefx = A_new(lx,1) + if(dabs(coefx).lt.thr)cycle + iorder_tmp(1) = lx + do ly = 0, iorder_a_new(2) + coefy = A_new(ly,2) + coefxy = coefx * coefy + if(dabs(coefxy).lt.thr)cycle + iorder_tmp(2) = ly + do lz = 0, iorder_a_new(3) + coefz = A_new(lz,3) + coefxyz = coefxy * coefz + if(dabs(coefxyz).lt.thr)cycle + iorder_tmp(3) = lz + call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) + accu += coefxyz * overlap + enddo + enddo enddo - enddo - overlap_gauss_r12 = fact_a_new * accu + overlap_gauss_r12 = fact_a_new * accu end +!--- + +subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) + BEGIN_DOC + ! Computes the following integral : + ! + ! .. math :: + ! + ! \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) + ! using an array of D_centers + ! + END_DOC + + implicit none + include 'constants.include.F' + integer, intent(in) :: n_points + double precision, intent(in) :: D_center(3,n_points), delta ! pure gaussian "D" + double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" + integer, intent(in) :: power_A(3),power_B(3) + double precision, intent(out) :: rvec(n_points) + + double precision :: overlap_x,overlap_y,overlap_z,overlap + + integer :: maxab + integer, allocatable :: iorder_a_new(:) + double precision, allocatable :: A_new(:,:,:), A_center_new(:,:) + double precision, allocatable :: fact_a_new(:) + double precision :: alpha_new + double precision :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr + integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1, ipoint + + dim1=100 + thr = 1.d-10 + d(:) = 0 + +! maxab = maxval(d(1:3)) + maxab = max_dim + allocate (A_new(0:maxab, 3, n_points), A_center_new(3, n_points), & + fact_a_new(n_points), iorder_a_new(3)) + + call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & + alpha_new, fact_a_new, iorder_a_new , delta, alpha, d, power_A, & + D_center, A_center, n_points) + + do ipoint=1,n_points + + ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 + accu = 0.d0 + do lx = 0, iorder_a_new(1) + coefx = A_new(lx,1,ipoint) + if(dabs(coefx).lt.thr)cycle + iorder_tmp(1) = lx + do ly = 0, iorder_a_new(2) + coefy = A_new(ly,2,ipoint) + coefxy = coefx * coefy + if(dabs(coefxy).lt.thr)cycle + iorder_tmp(2) = ly + do lz = 0, iorder_a_new(3) + coefz = A_new(lz,3,ipoint) + coefxyz = coefxy * coefz + if(dabs(coefxyz).lt.thr)cycle + iorder_tmp(3) = lz + call overlap_gaussian_xyz(A_center_new(1,ipoint),B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) + accu += coefxyz * overlap + enddo + enddo + enddo + rvec(ipoint) = fact_a_new(ipoint) * accu + end do +end + +!--- +!--- subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints) BEGIN_DOC ! Computes the following integral : ! ! .. math:: - ! + ! ! gauss_ints(m) = \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) ! ! with m == 1 ==> x, m == 2 ==> y, m == 3 ==> z @@ -69,14 +141,14 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_ implicit none include 'constants.include.F' - double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" + double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" integer, intent(in) :: power_A(3),power_B(3) double precision, intent(out) :: gauss_ints(3) double precision :: overlap_x,overlap_y,overlap_z,overlap ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 ) - double precision :: A_new(0:max_dim,3)! new polynom + double precision :: A_new(0:max_dim,3)! new polynom double precision :: A_center_new(3) ! new center integer :: iorder_a_new(3) ! i_order(i) = order of the new polynom ==> should be equal to power_A integer :: power_B_new(3) @@ -88,8 +160,8 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_ thr = 1.d-10 d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 ) == 0 - ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order - call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & + ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order + call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals) ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 gauss_ints = 0.d0 @@ -99,12 +171,12 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_ iorder_tmp(1) = lx do ly = 0, iorder_a_new(2) coefy = A_new(ly,2) - coefxy = coefx * coefy + coefxy = coefx * coefy if(dabs(coefxy).lt.thr)cycle iorder_tmp(2) = ly do lz = 0, iorder_a_new(3) coefz = A_new(lz,3) - coefxyz = coefxy * coefz + coefxyz = coefxy * coefz if(dabs(coefxyz).lt.thr)cycle iorder_tmp(3) = lz do m = 1, 3 @@ -129,7 +201,7 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center ! Computes the following integral : ! ! .. math:: - ! + ! ! \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) ! ! with mx == 1 ==> x, mx == 2 ==> y, mx == 3 ==> z @@ -137,13 +209,13 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center implicit none include 'constants.include.F' - double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" + double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" integer, intent(in) :: power_A(3),power_B(3),mx double precision :: overlap_x,overlap_y,overlap_z,overlap ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 ) - double precision :: A_new(0:max_dim,3)! new polynom + double precision :: A_new(0:max_dim,3)! new polynom double precision :: A_center_new(3) ! new center integer :: iorder_a_new(3) ! i_order(i) = order of the new polynom ==> should be equal to power_A integer :: power_B_new(3) @@ -155,8 +227,8 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center thr = 1.d-10 d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 ) == 0 - ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order - call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & + ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order + call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals) ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 overlap_gauss_xyz_r12_specific = 0.d0 @@ -166,12 +238,12 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center iorder_tmp(1) = lx do ly = 0, iorder_a_new(2) coefy = A_new(ly,2) - coefxy = coefx * coefy + coefxy = coefx * coefy if(dabs(coefxy).lt.thr)cycle iorder_tmp(2) = ly do lz = 0, iorder_a_new(3) coefz = A_new(lz,3) - coefxyz = coefxy * coefz + coefxyz = coefxy * coefz if(dabs(coefxyz).lt.thr)cycle iorder_tmp(3) = lz m = mx diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index c2bff2e8..fd9e233b 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -56,7 +56,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) ! - ! WARNING ::: IF fact_k is too smal then: + ! WARNING ::: IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef END_DOC @@ -86,13 +86,11 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, !DIR$ FORCEINLINE call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center) if (fact_k < thresh) then - ! IF fact_k is too smal then: + ! IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef P_center = 0.d0 p = 1.d+15 - P_new = 0.d0 - iorder = 0 fact_k = 0.d0 return endif @@ -129,6 +127,89 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, end +!--- + +subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorder,alpha,beta,a,b,A_center,B_center,n_points) + BEGIN_DOC + ! Transforms the product of + ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) + ! into + ! fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 ) + ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) + ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) + ! + ! WARNING :: : IF fact_k is too smal then: + ! returns a "s" function centered in zero + ! with an inifinite exponent and a zero polynom coef + END_DOC + implicit none + include 'constants.include.F' + integer, intent(in) :: n_points, ldp + integer, intent(in) :: a(3),b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) + double precision, intent(in) :: alpha, beta ! exponents + double precision, intent(in) :: A_center(3,n_points) ! A center + double precision, intent(in) :: B_center (3) ! B center + double precision, intent(out) :: P_center(3,n_points) ! new center + double precision, intent(out) :: p ! new exponent + double precision, intent(out) :: fact_k(n_points) ! constant factor + double precision, intent(out) :: P_new(0:ldp,3,n_points)! polynomial + integer, intent(out) :: iorder(3) ! i_order(i) = order of the polynomials + + double precision, allocatable :: P_a(:,:,:), P_b(:,:,:) + + integer :: n_new,i,j, ipoint, lda, ldb, xyz + + call gaussian_product_v(alpha,A_center,beta,B_center,fact_k,p,P_center,n_points) + + if ( ior(ior(b(1),b(2)),b(3)) == 0 ) then ! b == (0,0,0) + + lda = maxval(a) + ldb = 0 + allocate(P_a(0:lda,3,n_points),P_b(0:0,3,n_points)) + + call recentered_poly2_v0(P_a,lda,A_center,P_center,a,P_b,B_center,P_center,n_points) + + iorder(1:3) = a(1:3) + do ipoint=1,n_points + do xyz=1,3 + P_new(0,xyz,ipoint) = P_b(0,xyz,ipoint) * P_a(0,xyz,ipoint) + do i=1,a(xyz) + P_new(i,xyz,ipoint) = P_new(i,xyz,ipoint) + P_b(0,xyz,ipoint) * P_a(i,xyz,ipoint) + enddo + enddo + enddo + return + + endif + + lda = maxval(a) + ldb = maxval(b) + allocate(P_a(0:lda,3,n_points),P_b(0:ldb,3,n_points)) + call recentered_poly2_v(P_a,lda,A_center,P_center,a,P_b,ldb,B_center,P_center,b,n_points) + + iorder(1:3) = a(1:3) + b(1:3) + do xyz=1,3 + if (b(xyz) == 0) then + do ipoint=1,n_points + P_new(0,xyz,ipoint) = P_b(0,xyz,ipoint) * P_a(0,xyz,ipoint) + do i=1,a(xyz) + P_new(i,xyz,ipoint) = P_new(i,xyz,ipoint) + P_b(0,xyz,ipoint) * P_a(i,xyz,ipoint) + enddo + enddo + else + do ipoint=1,n_points + do i=0,iorder(xyz) + P_new(i,xyz,ipoint) = 0.d0 + enddo + n_new=0 + call multiply_poly(P_a(0,xyz,ipoint),a(xyz),P_b(0,xyz,ipoint),b(xyz),P_new(0,xyz,ipoint),n_new) + enddo + endif + enddo + +end + +!- subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder,alpha,beta,gama,a,b,A_center,B_center,Nucl_center,dim) BEGIN_DOC @@ -231,6 +312,59 @@ subroutine gaussian_product(a,xa,b,xb,k,p,xp) xp(3) = (a*xa(3)+b*xb(3))*p_inv end subroutine +!--- +subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) + implicit none + BEGIN_DOC + ! Gaussian product in 1D. + ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} + ! Using multiple A centers + END_DOC + + integer, intent(in) :: n_points + double precision, intent(in) :: a,b ! Exponents + double precision, intent(in) :: xa(3,n_points),xb(3) ! Centers + double precision, intent(out) :: p ! New exponent + double precision, intent(out) :: xp(3,n_points) ! New center + double precision, intent(out) :: k(n_points) ! Constant + + double precision :: p_inv + + integer :: ipoint + ASSERT (a>0.) + ASSERT (b>0.) + + double precision :: xab(3), ab, ap, bp, bpxb(3) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab + + p = a+b + p_inv = 1.d0/(a+b) + ab = a*b*p_inv + ap = a*p_inv + bp = b*p_inv + bpxb(1) = bp*xb(1) + bpxb(2) = bp*xb(2) + bpxb(3) = bp*xb(3) + + do ipoint=1,n_points + xab(1) = xa(1,ipoint)-xb(1) + xab(2) = xa(2,ipoint)-xb(2) + xab(3) = xa(3,ipoint)-xb(3) + k(ipoint) = ab*(xab(1)*xab(1)+xab(2)*xab(2)+xab(3)*xab(3)) + if (k(ipoint) > 40.d0) then + k(ipoint)=0.d0 + xp(1,ipoint) = 0.d0 + xp(2,ipoint) = 0.d0 + xp(3,ipoint) = 0.d0 + else + k(ipoint) = dexp(-k(ipoint)) + xp(1,ipoint) = ap*xa(1,ipoint)+bpxb(1) + xp(2,ipoint) = ap*xa(2,ipoint)+bpxb(2) + xp(3,ipoint) = ap*xa(3,ipoint)+bpxb(3) + endif + enddo +end subroutine + @@ -404,22 +538,152 @@ subroutine recentered_poly2(P_new,x_A,x_P,a,P_new2,x_B,x_Q,b) do i = minab+1,min(b,20) P_new2(i) = binom_transp(b-i,b) * pows_b(b-i) enddo - do i = 101,a + do i = 21,a P_new(i) = binom_func(a,a-i) * pows_a(a-i) enddo - do i = 101,b + do i = 21,b P_new2(i) = binom_func(b,b-i) * pows_b(b-i) enddo end -subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) +!- +subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) + implicit none + BEGIN_DOC + ! Recenter two polynomials + END_DOC + integer, intent(in) :: a(3),b(3), n_points, lda, ldb + double precision, intent(in) :: x_A(3,n_points),x_P(3,n_points),x_B(3),x_Q(3,n_points) + double precision, intent(out) :: P_new(0:lda,3,n_points),P_new2(0:ldb,3,n_points) + double precision :: binom_func + integer :: i,j,k,l, minab(3), maxab(3),ipoint, xyz + double precision, allocatable :: pows_a(:,:), pows_b(:,:) + double precision :: fa, fb + + maxab(1:3) = max(a(1:3),b(1:3)) + minab(1:3) = max(min(a(1:3),b(1:3)),(/0,0,0/)) + + allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) ) + + + do xyz=1,3 + if ((a(xyz)<0).or.(b(xyz)<0) ) cycle + do ipoint=1,n_points + pows_a(ipoint,0) = 1.d0 + pows_a(ipoint,1) = (x_P(xyz,ipoint) - x_A(xyz,ipoint)) + pows_b(ipoint,0) = 1.d0 + pows_b(ipoint,1) = (x_Q(xyz,ipoint) - x_B(xyz)) + enddo + do i = 2,maxab(xyz) + do ipoint=1,n_points + pows_a(ipoint,i) = pows_a(ipoint,i-1)*pows_a(ipoint,1) + pows_b(ipoint,i) = pows_b(ipoint,i-1)*pows_b(ipoint,1) + enddo + enddo + do ipoint=1,n_points + P_new (0,xyz,ipoint) = pows_a(ipoint,a(xyz)) + P_new2(0,xyz,ipoint) = pows_b(ipoint,b(xyz)) + enddo + do i = 1,min(minab(xyz),20) + fa = binom_transp(a(xyz)-i,a(xyz)) + fb = binom_transp(b(xyz)-i,b(xyz)) + do ipoint=1,n_points + P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + P_new2(i,xyz,ipoint) = fb * pows_b(ipoint,b(xyz)-i) + enddo + enddo + do i = minab(xyz)+1,min(a(xyz),20) + fa = binom_transp(a(xyz)-i,a(xyz)) + do ipoint=1,n_points + P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + enddo + enddo + do i = minab(xyz)+1,min(b(xyz),20) + fb = binom_transp(b(xyz)-i,b(xyz)) + do ipoint=1,n_points + P_new2(i,xyz,ipoint) = fb * pows_b(ipoint,b(xyz)-i) + enddo + enddo + do i = 21,a(xyz) + fa = binom_func(a(xyz),a(xyz)-i) + do ipoint=1,n_points + P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + enddo + enddo + do i = 21,b(xyz) + fb = binom_func(b(xyz),b(xyz)-i) + do ipoint=1,n_points + P_new2(i,xyz,ipoint) = fb * pows_b(ipoint,b(xyz)-i) + enddo + enddo + enddo +end + + +subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points) + implicit none + BEGIN_DOC + ! Recenter two polynomials. Special case for b=(0,0,0) + END_DOC + integer, intent(in) :: a(3), n_points, lda + double precision, intent(in) :: x_A(3,n_points),x_P(3,n_points),x_B(3),x_Q(3,n_points) + double precision, intent(out) :: P_new(0:lda,3,n_points),P_new2(3,n_points) + double precision :: binom_func + integer :: i,j,k,l, xyz, ipoint, maxab(3) + double precision, allocatable :: pows_a(:,:), pows_b(:,:) + double precision :: fa + + maxab(1:3) = max(a(1:3),(/0,0,0/)) + + allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) ) + + do xyz=1,3 + if (a(xyz)<0) cycle + do ipoint=1,n_points + pows_a(ipoint,0) = 1.d0 + pows_a(ipoint,1) = (x_P(xyz,ipoint) - x_A(xyz,ipoint)) + pows_b(ipoint,0) = 1.d0 + pows_b(ipoint,1) = (x_Q(xyz,ipoint) - x_B(xyz)) + enddo + do i = 2,maxab(xyz) + do ipoint=1,n_points + pows_a(ipoint,i) = pows_a(ipoint,i-1)*pows_a(ipoint,1) + pows_b(ipoint,i) = pows_b(ipoint,i-1)*pows_b(ipoint,1) + enddo + enddo + do ipoint=1,n_points + P_new (0,xyz,ipoint) = pows_a(ipoint,a(xyz)) + P_new2(xyz,ipoint) = pows_b(ipoint,0) + enddo + do i = 1,min(a(xyz),20) + fa = binom_transp(a(xyz)-i,a(xyz)) + do ipoint=1,n_points + P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + enddo + enddo + do i = 21,a(xyz) + fa = binom_func(a(xyz),a(xyz)-i) + do ipoint=1,n_points + P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + enddo + enddo + + enddo !xyz + + deallocate(pows_a, pows_b) +end + +!-- +!-- + +subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) BEGIN_DOC - ! + ! ! Transform the pol centerd on A: - ! [ \sum_i ax_i (x-x_A)^i ] [ \sum_j ay_j (y-y_A)^j ] [ \sum_k az_k (z-z_A)^k ] + ! [ \sum_i ax_i (x-x_A)^i ] [ \sum_j ay_j (y-y_A)^j ] [ \sum_k az_k (z-z_A)^k ] ! to a pol centered on B - ! [ \sum_i bx_i (x-x_B)^i ] [ \sum_j by_j (y-y_B)^j ] [ \sum_k bz_k (z-z_B)^k ] + ! [ \sum_i bx_i (x-x_B)^i ] [ \sum_j by_j (y-y_B)^j ] [ \sum_k bz_k (z-z_B)^k ] ! END_DOC @@ -437,7 +701,7 @@ subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) do i = 1, 3 Lmax = iorder(i) - call pol_modif_center_x( A_center(i), B_center(i), Lmax, A_pol(0:Lmax, i), B_pol(0:Lmax, i) ) + call pol_modif_center_x( A_center(i), B_center(i), Lmax, A_pol(0:Lmax, i), B_pol(0:Lmax, i) ) enddo return @@ -445,14 +709,14 @@ end subroutine pol_modif_center -subroutine pol_modif_center_x(A_center, B_center, iorder, A_pol, B_pol) +subroutine pol_modif_center_x(A_center, B_center, iorder, A_pol, B_pol) BEGIN_DOC - ! + ! ! Transform the pol centerd on A: - ! [ \sum_i ax_i (x-x_A)^i ] + ! [ \sum_i ax_i (x-x_A)^i ] ! to a pol centered on B - ! [ \sum_i bx_i (x-x_B)^i ] + ! [ \sum_i bx_i (x-x_B)^i ] ! ! bx_i = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) j! / [ i! (j-i)! ] ! = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) binom_func(j,i) @@ -591,7 +855,7 @@ double precision function rint_sum(n_pt_out,rho,d1) u_inv=1.d0/dsqrt(rho) u=rho*u_inv rint_sum=0.5d0*u_inv*sqpi*derf(u) *d1(0) -! print *, 0, d1(0), 0.5d0*u_inv*sqpi*derf(u) +! print *, 0, d1(0), 0.5d0*u_inv*sqpi*derf(u) endif do i=2,n_pt_out,2 diff --git a/src/utils/map_module.f90 b/src/utils/map_module.f90 index 98e73470..ceaec874 100644 --- a/src/utils/map_module.f90 +++ b/src/utils/map_module.f90 @@ -238,11 +238,11 @@ subroutine cache_map_sort(map) iorder(i) = i enddo if (cache_key_kind == 2) then - call i2radix_sort(map%key,iorder,map%n_elements,-1) + call i2sort(map%key,iorder,map%n_elements,-1) else if (cache_key_kind == 4) then - call iradix_sort(map%key,iorder,map%n_elements,-1) + call isort(map%key,iorder,map%n_elements,-1) else if (cache_key_kind == 8) then - call i8radix_sort(map%key,iorder,map%n_elements,-1) + call i8sort(map%key,iorder,map%n_elements,-1) endif if (integral_kind == 4) then call set_order(map%value,iorder,map%n_elements) diff --git a/src/utils/qsort.c b/src/utils/qsort.c new file mode 100644 index 00000000..c011b35a --- /dev/null +++ b/src/utils/qsort.c @@ -0,0 +1,373 @@ +/* [[file:~/qp2/src/utils/qsort.org::*Generated%20C%20file][Generated C file:1]] */ +#include +#include + +struct int16_t_comp { + int16_t x; + int32_t i; +}; + +int compare_int16_t( const void * l, const void * r ) +{ + const int16_t * restrict _l= l; + const int16_t * restrict _r= r; + if( *_l > *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int16_t(int16_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int16_t_comp* A = malloc(isize * sizeof(struct int16_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int16_t_big(int16_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int16_t_comp_big* A = malloc(isize * sizeof(struct int16_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int32_t(int32_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int32_t_comp* A = malloc(isize * sizeof(struct int32_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int32_t_big(int32_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int32_t_comp_big* A = malloc(isize * sizeof(struct int32_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int64_t(int64_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int64_t_comp* A = malloc(isize * sizeof(struct int64_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int64_t_big(int64_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int64_t_comp_big* A = malloc(isize * sizeof(struct int64_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_double(double* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct double_comp* A = malloc(isize * sizeof(struct double_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_double_big(double* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct double_comp_big* A = malloc(isize * sizeof(struct double_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_float(float* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct float_comp* A = malloc(isize * sizeof(struct float_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_float_big(float* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct float_comp_big* A = malloc(isize * sizeof(struct float_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_TYPE_big(TYPE* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct TYPE_comp_big* A = malloc(isize * sizeof(struct TYPE_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i> +""" +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("TYPE", typ).replace("_big", "") ) + print( data.replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +#+NAME: replaced_f +#+begin_src python :results output :noweb yes +data = """ +<> +""" +c1 = { + "int16_t": "i2", + "int32_t": "i", + "int64_t": "i8", + "double": "d", + "float": "" +} +c2 = { + "int16_t": "integer", + "int32_t": "integer", + "int64_t": "integer", + "double": "real", + "float": "real" +} + +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +#+NAME: replaced_f2 +#+begin_src python :results output :noweb yes +data = """ +<> +""" +c1 = { + "int16_t": "i2", + "int32_t": "i", + "int64_t": "i8", + "double": "d", + "float": "" +} +c2 = { + "int16_t": "integer", + "int32_t": "integer", + "int64_t": "integer", + "double": "real", + "float": "real" +} + +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +* Generated C file + +#+BEGIN_SRC c :comments link :tangle qsort.c :noweb yes +#include +#include +<> +#+END_SRC + +* Generated Fortran file + +#+BEGIN_SRC f90 :tangle qsort_module.f90 :noweb yes +module qsort_module + use iso_c_binding + + interface + <> + end interface + +end module qsort_module + +<> + +#+END_SRC + diff --git a/src/utils/qsort_module.f90 b/src/utils/qsort_module.f90 new file mode 100644 index 00000000..a72a4f9e --- /dev/null +++ b/src/utils/qsort_module.f90 @@ -0,0 +1,347 @@ +module qsort_module + use iso_c_binding + + interface + + subroutine i2sort_c(A, iorder, isize) bind(C, name="qsort_int16_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + end subroutine i2sort_c + + subroutine i2sort_noidx_c(A, isize) bind(C, name="qsort_int16_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int16_t) :: A(isize) + end subroutine i2sort_noidx_c + + + + subroutine i2sort_big_c(A, iorder, isize) bind(C, name="qsort_int16_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + end subroutine i2sort_big_c + + subroutine i2sort_noidx_big_c(A, isize) bind(C, name="qsort_int16_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int16_t) :: A(isize) + end subroutine i2sort_noidx_big_c + + + + subroutine isort_c(A, iorder, isize) bind(C, name="qsort_int32_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + end subroutine isort_c + + subroutine isort_noidx_c(A, isize) bind(C, name="qsort_int32_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int32_t) :: A(isize) + end subroutine isort_noidx_c + + + + subroutine isort_big_c(A, iorder, isize) bind(C, name="qsort_int32_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + end subroutine isort_big_c + + subroutine isort_noidx_big_c(A, isize) bind(C, name="qsort_int32_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int32_t) :: A(isize) + end subroutine isort_noidx_big_c + + + + subroutine i8sort_c(A, iorder, isize) bind(C, name="qsort_int64_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + end subroutine i8sort_c + + subroutine i8sort_noidx_c(A, isize) bind(C, name="qsort_int64_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int64_t) :: A(isize) + end subroutine i8sort_noidx_c + + + + subroutine i8sort_big_c(A, iorder, isize) bind(C, name="qsort_int64_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + end subroutine i8sort_big_c + + subroutine i8sort_noidx_big_c(A, isize) bind(C, name="qsort_int64_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int64_t) :: A(isize) + end subroutine i8sort_noidx_big_c + + + + subroutine dsort_c(A, iorder, isize) bind(C, name="qsort_double") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + real (c_double) :: A(isize) + end subroutine dsort_c + + subroutine dsort_noidx_c(A, isize) bind(C, name="qsort_double_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + real (c_double) :: A(isize) + end subroutine dsort_noidx_c + + + + subroutine dsort_big_c(A, iorder, isize) bind(C, name="qsort_double_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + real (c_double) :: A(isize) + end subroutine dsort_big_c + + subroutine dsort_noidx_big_c(A, isize) bind(C, name="qsort_double_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + real (c_double) :: A(isize) + end subroutine dsort_noidx_big_c + + + + subroutine sort_c(A, iorder, isize) bind(C, name="qsort_float") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + real (c_float) :: A(isize) + end subroutine sort_c + + subroutine sort_noidx_c(A, isize) bind(C, name="qsort_float_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + real (c_float) :: A(isize) + end subroutine sort_noidx_c + + + + subroutine sort_big_c(A, iorder, isize) bind(C, name="qsort_float_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + real (c_float) :: A(isize) + end subroutine sort_big_c + + subroutine sort_noidx_big_c(A, isize) bind(C, name="qsort_float_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + real (c_float) :: A(isize) + end subroutine sort_noidx_big_c + + + + end interface + +end module qsort_module + + +subroutine i2sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + call i2sort_c(A, iorder, isize) +end subroutine i2sort + +subroutine i2sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int16_t) :: A(isize) + call i2sort_noidx_c(A, isize) +end subroutine i2sort_noidx + + + +subroutine i2sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + call i2sort_big_c(A, iorder, isize) +end subroutine i2sort_big + +subroutine i2sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int16_t) :: A(isize) + call i2sort_noidx_big_c(A, isize) +end subroutine i2sort_noidx_big + + + +subroutine isort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + call isort_c(A, iorder, isize) +end subroutine isort + +subroutine isort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int32_t) :: A(isize) + call isort_noidx_c(A, isize) +end subroutine isort_noidx + + + +subroutine isort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + call isort_big_c(A, iorder, isize) +end subroutine isort_big + +subroutine isort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int32_t) :: A(isize) + call isort_noidx_big_c(A, isize) +end subroutine isort_noidx_big + + + +subroutine i8sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + call i8sort_c(A, iorder, isize) +end subroutine i8sort + +subroutine i8sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int64_t) :: A(isize) + call i8sort_noidx_c(A, isize) +end subroutine i8sort_noidx + + + +subroutine i8sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + call i8sort_big_c(A, iorder, isize) +end subroutine i8sort_big + +subroutine i8sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int64_t) :: A(isize) + call i8sort_noidx_big_c(A, isize) +end subroutine i8sort_noidx_big + + + +subroutine dsort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + real (c_double) :: A(isize) + call dsort_c(A, iorder, isize) +end subroutine dsort + +subroutine dsort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + real (c_double) :: A(isize) + call dsort_noidx_c(A, isize) +end subroutine dsort_noidx + + + +subroutine dsort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + real (c_double) :: A(isize) + call dsort_big_c(A, iorder, isize) +end subroutine dsort_big + +subroutine dsort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + real (c_double) :: A(isize) + call dsort_noidx_big_c(A, isize) +end subroutine dsort_noidx_big + + + +subroutine sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + real (c_float) :: A(isize) + call sort_c(A, iorder, isize) +end subroutine sort + +subroutine sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + real (c_float) :: A(isize) + call sort_noidx_c(A, isize) +end subroutine sort_noidx + + + +subroutine sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + real (c_float) :: A(isize) + call sort_big_c(A, iorder, isize) +end subroutine sort_big + +subroutine sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + real (c_float) :: A(isize) + call sort_noidx_big_c(A, isize) +end subroutine sort_noidx_big diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index a63eb4a3..089c3871 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -1,222 +1,4 @@ BEGIN_TEMPLATE - subroutine insertion_$Xsort (x,iorder,isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the insertion sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - $type :: xtmp - integer :: i, i0, j, jmax - - do i=2,isize - xtmp = x(i) - i0 = iorder(i) - j=i-1 - do while (j>0) - if ((x(j) <= xtmp)) exit - x(j+1) = x(j) - iorder(j+1) = iorder(j) - j=j-1 - enddo - x(j+1) = xtmp - iorder(j+1) = i0 - enddo - end subroutine insertion_$Xsort - - subroutine quick_$Xsort(x, iorder, isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the quicksort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - integer, external :: omp_get_num_threads - call rec_$X_quicksort(x,iorder,isize,1,isize,nproc) - end - - recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level) - implicit none - integer, intent(in) :: isize, first, last, level - integer,intent(inout) :: iorder(isize) - $type, intent(inout) :: x(isize) - $type :: c, tmp - integer :: itmp - integer :: i, j - - if(isize<2)return - - c = x( shiftr(first+last,1) ) - i = first - j = last - do - do while (x(i) < c) - i=i+1 - end do - do while (c < x(j)) - j=j-1 - end do - if (i >= j) exit - tmp = x(i) - x(i) = x(j) - x(j) = tmp - itmp = iorder(i) - iorder(i) = iorder(j) - iorder(j) = itmp - i=i+1 - j=j-1 - enddo - if ( ((i-first <= 10000).and.(last-j <= 10000)).or.(level<=0) ) then - if (first < i-1) then - call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) - endif - if (j+1 < last) then - call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) - endif - else - if (first < i-1) then - call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) - endif - if (j+1 < last) then - call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) - endif - endif - end - - subroutine heap_$Xsort(x,iorder,isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the heap sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - - integer :: i, k, j, l, i0 - $type :: xtemp - - l = isize/2+1 - k = isize - do while (.True.) - if (l>1) then - l=l-1 - xtemp = x(l) - i0 = iorder(l) - else - xtemp = x(k) - i0 = iorder(k) - x(k) = x(1) - iorder(k) = iorder(1) - k = k-1 - if (k == 1) then - x(1) = xtemp - iorder(1) = i0 - exit - endif - endif - i=l - j = shiftl(l,1) - do while (j1) then - l=l-1 - xtemp = x(l) - i0 = iorder(l) - else - xtemp = x(k) - i0 = iorder(k) - x(k) = x(1) - iorder(k) = iorder(1) - k = k-1 - if (k == 1) then - x(1) = xtemp - iorder(1) = i0 - exit - endif - endif - i=l - j = shiftl(l,1) - do while (j0_8) - if (x(j)<=xtmp) exit - x(j+1_8) = x(j) - iorder(j+1_8) = iorder(j) - j = j-1_8 - enddo - x(j+1_8) = xtmp - iorder(j+1_8) = i0 - enddo - - end subroutine insertion_$Xsort_big - subroutine $Xset_order_big(x,iorder,isize) implicit none BEGIN_DOC @@ -563,223 +90,3 @@ SUBST [ X, type ] END_TEMPLATE -BEGIN_TEMPLATE - -recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix) - implicit none - - BEGIN_DOC - ! Sort integer array x(isize) using the radix sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - ! iradix should be -1 in input. - END_DOC - integer*$int_type, intent(in) :: isize - integer*$int_type, intent(inout) :: iorder(isize) - integer*$type, intent(inout) :: x(isize) - integer, intent(in) :: iradix - integer :: iradix_new - integer*$type, allocatable :: x2(:), x1(:) - integer*$type :: i4 ! data type - integer*$int_type, allocatable :: iorder1(:),iorder2(:) - integer*$int_type :: i0, i1, i2, i3, i ! index type - integer*$type :: mask - integer :: err - !DIR$ ATTRIBUTES ALIGN : 128 :: iorder1,iorder2, x2, x1 - - if (isize < 2) then - return - endif - - if (iradix == -1) then ! Sort Positive and negative - - allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays' - stop - endif - - i1=1_$int_type - i2=1_$int_type - do i=1_$int_type,isize - if (x(i) < 0_$type) then - iorder1(i1) = iorder(i) - x1(i1) = -x(i) - i1 = i1+1_$int_type - else - iorder2(i2) = iorder(i) - x2(i2) = x(i) - i2 = i2+1_$int_type - endif - enddo - i1=i1-1_$int_type - i2=i2-1_$int_type - - do i=1_$int_type,i2 - iorder(i1+i) = iorder2(i) - x(i1+i) = x2(i) - enddo - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x2, iorder2' - stop - endif - - - if (i1 > 1_$int_type) then - call $Xradix_sort$big(x1,iorder1,i1,-2) - do i=1_$int_type,i1 - x(i) = -x1(1_$int_type+i1-i) - iorder(i) = iorder1(1_$int_type+i1-i) - enddo - endif - - if (i2>1_$int_type) then - call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2) - endif - - deallocate(x1,iorder1,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x1, iorder1' - stop - endif - return - - else if (iradix == -2) then ! Positive - - ! Find most significant bit - - i0 = 0_$int_type - i4 = maxval(x) - - iradix_new = max($integer_size-1-leadz(i4),1) - mask = ibset(0_$type,iradix_new) - - allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays' - stop - endif - - i1=1_$int_type - i2=1_$int_type - - do i=1_$int_type,isize - if (iand(mask,x(i)) == 0_$type) then - iorder1(i1) = iorder(i) - x1(i1) = x(i) - i1 = i1+1_$int_type - else - iorder2(i2) = iorder(i) - x2(i2) = x(i) - i2 = i2+1_$int_type - endif - enddo - i1=i1-1_$int_type - i2=i2-1_$int_type - - do i=1_$int_type,i1 - iorder(i0+i) = iorder1(i) - x(i0+i) = x1(i) - enddo - i0 = i0+i1 - i3 = i0 - deallocate(x1,iorder1,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x1, iorder1' - stop - endif - - - do i=1_$int_type,i2 - iorder(i0+i) = iorder2(i) - x(i0+i) = x2(i) - enddo - i0 = i0+i2 - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x2, iorder2' - stop - endif - - - if (i3>1_$int_type) then - call $Xradix_sort$big(x,iorder,i3,iradix_new-1) - endif - - if (isize-i3>1_$int_type) then - call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1) - endif - - return - endif - - ASSERT (iradix >= 0) - - if (isize < 48) then - call insertion_$Xsort$big(x,iorder,isize) - return - endif - - - allocate(x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays x1, iorder1' - stop - endif - - - mask = ibset(0_$type,iradix) - i0=1_$int_type - i1=1_$int_type - - do i=1_$int_type,isize - if (iand(mask,x(i)) == 0_$type) then - iorder(i0) = iorder(i) - x(i0) = x(i) - i0 = i0+1_$int_type - else - iorder2(i1) = iorder(i) - x2(i1) = x(i) - i1 = i1+1_$int_type - endif - enddo - i0=i0-1_$int_type - i1=i1-1_$int_type - - do i=1_$int_type,i1 - iorder(i0+i) = iorder2(i) - x(i0+i) = x2(i) - enddo - - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays x2, iorder2' - stop - endif - - - if (iradix == 0) then - return - endif - - - if (i1>1_$int_type) then - call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1) - endif - if (i0>1) then - call $Xradix_sort$big(x,iorder,i0,iradix-1) - endif - - end - -SUBST [ X, type, integer_size, is_big, big, int_type ] - i ; 4 ; 32 ; .False. ; ; 4 ;; - i8 ; 8 ; 64 ; .False. ; ; 4 ;; - i2 ; 2 ; 16 ; .False. ; ; 4 ;; - i ; 4 ; 32 ; .True. ; _big ; 8 ;; - i8 ; 8 ; 64 ; .True. ; _big ; 8 ;; -END_TEMPLATE - - - From 23979d91129c6489d46720317e4a77b395e988bf Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 20 Nov 2022 14:47:42 +0100 Subject: [PATCH 63/70] Added final_grid_points_transp --- src/becke_numerical_grid/grid_becke_vector.irp.f | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index a72200f7..64b522d8 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -58,3 +58,18 @@ END_PROVIDER enddo END_PROVIDER + + +BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)] + implicit none + BEGIN_DOC + ! final_grid_points_transp(j,1:3) = (/ x, y, z /) of the jth grid point + END_DOC + integer :: i + do i=1,n_points_final_grid + final_grid_points_transp(i,1) = final_grid_points(1,i) + final_grid_points_transp(i,2) = final_grid_points(2,i) + final_grid_points_transp(i,3) = final_grid_points(3,i) + enddo +END_PROVIDER + From c1bdfe7e93489d03adf2d69b2d8d89ee5e72b49e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 20 Nov 2022 14:52:27 +0100 Subject: [PATCH 64/70] Starting to transpose --- src/ao_many_one_e_ints/ao_gaus_gauss.irp.f | 60 +++++++++++++++++-- src/utils/one_e_integration.irp.f | 67 +++++++++++++++++++++- 2 files changed, 119 insertions(+), 8 deletions(-) diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f index 19cfe4cd..a657d6b1 100644 --- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f @@ -150,6 +150,58 @@ double precision function overlap_gauss_r12_ao(D_center, delta, i, j) end function overlap_gauss_r12_ao +! -- + +subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) + + BEGIN_DOC + ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2} + END_DOC + + implicit none + integer, intent(in) :: i, j, n_points + double precision, intent(in) :: D_center(3,n_points), delta + double precision, intent(out) :: resv(n_points) + + integer :: power_A(3), power_B(3), l, k + double precision :: A_center(3), B_center(3), alpha, beta, coef, coef1 + double precision, allocatable :: analytical_j(:) + + double precision, external :: overlap_gauss_r12 + integer :: ipoint + + resv(:) = 0.d0 + if(ao_overlap_abs(j,i).lt.1.d-12) then + return + endif + + power_A(1:3) = ao_power(i,1:3) + power_B(1:3) = ao_power(j,1:3) + + A_center(1:3) = nucl_coord(ao_nucl(i),1:3) + B_center(1:3) = nucl_coord(ao_nucl(j),1:3) + + allocate(analytical_j(n_points)) + do l = 1, ao_prim_num(i) + alpha = ao_expo_ordered_transp (l,i) + coef1 = ao_coef_normalized_ordered_transp(l,i) + + do k = 1, ao_prim_num(j) + beta = ao_expo_ordered_transp(k,j) + coef = coef1 * ao_coef_normalized_ordered_transp(k,j) + + if(dabs(coef) .lt. 1d-12) cycle + + call overlap_gauss_r12_v(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta, analytical_j, n_points) + do ipoint=1, n_points + resv(ipoint) = resv(ipoint) + coef*analytical_j(ipoint) + enddo + enddo + enddo + deallocate(analytical_j) + +end + ! --- double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, delta, i, j) @@ -241,9 +293,6 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, double precision :: gama, gama_inv double precision :: bg, dg, bdg - double precision, external :: overlap_gauss_r12, overlap_gauss_r12_ao - - double precision, external :: overlap_gauss_r12_ao_with1s integer :: ipoint double precision, allocatable :: fact_g(:), G_center(:,:), analytical_j(:) @@ -255,9 +304,7 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, ASSERT(beta .gt. 0.d0) if(beta .lt. 1d-10) then - do ipoint=1,n_points - resv(ipoint) = overlap_gauss_r12_ao(D_center(1,ipoint), delta, i, j) - enddo + call overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) return endif @@ -317,6 +364,7 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, enddo enddo + deallocate (fact_g, G_center, analytical_j ) end diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f index 9c1d2445..e175042b 100644 --- a/src/utils/one_e_integration.irp.f +++ b/src/utils/one_e_integration.irp.f @@ -92,9 +92,9 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& overlap = overlap_x * overlap_y * overlap_z end - + ! --- - + subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, lower_exp_val, dx, nx) BEGIN_DOC @@ -151,4 +151,67 @@ subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, over end +! --- +subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& + power_B,overlap_x,overlap_y,overlap_z,overlap,dim, n_points) + implicit none + BEGIN_DOC + !.. math:: + ! + ! S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2) (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx \\ + ! S = S_x S_y S_z + ! + END_DOC + include 'constants.include.F' + integer,intent(in) :: dim, n_points + double precision,intent(in) :: A_center(3),B_center(3) ! center of the x1 functions + double precision, intent(in) :: alpha,beta + integer,intent(in) :: power_A(3), power_B(3) ! power of the x1 functions + double precision, intent(out) :: overlap_x(n_points),overlap_y(n_points),overlap_z(n_points),overlap(n_points) + double precision :: P_new(0:max_dim,3),P_center(3),fact_p,p + double precision :: F_integral_tab(0:max_dim) + integer :: iorder_p(3) + + call give_explicit_poly_and_gaussian(P_new,P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,dim) + if(fact_p.lt.1d-20)then + overlap_x = 1.d-10 + overlap_y = 1.d-10 + overlap_z = 1.d-10 + overlap = 1.d-10 + return + endif + integer :: nmax + double precision :: F_integral + nmax = maxval(iorder_p) + do i = 0,nmax + F_integral_tab(i) = F_integral(i,p) + enddo + overlap_x = P_new(0,1) * F_integral_tab(0) + overlap_y = P_new(0,2) * F_integral_tab(0) + overlap_z = P_new(0,3) * F_integral_tab(0) + + integer :: i + do i = 1,iorder_p(1) + overlap_x = overlap_x + P_new(i,1) * F_integral_tab(i) + enddo + call gaussian_product_x(alpha,A_center(1),beta,B_center(1),fact_p,p,P_center(1)) + overlap_x *= fact_p + + do i = 1,iorder_p(2) + overlap_y = overlap_y + P_new(i,2) * F_integral_tab(i) + enddo + call gaussian_product_x(alpha,A_center(2),beta,B_center(2),fact_p,p,P_center(2)) + overlap_y *= fact_p + + do i = 1,iorder_p(3) + overlap_z = overlap_z + P_new(i,3) * F_integral_tab(i) + enddo + call gaussian_product_x(alpha,A_center(3),beta,B_center(3),fact_p,p,P_center(3)) + overlap_z *= fact_p + + overlap = overlap_x * overlap_y * overlap_z + +end + +! --- From 76eeade4d5d5908ebbb971a2e3bdbd0d065848ee Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 20 Nov 2022 17:54:04 +0100 Subject: [PATCH 65/70] Vectorizing integrals --- src/ao_many_one_e_ints/ao_gaus_gauss.irp.f | 18 ++--- src/ao_many_one_e_ints/grad2_jmu_modif.irp.f | 6 +- .../prim_int_gauss_gauss.irp.f | 61 ++++++++------- src/utils/integration.irp.f | 40 ++++++++++ src/utils/one_e_integration.irp.f | 76 ++++++++++--------- 5 files changed, 126 insertions(+), 75 deletions(-) diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f index a657d6b1..56575b54 100644 --- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f @@ -160,7 +160,7 @@ subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) implicit none integer, intent(in) :: i, j, n_points - double precision, intent(in) :: D_center(3,n_points), delta + double precision, intent(in) :: D_center(n_points,3), delta double precision, intent(out) :: resv(n_points) integer :: power_A(3), power_B(3), l, k @@ -284,7 +284,7 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, implicit none integer, intent(in) :: i, j, n_points - double precision, intent(in) :: B_center(3), beta, D_center(3,n_points), delta + double precision, intent(in) :: B_center(3), beta, D_center(n_points,3), delta double precision, intent(out) :: resv(n_points) integer :: power_A1(3), power_A2(3), l, k @@ -321,19 +321,19 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, A1_center(1:3) = nucl_coord(ao_nucl(i),1:3) A2_center(1:3) = nucl_coord(ao_nucl(j),1:3) - allocate (fact_g(n_points), G_center(3,n_points), analytical_j(n_points) ) + allocate (fact_g(n_points), G_center(n_points,3), analytical_j(n_points) ) bg = beta * gama_inv dg = delta * gama_inv bdg = bg * delta do ipoint=1,n_points - G_center(1,ipoint) = bg * B_center(1) + dg * D_center(1,ipoint) - G_center(2,ipoint) = bg * B_center(2) + dg * D_center(2,ipoint) - G_center(3,ipoint) = bg * B_center(3) + dg * D_center(3,ipoint) + G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1) + G_center(ipoint,2) = bg * B_center(2) + dg * D_center(ipoint,2) + G_center(ipoint,3) = bg * B_center(3) + dg * D_center(ipoint,3) fact_g(ipoint) = bdg * ( & - (B_center(1) - D_center(1,ipoint)) * (B_center(1) - D_center(1,ipoint)) & - + (B_center(2) - D_center(2,ipoint)) * (B_center(2) - D_center(2,ipoint)) & - + (B_center(3) - D_center(3,ipoint)) * (B_center(3) - D_center(3,ipoint)) ) + (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1)) & + + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2)) & + + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) ) if(fact_g(ipoint) < 10d0) then fact_g(ipoint) = dexp(-fact_g(ipoint)) diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index 6875bc7a..272371c8 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -19,7 +19,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n double precision, allocatable :: int_fit_v(:) double precision, external :: overlap_gauss_r12_ao_with1s - provide mu_erf final_grid_points j1b_pen + provide mu_erf final_grid_points_transp j1b_pen call wall_time(wall0) allocate(int_fit_v(n_points_final_grid)) @@ -29,7 +29,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& !$OMP coef_fit, expo_fit, int_fit_v, tmp) & !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& - !$OMP final_grid_points, n_max_fit_slat, & + !$OMP final_grid_points_transp, n_max_fit_slat, & !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,& @@ -55,7 +55,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n expo_fit = expo_gauss_1_erf_x_2(i_fit) coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef - call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points, expo_fit, i, j, int_fit_v, n_points_final_grid) + call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, expo_fit, i, j, int_fit_v, n_points_final_grid) do ipoint = 1, n_points_final_grid int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f index 0da39561..ae62a86c 100644 --- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f +++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f @@ -56,7 +56,7 @@ end !--- -subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) +subroutine overlap_gauss_r12_v(D_center_,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) BEGIN_DOC ! Computes the following integral : ! @@ -70,59 +70,66 @@ subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B, implicit none include 'constants.include.F' integer, intent(in) :: n_points - double precision, intent(in) :: D_center(3,n_points), delta ! pure gaussian "D" + double precision, intent(in) :: D_center_(n_points,3), delta ! pure gaussian "D" double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" integer, intent(in) :: power_A(3),power_B(3) double precision, intent(out) :: rvec(n_points) - double precision :: overlap_x,overlap_y,overlap_z,overlap + double precision, allocatable :: overlap(:) + double precision :: overlap_x, overlap_y, overlap_z integer :: maxab integer, allocatable :: iorder_a_new(:) double precision, allocatable :: A_new(:,:,:), A_center_new(:,:) double precision, allocatable :: fact_a_new(:) double precision :: alpha_new - double precision :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr + double precision :: accu,thr, coefxy integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1, ipoint dim1=100 thr = 1.d-10 d(:) = 0 -! maxab = maxval(d(1:3)) - maxab = max_dim + maxab = maxval(d(1:3)) + + double precision, allocatable :: D_center(:,:) + allocate(D_center(3,n_points)) + D_center(1,1:n_points) = D_center_(1:n_points,1) + D_center(2,1:n_points) = D_center_(1:n_points,2) + D_center(3,1:n_points) = D_center_(1:n_points,3) + + allocate (A_new(0:maxab, 3, n_points), A_center_new(3, n_points), & - fact_a_new(n_points), iorder_a_new(3)) + fact_a_new(n_points), iorder_a_new(3), overlap(n_points) ) call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & alpha_new, fact_a_new, iorder_a_new , delta, alpha, d, power_A, & D_center, A_center, n_points) do ipoint=1,n_points + rvec(ipoint) = 0.d0 + enddo - ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 - accu = 0.d0 - do lx = 0, iorder_a_new(1) - coefx = A_new(lx,1,ipoint) - if(dabs(coefx).lt.thr)cycle - iorder_tmp(1) = lx - do ly = 0, iorder_a_new(2) - coefy = A_new(ly,2,ipoint) - coefxy = coefx * coefy - if(dabs(coefxy).lt.thr)cycle - iorder_tmp(2) = ly - do lz = 0, iorder_a_new(3) - coefz = A_new(lz,3,ipoint) - coefxyz = coefxy * coefz - if(dabs(coefxyz).lt.thr)cycle - iorder_tmp(3) = lz - call overlap_gaussian_xyz(A_center_new(1,ipoint),B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) - accu += coefxyz * overlap + do lx = 0, iorder_a_new(1) + iorder_tmp(1) = lx + do ly = 0, iorder_a_new(2) + iorder_tmp(2) = ly + do lz = 0, iorder_a_new(3) + iorder_tmp(3) = lz + call overlap_gaussian_xyz_v(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap,dim1,n_points) + do ipoint=1,n_points + rvec(ipoint) = rvec(ipoint) + A_new(lx,1,ipoint) * & + A_new(ly,2,ipoint) * & + A_new(lz,3,ipoint) * overlap(ipoint) enddo enddo enddo - rvec(ipoint) = fact_a_new(ipoint) * accu - end do + enddo + + do ipoint=1,n_points + rvec(ipoint) = rvec(ipoint) * fact_a_new(ipoint) + enddo + deallocate(A_new, A_center_new, fact_a_new, iorder_a_new, overlap) end !--- diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index fd9e233b..7bf73c15 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -403,6 +403,46 @@ subroutine gaussian_product_x(a,xa,b,xb,k,p,xp) end subroutine +!- + +subroutine gaussian_product_x_v(a,xa,b,xb,k,p,xp,n_points) + implicit none + BEGIN_DOC + ! Gaussian product in 1D with multiple xa + ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} + END_DOC + + integer, intent(in) :: n_points + double precision , intent(in) :: a,b ! Exponents + double precision , intent(in) :: xa(n_points),xb ! Centers + double precision , intent(out) :: p(n_points) ! New exponent + double precision , intent(out) :: xp(n_points) ! New center + double precision , intent(out) :: k(n_points) ! Constant + + double precision :: p_inv + integer :: ipoint + + ASSERT (a>0.) + ASSERT (b>0.) + + double precision :: xab, ab + + p = a+b + p_inv = 1.d0/(a+b) + ab = a*b*p_inv + do ipoint = 1, n_points + xab = xa(ipoint)-xb + k(ipoint) = ab*xab*xab + if (k(ipoint) > 40.d0) then + k(ipoint)=0.d0 + cycle + endif + k(ipoint) = exp(-k(ipoint)) + xp(ipoint) = (a*xa(ipoint)+b*xb)*p_inv + enddo +end subroutine + + diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f index e175042b..ef0c4ffc 100644 --- a/src/utils/one_e_integration.irp.f +++ b/src/utils/one_e_integration.irp.f @@ -154,7 +154,7 @@ end ! --- subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& - power_B,overlap_x,overlap_y,overlap_z,overlap,dim, n_points) + power_B,overlap,dim, n_points) implicit none BEGIN_DOC !.. math:: @@ -165,53 +165,57 @@ subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& END_DOC include 'constants.include.F' integer,intent(in) :: dim, n_points - double precision,intent(in) :: A_center(3),B_center(3) ! center of the x1 functions + double precision,intent(in) :: A_center(3,n_points),B_center(3) ! center of the x1 functions double precision, intent(in) :: alpha,beta integer,intent(in) :: power_A(3), power_B(3) ! power of the x1 functions - double precision, intent(out) :: overlap_x(n_points),overlap_y(n_points),overlap_z(n_points),overlap(n_points) - double precision :: P_new(0:max_dim,3),P_center(3),fact_p,p + double precision, intent(out) :: overlap(n_points) double precision :: F_integral_tab(0:max_dim) - integer :: iorder_p(3) - - call give_explicit_poly_and_gaussian(P_new,P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,dim) - if(fact_p.lt.1d-20)then - overlap_x = 1.d-10 - overlap_y = 1.d-10 - overlap_z = 1.d-10 - overlap = 1.d-10 - return - endif + double precision :: p, overlap_x, overlap_y, overlap_z + double precision, allocatable :: P_new(:,:,:),P_center(:,:),fact_p(:), fact_pp(:), pp(:) + integer :: iorder_p(3), ipoint, ldp integer :: nmax double precision :: F_integral + + ldp = max_dim + allocate(P_new(0:ldp,3,n_points), P_center(3,n_points), fact_p(n_points), & + fact_pp(n_points), pp(n_points)) + + call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,n_points) + nmax = maxval(iorder_p) - do i = 0,nmax + do i=0, nmax F_integral_tab(i) = F_integral(i,p) enddo - overlap_x = P_new(0,1) * F_integral_tab(0) - overlap_y = P_new(0,2) * F_integral_tab(0) - overlap_z = P_new(0,3) * F_integral_tab(0) integer :: i - do i = 1,iorder_p(1) - overlap_x = overlap_x + P_new(i,1) * F_integral_tab(i) + + call gaussian_product_v(alpha,A_center,beta,B_center,fact_pp,pp,P_center,n_points) + + do ipoint=1,n_points + if(fact_p(ipoint).lt.1d-20)then + overlap(ipoint) = 1.d-10 + cycle + endif + + overlap_x = P_new(0,1,ipoint) * F_integral_tab(0) + do i = 1,iorder_p(1) + overlap_x = overlap_x + P_new(i,1,ipoint) * F_integral_tab(i) + enddo + + overlap_y = P_new(0,2,ipoint) * F_integral_tab(0) + do i = 1,iorder_p(2) + overlap_y = overlap_y + P_new(i,2,ipoint) * F_integral_tab(i) + enddo + + overlap_z = P_new(0,3,ipoint) * F_integral_tab(0) + do i = 1,iorder_p(3) + overlap_z = overlap_z + P_new(i,3,ipoint) * F_integral_tab(i) + enddo + + overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_pp(ipoint) enddo - call gaussian_product_x(alpha,A_center(1),beta,B_center(1),fact_p,p,P_center(1)) - overlap_x *= fact_p - - do i = 1,iorder_p(2) - overlap_y = overlap_y + P_new(i,2) * F_integral_tab(i) - enddo - call gaussian_product_x(alpha,A_center(2),beta,B_center(2),fact_p,p,P_center(2)) - overlap_y *= fact_p - - do i = 1,iorder_p(3) - overlap_z = overlap_z + P_new(i,3) * F_integral_tab(i) - enddo - call gaussian_product_x(alpha,A_center(3),beta,B_center(3),fact_p,p,P_center(3)) - overlap_z *= fact_p - - overlap = overlap_x * overlap_y * overlap_z + deallocate(P_new, P_center, fact_p, pp, fact_pp) end ! --- From 768e1ac5d896afa2192d10deb20039698f3c3c84 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 20 Nov 2022 22:29:58 +0100 Subject: [PATCH 66/70] Vectorizing integrals --- .../prim_int_gauss_gauss.irp.f | 2 +- src/utils/integration.irp.f | 135 +++++++++++++----- src/utils/one_e_integration.irp.f | 2 +- 3 files changed, 104 insertions(+), 35 deletions(-) diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f index ae62a86c..4cba0965 100644 --- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f +++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f @@ -90,7 +90,7 @@ subroutine overlap_gauss_r12_v(D_center_,delta,A_center,B_center,power_A,power_B thr = 1.d-10 d(:) = 0 - maxab = maxval(d(1:3)) + maxab = maxval(power_A(1:3)) double precision, allocatable :: D_center(:,:) allocate(D_center(3,n_points)) diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index 7bf73c15..29c6d1ff 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -161,52 +161,82 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde call gaussian_product_v(alpha,A_center,beta,B_center,fact_k,p,P_center,n_points) +!TODO TRANSP +double precision, allocatable :: P_a_(:,:,:), P_b_(:,:,:), A_center_(:,:), P_center_(:,:), P_new_(:,:,:) +allocate(A_center_(n_points,3), P_center_(n_points,3), P_new_(n_points,0:ldp,3)) +A_center_(1:n_points,1) = A_center(1,1:n_points) +A_center_(1:n_points,2) = A_center(2,1:n_points) +A_center_(1:n_points,3) = A_center(3,1:n_points) +P_center_(1:n_points,1) = P_center(1,1:n_points) +P_center_(1:n_points,2) = P_center(2,1:n_points) +P_center_(1:n_points,3) = P_center(3,1:n_points) + if ( ior(ior(b(1),b(2)),b(3)) == 0 ) then ! b == (0,0,0) + lda = maxval(a) ldb = 0 - allocate(P_a(0:lda,3,n_points),P_b(0:0,3,n_points)) + allocate(P_a_(n_points,0:lda,3), P_b_(n_points,0:0,3)) - call recentered_poly2_v0(P_a,lda,A_center,P_center,a,P_b,B_center,P_center,n_points) + + call recentered_poly2_v0(P_a_,lda,A_center_,P_center_,a,P_b_,B_center,P_center_,n_points) iorder(1:3) = a(1:3) do ipoint=1,n_points do xyz=1,3 - P_new(0,xyz,ipoint) = P_b(0,xyz,ipoint) * P_a(0,xyz,ipoint) + P_new_(ipoint,0,xyz) = P_a_(ipoint,0,xyz) * P_b_(ipoint,0,xyz) do i=1,a(xyz) - P_new(i,xyz,ipoint) = P_new(i,xyz,ipoint) + P_b(0,xyz,ipoint) * P_a(i,xyz,ipoint) + P_new_(ipoint,i,xyz) = P_new_(ipoint,i,xyz) + P_b_(ipoint,0,xyz) * P_a_(ipoint,i,xyz) enddo enddo enddo + +do ipoint=1,n_points +do i=0,ldp +P_new(i,1,ipoint) = P_new_(ipoint,i,1) +P_new(i,2,ipoint) = P_new_(ipoint,i,2) +P_new(i,3,ipoint) = P_new_(ipoint,i,3) +enddo +enddo return endif lda = maxval(a) ldb = maxval(b) - allocate(P_a(0:lda,3,n_points),P_b(0:ldb,3,n_points)) - call recentered_poly2_v(P_a,lda,A_center,P_center,a,P_b,ldb,B_center,P_center,b,n_points) + allocate(P_a_(n_points,0:lda,3), P_b_(n_points,0:ldb,3)) + + call recentered_poly2_v(P_a_,lda,A_center_,P_center_,a,P_b_,ldb,B_center,P_center_,b,n_points) + iorder(1:3) = a(1:3) + b(1:3) + do xyz=1,3 if (b(xyz) == 0) then do ipoint=1,n_points - P_new(0,xyz,ipoint) = P_b(0,xyz,ipoint) * P_a(0,xyz,ipoint) + P_new_(ipoint,0,xyz) = P_a_(ipoint,0,xyz) * P_b_(ipoint,0,xyz) do i=1,a(xyz) - P_new(i,xyz,ipoint) = P_new(i,xyz,ipoint) + P_b(0,xyz,ipoint) * P_a(i,xyz,ipoint) + P_new_(ipoint,i,xyz) = P_new_(ipoint,i,xyz) + P_b_(ipoint,0,xyz) * P_a_(ipoint,i,xyz) enddo enddo else - do ipoint=1,n_points - do i=0,iorder(xyz) - P_new(i,xyz,ipoint) = 0.d0 + do i=0,iorder(xyz) + do ipoint=1,n_points + P_new_(ipoint,i,xyz) = 0.d0 enddo - n_new=0 - call multiply_poly(P_a(0,xyz,ipoint),a(xyz),P_b(0,xyz,ipoint),b(xyz),P_new(0,xyz,ipoint),n_new) enddo + call multiply_poly_v(P_a_(1,0,xyz), a(xyz),P_b_(1,0,xyz),b(xyz),P_new_(1,0,xyz),ldp,n_points) endif enddo + +do ipoint=1,n_points +do i=0,ldp +P_new(i,1,ipoint) = P_new_(ipoint,i,1) +P_new(i,2,ipoint) = P_new_(ipoint,i,2) +P_new(i,3,ipoint) = P_new_(ipoint,i,3) +enddo +enddo end !- @@ -487,6 +517,45 @@ subroutine multiply_poly(b,nb,c,nc,d,nd) end +subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points) + implicit none + BEGIN_DOC + ! Multiply pairs of polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb, nc, n_points + integer, intent(in) :: nd + double precision, intent(in) :: b(n_points,0:nb), c(n_points,0:nc) + double precision, intent(inout) :: d(n_points,0:nd) + + integer :: ib, ic, id, k, ipoint + if (nd < nb+nc) then + print *, nd, nb, nc + print *, irp_here, ': nd < nb+nc' + stop 1 + endif + + do ic = 0,nc + do ipoint=1, n_points + d(ipoint,ic) = d(ipoint,ic) + c(ipoint,ic) * b(ipoint,0) + enddo + enddo + + do ib=1,nb + do ipoint=1, n_points + d(ipoint, ib) = d(ipoint, ib) + c(ipoint,0) * b(ipoint, ib) + enddo + do ic = 1,nc + do ipoint=1, n_points + d(ipoint, ib+ic) = d(ipoint, ib+ic) + c(ipoint,ic) * b(ipoint, ib) + enddo + enddo + enddo + +end + + subroutine add_poly(b,nb,c,nc,d,nd) implicit none BEGIN_DOC @@ -593,8 +662,8 @@ subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) ! Recenter two polynomials END_DOC integer, intent(in) :: a(3),b(3), n_points, lda, ldb - double precision, intent(in) :: x_A(3,n_points),x_P(3,n_points),x_B(3),x_Q(3,n_points) - double precision, intent(out) :: P_new(0:lda,3,n_points),P_new2(0:ldb,3,n_points) + double precision, intent(in) :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3) + double precision, intent(out) :: P_new(n_points,0:lda,3),P_new2(n_points,0:ldb,3) double precision :: binom_func integer :: i,j,k,l, minab(3), maxab(3),ipoint, xyz double precision, allocatable :: pows_a(:,:), pows_b(:,:) @@ -610,9 +679,9 @@ subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) if ((a(xyz)<0).or.(b(xyz)<0) ) cycle do ipoint=1,n_points pows_a(ipoint,0) = 1.d0 - pows_a(ipoint,1) = (x_P(xyz,ipoint) - x_A(xyz,ipoint)) + pows_a(ipoint,1) = (x_P(ipoint,xyz) - x_A(ipoint,xyz)) pows_b(ipoint,0) = 1.d0 - pows_b(ipoint,1) = (x_Q(xyz,ipoint) - x_B(xyz)) + pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz)) enddo do i = 2,maxab(xyz) do ipoint=1,n_points @@ -621,39 +690,39 @@ subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) enddo enddo do ipoint=1,n_points - P_new (0,xyz,ipoint) = pows_a(ipoint,a(xyz)) - P_new2(0,xyz,ipoint) = pows_b(ipoint,b(xyz)) + P_new (ipoint,0,xyz) = pows_a(ipoint,a(xyz)) + P_new2(ipoint,0,xyz) = pows_b(ipoint,b(xyz)) enddo do i = 1,min(minab(xyz),20) fa = binom_transp(a(xyz)-i,a(xyz)) fb = binom_transp(b(xyz)-i,b(xyz)) do ipoint=1,n_points - P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) - P_new2(i,xyz,ipoint) = fb * pows_b(ipoint,b(xyz)-i) + P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) + P_new2(ipoint,i,xyz) = fb * pows_b(ipoint,b(xyz)-i) enddo enddo do i = minab(xyz)+1,min(a(xyz),20) fa = binom_transp(a(xyz)-i,a(xyz)) do ipoint=1,n_points - P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) enddo enddo do i = minab(xyz)+1,min(b(xyz),20) fb = binom_transp(b(xyz)-i,b(xyz)) do ipoint=1,n_points - P_new2(i,xyz,ipoint) = fb * pows_b(ipoint,b(xyz)-i) + P_new2(ipoint,i,xyz) = fb * pows_b(ipoint,b(xyz)-i) enddo enddo do i = 21,a(xyz) fa = binom_func(a(xyz),a(xyz)-i) do ipoint=1,n_points - P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) enddo enddo do i = 21,b(xyz) fb = binom_func(b(xyz),b(xyz)-i) do ipoint=1,n_points - P_new2(i,xyz,ipoint) = fb * pows_b(ipoint,b(xyz)-i) + P_new2(ipoint,i,xyz) = fb * pows_b(ipoint,b(xyz)-i) enddo enddo enddo @@ -666,8 +735,8 @@ subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points) ! Recenter two polynomials. Special case for b=(0,0,0) END_DOC integer, intent(in) :: a(3), n_points, lda - double precision, intent(in) :: x_A(3,n_points),x_P(3,n_points),x_B(3),x_Q(3,n_points) - double precision, intent(out) :: P_new(0:lda,3,n_points),P_new2(3,n_points) + double precision, intent(in) :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3) + double precision, intent(out) :: P_new(n_points,0:lda,3),P_new2(n_points,3) double precision :: binom_func integer :: i,j,k,l, xyz, ipoint, maxab(3) double precision, allocatable :: pows_a(:,:), pows_b(:,:) @@ -681,9 +750,9 @@ subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points) if (a(xyz)<0) cycle do ipoint=1,n_points pows_a(ipoint,0) = 1.d0 - pows_a(ipoint,1) = (x_P(xyz,ipoint) - x_A(xyz,ipoint)) + pows_a(ipoint,1) = (x_P(ipoint,xyz) - x_A(ipoint,xyz)) pows_b(ipoint,0) = 1.d0 - pows_b(ipoint,1) = (x_Q(xyz,ipoint) - x_B(xyz)) + pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz)) enddo do i = 2,maxab(xyz) do ipoint=1,n_points @@ -692,19 +761,19 @@ subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points) enddo enddo do ipoint=1,n_points - P_new (0,xyz,ipoint) = pows_a(ipoint,a(xyz)) - P_new2(xyz,ipoint) = pows_b(ipoint,0) + P_new (ipoint,0,xyz) = pows_a(ipoint,a(xyz)) + P_new2(ipoint,xyz) = pows_b(ipoint,0) enddo do i = 1,min(a(xyz),20) fa = binom_transp(a(xyz)-i,a(xyz)) do ipoint=1,n_points - P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) enddo enddo do i = 21,a(xyz) fa = binom_func(a(xyz),a(xyz)-i) do ipoint=1,n_points - P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) enddo enddo diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f index ef0c4ffc..2a32d1b9 100644 --- a/src/utils/one_e_integration.irp.f +++ b/src/utils/one_e_integration.irp.f @@ -176,7 +176,7 @@ subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& integer :: nmax double precision :: F_integral - ldp = max_dim + ldp = maxval( power_A(1:3) + power_B(1:3) ) allocate(P_new(0:ldp,3,n_points), P_center(3,n_points), fact_p(n_points), & fact_pp(n_points), pp(n_points)) From 86b4ff44d91ba77d39da56a2b013d000bf921f53 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 20 Nov 2022 22:50:14 +0100 Subject: [PATCH 67/70] Vectorized int2_grad1u2_grad2u2_j1b2 --- src/ao_many_one_e_ints/listj1b.irp.f | 2 +- .../prim_int_gauss_gauss.irp.f | 19 ++--- src/utils/integration.irp.f | 76 ++++++------------- src/utils/one_e_integration.irp.f | 16 ++-- 4 files changed, 39 insertions(+), 74 deletions(-) diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index ced6f4e9..1178cc31 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -176,8 +176,8 @@ END_PROVIDER enddo - ASSERT(List_all_comb_b3_expo(i) .gt. 0d0) if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle + ASSERT(List_all_comb_b3_expo(i) .gt. 0d0) List_all_comb_b3_cent(1,i) = List_all_comb_b3_cent(1,i) / List_all_comb_b3_expo(i) List_all_comb_b3_cent(2,i) = List_all_comb_b3_cent(2,i) / List_all_comb_b3_expo(i) diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f index 4cba0965..1638aa9e 100644 --- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f +++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f @@ -56,7 +56,7 @@ end !--- -subroutine overlap_gauss_r12_v(D_center_,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) +subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) BEGIN_DOC ! Computes the following integral : ! @@ -70,7 +70,7 @@ subroutine overlap_gauss_r12_v(D_center_,delta,A_center,B_center,power_A,power_B implicit none include 'constants.include.F' integer, intent(in) :: n_points - double precision, intent(in) :: D_center_(n_points,3), delta ! pure gaussian "D" + double precision, intent(in) :: D_center(n_points,3), delta ! pure gaussian "D" double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" integer, intent(in) :: power_A(3),power_B(3) double precision, intent(out) :: rvec(n_points) @@ -92,14 +92,7 @@ subroutine overlap_gauss_r12_v(D_center_,delta,A_center,B_center,power_A,power_B maxab = maxval(power_A(1:3)) - double precision, allocatable :: D_center(:,:) - allocate(D_center(3,n_points)) - D_center(1,1:n_points) = D_center_(1:n_points,1) - D_center(2,1:n_points) = D_center_(1:n_points,2) - D_center(3,1:n_points) = D_center_(1:n_points,3) - - - allocate (A_new(0:maxab, 3, n_points), A_center_new(3, n_points), & + allocate (A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), & fact_a_new(n_points), iorder_a_new(3), overlap(n_points) ) call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & @@ -118,9 +111,9 @@ subroutine overlap_gauss_r12_v(D_center_,delta,A_center,B_center,power_A,power_B iorder_tmp(3) = lz call overlap_gaussian_xyz_v(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap,dim1,n_points) do ipoint=1,n_points - rvec(ipoint) = rvec(ipoint) + A_new(lx,1,ipoint) * & - A_new(ly,2,ipoint) * & - A_new(lz,3,ipoint) * overlap(ipoint) + rvec(ipoint) = rvec(ipoint) + A_new(ipoint,lx,1) * & + A_new(ipoint,ly,2) * & + A_new(ipoint,lz,3) * overlap(ipoint) enddo enddo enddo diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index 29c6d1ff..f68465c7 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -147,12 +147,12 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde integer, intent(in) :: n_points, ldp integer, intent(in) :: a(3),b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) double precision, intent(in) :: alpha, beta ! exponents - double precision, intent(in) :: A_center(3,n_points) ! A center + double precision, intent(in) :: A_center(n_points,3) ! A center double precision, intent(in) :: B_center (3) ! B center - double precision, intent(out) :: P_center(3,n_points) ! new center + double precision, intent(out) :: P_center(n_points,3) ! new center double precision, intent(out) :: p ! new exponent double precision, intent(out) :: fact_k(n_points) ! constant factor - double precision, intent(out) :: P_new(0:ldp,3,n_points)! polynomial + double precision, intent(out) :: P_new(n_points,0:ldp,3)! polynomial integer, intent(out) :: iorder(3) ! i_order(i) = order of the polynomials double precision, allocatable :: P_a(:,:,:), P_b(:,:,:) @@ -161,82 +161,54 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde call gaussian_product_v(alpha,A_center,beta,B_center,fact_k,p,P_center,n_points) -!TODO TRANSP -double precision, allocatable :: P_a_(:,:,:), P_b_(:,:,:), A_center_(:,:), P_center_(:,:), P_new_(:,:,:) -allocate(A_center_(n_points,3), P_center_(n_points,3), P_new_(n_points,0:ldp,3)) -A_center_(1:n_points,1) = A_center(1,1:n_points) -A_center_(1:n_points,2) = A_center(2,1:n_points) -A_center_(1:n_points,3) = A_center(3,1:n_points) -P_center_(1:n_points,1) = P_center(1,1:n_points) -P_center_(1:n_points,2) = P_center(2,1:n_points) -P_center_(1:n_points,3) = P_center(3,1:n_points) - if ( ior(ior(b(1),b(2)),b(3)) == 0 ) then ! b == (0,0,0) - lda = maxval(a) ldb = 0 - allocate(P_a_(n_points,0:lda,3), P_b_(n_points,0:0,3)) + allocate(P_a(n_points,0:lda,3), P_b(n_points,0:0,3)) - - call recentered_poly2_v0(P_a_,lda,A_center_,P_center_,a,P_b_,B_center,P_center_,n_points) + call recentered_poly2_v0(P_a,lda,A_center,P_center,a,P_b,B_center,P_center,n_points) iorder(1:3) = a(1:3) do ipoint=1,n_points do xyz=1,3 - P_new_(ipoint,0,xyz) = P_a_(ipoint,0,xyz) * P_b_(ipoint,0,xyz) + P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz) do i=1,a(xyz) - P_new_(ipoint,i,xyz) = P_new_(ipoint,i,xyz) + P_b_(ipoint,0,xyz) * P_a_(ipoint,i,xyz) + P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz) enddo enddo enddo -do ipoint=1,n_points -do i=0,ldp -P_new(i,1,ipoint) = P_new_(ipoint,i,1) -P_new(i,2,ipoint) = P_new_(ipoint,i,2) -P_new(i,3,ipoint) = P_new_(ipoint,i,3) -enddo -enddo return endif lda = maxval(a) ldb = maxval(b) - allocate(P_a_(n_points,0:lda,3), P_b_(n_points,0:ldb,3)) - - call recentered_poly2_v(P_a_,lda,A_center_,P_center_,a,P_b_,ldb,B_center,P_center_,b,n_points) + allocate(P_a(n_points,0:lda,3), P_b(n_points,0:ldb,3)) + call recentered_poly2_v(P_a,lda,A_center,P_center,a,P_b,ldb,B_center,P_center,b,n_points) iorder(1:3) = a(1:3) + b(1:3) do xyz=1,3 if (b(xyz) == 0) then do ipoint=1,n_points - P_new_(ipoint,0,xyz) = P_a_(ipoint,0,xyz) * P_b_(ipoint,0,xyz) + P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz) do i=1,a(xyz) - P_new_(ipoint,i,xyz) = P_new_(ipoint,i,xyz) + P_b_(ipoint,0,xyz) * P_a_(ipoint,i,xyz) + P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz) enddo enddo else do i=0,iorder(xyz) do ipoint=1,n_points - P_new_(ipoint,i,xyz) = 0.d0 + P_new(ipoint,i,xyz) = 0.d0 enddo enddo - call multiply_poly_v(P_a_(1,0,xyz), a(xyz),P_b_(1,0,xyz),b(xyz),P_new_(1,0,xyz),ldp,n_points) + call multiply_poly_v(P_a(1,0,xyz), a(xyz),P_b(1,0,xyz),b(xyz),P_new(1,0,xyz),ldp,n_points) endif enddo - -do ipoint=1,n_points -do i=0,ldp -P_new(i,1,ipoint) = P_new_(ipoint,i,1) -P_new(i,2,ipoint) = P_new_(ipoint,i,2) -P_new(i,3,ipoint) = P_new_(ipoint,i,3) -enddo -enddo end !- @@ -353,9 +325,9 @@ subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) integer, intent(in) :: n_points double precision, intent(in) :: a,b ! Exponents - double precision, intent(in) :: xa(3,n_points),xb(3) ! Centers + double precision, intent(in) :: xa(n_points,3),xb(3) ! Centers double precision, intent(out) :: p ! New exponent - double precision, intent(out) :: xp(3,n_points) ! New center + double precision, intent(out) :: xp(n_points,3) ! New center double precision, intent(out) :: k(n_points) ! Constant double precision :: p_inv @@ -377,20 +349,20 @@ subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) bpxb(3) = bp*xb(3) do ipoint=1,n_points - xab(1) = xa(1,ipoint)-xb(1) - xab(2) = xa(2,ipoint)-xb(2) - xab(3) = xa(3,ipoint)-xb(3) + xab(1) = xa(ipoint,1)-xb(1) + xab(2) = xa(ipoint,2)-xb(2) + xab(3) = xa(ipoint,3)-xb(3) k(ipoint) = ab*(xab(1)*xab(1)+xab(2)*xab(2)+xab(3)*xab(3)) if (k(ipoint) > 40.d0) then k(ipoint)=0.d0 - xp(1,ipoint) = 0.d0 - xp(2,ipoint) = 0.d0 - xp(3,ipoint) = 0.d0 + xp(ipoint,1) = 0.d0 + xp(ipoint,2) = 0.d0 + xp(ipoint,3) = 0.d0 else k(ipoint) = dexp(-k(ipoint)) - xp(1,ipoint) = ap*xa(1,ipoint)+bpxb(1) - xp(2,ipoint) = ap*xa(2,ipoint)+bpxb(2) - xp(3,ipoint) = ap*xa(3,ipoint)+bpxb(3) + xp(ipoint,1) = ap*xa(ipoint,1)+bpxb(1) + xp(ipoint,2) = ap*xa(ipoint,2)+bpxb(2) + xp(ipoint,3) = ap*xa(ipoint,3)+bpxb(3) endif enddo end subroutine diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f index 2a32d1b9..a62c657e 100644 --- a/src/utils/one_e_integration.irp.f +++ b/src/utils/one_e_integration.irp.f @@ -165,7 +165,7 @@ subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& END_DOC include 'constants.include.F' integer,intent(in) :: dim, n_points - double precision,intent(in) :: A_center(3,n_points),B_center(3) ! center of the x1 functions + double precision,intent(in) :: A_center(n_points,3),B_center(3) ! center of the x1 functions double precision, intent(in) :: alpha,beta integer,intent(in) :: power_A(3), power_B(3) ! power of the x1 functions double precision, intent(out) :: overlap(n_points) @@ -177,7 +177,7 @@ subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& double precision :: F_integral ldp = maxval( power_A(1:3) + power_B(1:3) ) - allocate(P_new(0:ldp,3,n_points), P_center(3,n_points), fact_p(n_points), & + allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points), & fact_pp(n_points), pp(n_points)) call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,n_points) @@ -197,19 +197,19 @@ subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& cycle endif - overlap_x = P_new(0,1,ipoint) * F_integral_tab(0) + overlap_x = P_new(ipoint,0,1) * F_integral_tab(0) do i = 1,iorder_p(1) - overlap_x = overlap_x + P_new(i,1,ipoint) * F_integral_tab(i) + overlap_x = overlap_x + P_new(ipoint,i,1) * F_integral_tab(i) enddo - overlap_y = P_new(0,2,ipoint) * F_integral_tab(0) + overlap_y = P_new(ipoint,0,2) * F_integral_tab(0) do i = 1,iorder_p(2) - overlap_y = overlap_y + P_new(i,2,ipoint) * F_integral_tab(i) + overlap_y = overlap_y + P_new(ipoint,i,2) * F_integral_tab(i) enddo - overlap_z = P_new(0,3,ipoint) * F_integral_tab(0) + overlap_z = P_new(ipoint,0,3) * F_integral_tab(0) do i = 1,iorder_p(3) - overlap_z = overlap_z + P_new(i,3,ipoint) * F_integral_tab(i) + overlap_z = overlap_z + P_new(ipoint,i,3) * F_integral_tab(i) enddo overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_pp(ipoint) From 49cc53e9191da0ed6c3293b31a33dd6ccfb2859f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 20 Nov 2022 23:07:43 +0100 Subject: [PATCH 68/70] Fixed OpenMP --- src/ao_many_one_e_ints/grad2_jmu_modif.irp.f | 116 ++++++++++--------- 1 file changed, 61 insertions(+), 55 deletions(-) diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index 272371c8..84f2d8c7 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -11,7 +11,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n implicit none integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), int_fit, expo_fit, coef_fit + double precision :: r(3), expo_fit, coef_fit double precision :: coef, beta, B_center(3) double precision :: tmp double precision :: wall0, wall1 @@ -22,7 +22,6 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n provide mu_erf final_grid_points_transp j1b_pen call wall_time(wall0) - allocate(int_fit_v(n_points_final_grid)) int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & @@ -34,6 +33,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,& !$OMP ao_overlap_abs) + + allocate(int_fit_v(n_points_final_grid)) !$OMP DO SCHEDULE(dynamic) do i = 1, ao_num do j = i, ao_num @@ -66,6 +67,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n enddo enddo !$OMP END DO + deallocate(int_fit_v) !$OMP END PARALLEL do ipoint = 1, n_points_final_grid @@ -84,70 +86,74 @@ END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)] - + BEGIN_DOC ! ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2 ! END_DOC - + implicit none - integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), int_fit, expo_fit, coef_fit - double precision :: coef, beta, B_center(3), tmp - double precision :: wall0, wall1 - - double precision, external :: overlap_gauss_r12_ao_with1s - - provide mu_erf final_grid_points j1b_pen + integer :: i, j, ipoint, i_1s, i_fit + double precision :: r(3), expo_fit, coef_fit + double precision :: coef, beta, B_center(3), tmp + double precision :: wall0, wall1 + double precision, allocatable :: int_fit_v(:) + + double precision, external :: overlap_gauss_r12_ao_with1s + + provide mu_erf final_grid_points_transp j1b_pen call wall_time(wall0) - + int2_u2_j1b2 = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points, n_max_fit_slat, & - !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_u2_j1b2) - !$OMP DO - !do ipoint = 1, 10 - do ipoint = 1, n_points_final_grid - r(1) = final_grid_points(1,ipoint) - r(2) = final_grid_points(2,ipoint) - r(3) = final_grid_points(3,ipoint) - - do i = 1, ao_num - do j = i, ao_num - - tmp = 0.d0 - do i_1s = 1, List_all_comb_b3_size - - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) - - do i_fit = 1, n_max_fit_slat - - expo_fit = expo_gauss_j_mu_x_2(i_fit) - coef_fit = coef_gauss_j_mu_x_2(i_fit) - int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) - - tmp += coef * coef_fit * int_fit + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& + !$OMP coef_fit, expo_fit, int_fit_v, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& + !$OMP final_grid_points_transp, n_max_fit_slat, & + !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u2_j1b2) + allocate(int_fit_v(n_points_final_grid)) + !$OMP DO + do i = 1, ao_num + do j = i, ao_num + + tmp = 0.d0 + do i_1s = 1, List_all_comb_b3_size + + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) + + do i_fit = 1, n_max_fit_slat + + expo_fit = expo_gauss_j_mu_x_2(i_fit) + coef_fit = coef_gauss_j_mu_x_2(i_fit) * coef + + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points_transp(ipoint,1) + r(2) = final_grid_points_transp(ipoint,2) + r(3) = final_grid_points_transp(ipoint,3) + + int_fit_v(ipoint) = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) + enddo + + do ipoint = 1, n_points_final_grid + int2_u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) enddo enddo - - int2_u2_j1b2(j,i,ipoint) = tmp enddo + enddo enddo - !$OMP END DO - !$OMP END PARALLEL - + !$OMP END DO + deallocate(int_fit_v) + !$OMP END PARALLEL + do ipoint = 1, n_points_final_grid do i = 2, ao_num do j = 1, i-1 @@ -155,10 +161,10 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final enddo enddo enddo - + call wall_time(wall1) print*, ' wall time for int2_u2_j1b2', wall1 - wall0 - + END_PROVIDER ! --- From 80b01d59473e001806802cec983d54e2bf8663fb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 20 Nov 2022 23:18:16 +0100 Subject: [PATCH 69/70] Vectorized int2_u2_j1b2 --- src/ao_many_one_e_ints/ao_gaus_gauss.irp.f | 2 + src/ao_many_one_e_ints/grad2_jmu_modif.irp.f | 79 +++++++++----------- 2 files changed, 36 insertions(+), 45 deletions(-) diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f index 56575b54..facb6264 100644 --- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f @@ -368,3 +368,5 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, end + + diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index 84f2d8c7..d049ec22 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -56,11 +56,13 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n expo_fit = expo_gauss_1_erf_x_2(i_fit) coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef - call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, expo_fit, i, j, int_fit_v, n_points_final_grid) + call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & + expo_fit, i, j, int_fit_v, n_points_final_grid) do ipoint = 1, n_points_final_grid int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) enddo + enddo enddo @@ -86,74 +88,69 @@ END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)] - + BEGIN_DOC ! ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2 ! END_DOC - + implicit none integer :: i, j, ipoint, i_1s, i_fit double precision :: r(3), expo_fit, coef_fit double precision :: coef, beta, B_center(3), tmp double precision :: wall0, wall1 double precision, allocatable :: int_fit_v(:) - + double precision, external :: overlap_gauss_r12_ao_with1s - + provide mu_erf final_grid_points_transp j1b_pen call wall_time(wall0) - - int2_u2_j1b2 = 0.d0 - + + int2_u2_j1b2(:,:,:) = 0.d0 + !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& - !$OMP coef_fit, expo_fit, int_fit_v, tmp) & + !$OMP coef_fit, expo_fit, int_fit_v) & !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& !$OMP final_grid_points_transp, n_max_fit_slat, & !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_u2_j1b2) allocate(int_fit_v(n_points_final_grid)) - !$OMP DO + !$OMP DO SCHEDULE(dynamic) do i = 1, ao_num do j = i, ao_num - - tmp = 0.d0 + do i_1s = 1, List_all_comb_b3_size - + coef = List_all_comb_b3_coef (i_1s) beta = List_all_comb_b3_expo (i_1s) B_center(1) = List_all_comb_b3_cent(1,i_1s) B_center(2) = List_all_comb_b3_cent(2,i_1s) B_center(3) = List_all_comb_b3_cent(3,i_1s) - + do i_fit = 1, n_max_fit_slat - + expo_fit = expo_gauss_j_mu_x_2(i_fit) coef_fit = coef_gauss_j_mu_x_2(i_fit) * coef - - do ipoint = 1, n_points_final_grid - r(1) = final_grid_points_transp(ipoint,1) - r(2) = final_grid_points_transp(ipoint,2) - r(3) = final_grid_points_transp(ipoint,3) - - int_fit_v(ipoint) = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) - enddo - + + call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & + expo_fit, i, j, int_fit_v, n_points_final_grid) + do ipoint = 1, n_points_final_grid int2_u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) enddo + enddo + enddo - enddo enddo !$OMP END DO deallocate(int_fit_v) !$OMP END PARALLEL - + do ipoint = 1, n_points_final_grid do i = 2, ao_num do j = 1, i-1 @@ -161,10 +158,10 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final enddo enddo enddo - + call wall_time(wall1) print*, ' wall time for int2_u2_j1b2', wall1 - wall0 - + END_PROVIDER ! --- @@ -185,10 +182,10 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p double precision :: tmp_x, tmp_y, tmp_z double precision :: wall0, wall1 - provide mu_erf final_grid_points j1b_pen + provide mu_erf final_grid_points_transp j1b_pen call wall_time(wall0) - int2_u_grad1u_x_j1b2 = 0.d0 + int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & @@ -196,22 +193,19 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & !$OMP tmp_x, tmp_y, tmp_z) & !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points, n_max_fit_slat, & + !$OMP final_grid_points_transp, n_max_fit_slat, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) !$OMP DO do ipoint = 1, n_points_final_grid - r(1) = final_grid_points(1,ipoint) - r(2) = final_grid_points(2,ipoint) - r(3) = final_grid_points(3,ipoint) + r(1) = final_grid_points_transp(ipoint,1) + r(2) = final_grid_points_transp(ipoint,2) + r(3) = final_grid_points_transp(ipoint,3) do i = 1, ao_num do j = i, ao_num - tmp_x = 0.d0 - tmp_y = 0.d0 - tmp_z = 0.d0 do i_1s = 1, List_all_comb_b3_size coef = List_all_comb_b3_coef (i_1s) @@ -236,21 +230,16 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist - !if(expo_coef_1s .gt. 80.d0) cycle coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) - !if(dabs(coef_tmp) .lt. 1d-10) cycle call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) - tmp_x += coef_tmp * int_fit(1) - tmp_y += coef_tmp * int_fit(2) - tmp_z += coef_tmp * int_fit(3) + int2_u_grad1u_x_j1b2(1,j,i,ipoint) += coef_tmp * int_fit(1) + int2_u_grad1u_x_j1b2(2,j,i,ipoint) += coef_tmp * int_fit(2) + int2_u_grad1u_x_j1b2(3,j,i,ipoint) += coef_tmp * int_fit(3) enddo enddo - int2_u_grad1u_x_j1b2(1,j,i,ipoint) = tmp_x - int2_u_grad1u_x_j1b2(2,j,i,ipoint) = tmp_y - int2_u_grad1u_x_j1b2(3,j,i,ipoint) = tmp_z enddo enddo enddo From dddd4090085e0970d77ab7337a3fdc683e0099f2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 21 Nov 2022 02:13:43 +0100 Subject: [PATCH 70/70] Vectorizing integrals. --- src/ao_many_one_e_ints/ao_erf_gauss.irp.f | 210 +++++++++++++++--- src/ao_many_one_e_ints/grad2_jmu_modif.irp.f | 113 ++++++---- src/ao_one_e_ints/pot_ao_erf_ints.irp.f | 218 ++++++++++++++++++- 3 files changed, 466 insertions(+), 75 deletions(-) diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f index fe25e9c0..4fc9ad6b 100644 --- a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f @@ -19,11 +19,11 @@ subroutine phi_j_erf_mu_r_xyz_phi(i,j,mu_in, C_center, xyz_ints) return endif n_pt_in = n_pt_max_integrals - ! j + ! j num_A = ao_nucl(j) power_A(1:3)= ao_power(j,1:3) A_center(1:3) = nucl_coord(num_A,1:3) - ! i + ! i num_B = ao_nucl(i) power_B(1:3)= ao_power(i,1:3) B_center(1:3) = nucl_coord(num_B,1:3) @@ -33,19 +33,19 @@ subroutine phi_j_erf_mu_r_xyz_phi(i,j,mu_in, C_center, xyz_ints) do m=1,ao_prim_num(i) beta = ao_expo_ordered_transp(m,i) do mm = 1, 3 - ! (x phi_i ) * phi_j + ! (x phi_i ) * phi_j ! x * (x - B_x)^b_x = b_x (x - B_x)^b_x + 1 * (x - B_x)^{b_x+1} ! ! first contribution :: B_x (x - B_x)^b_x :: usual integral multiplied by B_x power_B_tmp = power_B - contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in) + contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in) xyz_ints(mm) += contrib * B_center(mm) * ao_coef_normalized_ordered_transp(l,j) & - * ao_coef_normalized_ordered_transp(m,i) - ! second contribution :: 1 * (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1 + * ao_coef_normalized_ordered_transp(m,i) + ! second contribution :: 1 * (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1 power_B_tmp(mm) += 1 - contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in) + contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in) xyz_ints(mm) += contrib * 1.d0 * ao_coef_normalized_ordered_transp(l,j) & - * ao_coef_normalized_ordered_transp(m,i) + * ao_coef_normalized_ordered_transp(m,i) enddo enddo enddo @@ -58,7 +58,7 @@ double precision function phi_j_erf_mu_r_phi(i, j, mu_in, C_center) BEGIN_DOC ! phi_j_erf_mu_r_phi = int dr phi_j(r) [erf(mu |r - C|)/|r-C|] phi_i(r) END_DOC - + implicit none integer, intent(in) :: i,j double precision, intent(in) :: mu_in, C_center(3) @@ -77,24 +77,24 @@ double precision function phi_j_erf_mu_r_phi(i, j, mu_in, C_center) n_pt_in = n_pt_max_integrals - ! j + ! j num_A = ao_nucl(j) power_A(1:3) = ao_power(j,1:3) A_center(1:3) = nucl_coord(num_A,1:3) - ! i + ! i num_B = ao_nucl(i) power_B(1:3) = ao_power(i,1:3) B_center(1:3) = nucl_coord(num_B,1:3) - + do l = 1, ao_prim_num(j) alpha = ao_expo_ordered_transp(l,j) do m = 1, ao_prim_num(i) beta = ao_expo_ordered_transp(m,i) - contrib = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) + contrib = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) - phi_j_erf_mu_r_phi += contrib * ao_coef_normalized_ordered_transp(l,j) * ao_coef_normalized_ordered_transp(m,i) + phi_j_erf_mu_r_phi += contrib * ao_coef_normalized_ordered_transp(l,j) * ao_coef_normalized_ordered_transp(m,i) enddo enddo @@ -124,11 +124,11 @@ subroutine erfc_mu_gauss_xyz_ij_ao(i, j, mu, C_center, delta, gauss_ints) return endif n_pt_in = n_pt_max_integrals - ! j + ! j num_A = ao_nucl(j) power_A(1:3)= ao_power(j,1:3) A_center(1:3) = nucl_coord(num_A,1:3) - ! i + ! i num_B = ao_nucl(i) power_B(1:3)= ao_power(i,1:3) B_center(1:3) = nucl_coord(num_B,1:3) @@ -141,7 +141,7 @@ subroutine erfc_mu_gauss_xyz_ij_ao(i, j, mu, C_center, delta, gauss_ints) call erfc_mu_gauss_xyz(C_center,delta,mu,A_center,B_center,power_A,power_B,alpha,beta,n_pt_in,xyz_ints) do mm = 1, 4 gauss_ints(mm) += xyz_ints(mm) * ao_coef_normalized_ordered_transp(l,j) & - * ao_coef_normalized_ordered_transp(m,i) + * ao_coef_normalized_ordered_transp(m,i) enddo enddo enddo @@ -161,7 +161,7 @@ subroutine erf_mu_gauss_ij_ao(i, j, mu, C_center, delta, gauss_ints) integer, intent(in) :: i, j double precision, intent(in) :: mu, C_center(3), delta double precision, intent(out) :: gauss_ints - + integer :: n_pt_in, l, m integer :: num_A, power_A(3), num_b, power_B(3) double precision :: alpha, beta, A_center(3), B_center(3), coef @@ -177,16 +177,16 @@ subroutine erf_mu_gauss_ij_ao(i, j, mu, C_center, delta, gauss_ints) n_pt_in = n_pt_max_integrals - ! j + ! j num_A = ao_nucl(j) power_A(1:3) = ao_power(j,1:3) A_center(1:3) = nucl_coord(num_A,1:3) - ! i + ! i num_B = ao_nucl(i) power_B(1:3) = ao_power(i,1:3) B_center(1:3) = nucl_coord(num_B,1:3) - + do l = 1, ao_prim_num(j) alpha = ao_expo_ordered_transp(l,j) do m = 1, ao_prim_num(i) @@ -219,7 +219,7 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) ! END_DOC - include 'utils/constants.include.F' + include 'utils/constants.include.F' implicit none @@ -274,7 +274,82 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) end subroutine NAI_pol_x_mult_erf_ao ! --- +subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao, n_points + double precision, intent(in) :: mu_in, C_center(n_points,3) + double precision, intent(out) :: ints(n_points,3) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in + integer :: power_xA(3), m, ipoint + double precision :: A_center(3), B_center(3), alpha, beta, coef + double precision, allocatable :: integral(:) + double precision :: NAI_pol_mult_erf + + ints = 0.d0 + if(ao_overlap_abs(j_ao,i_ao).lt.1.d-12) then + return + endif + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + n_pt_in = n_pt_max_integrals + + allocate(integral(n_points)) + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do m = 1, 3 + + power_xA = power_A + ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax + power_xA(m) += 1 + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + ! First term = (x-Ax)**(ax+1) + call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points) + do ipoint=1,n_points + ints(ipoint,m) += integral(ipoint) * coef + enddo + + ! Second term = Ax * (x-Ax)**(ax) + call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points) + do ipoint=1,n_points + ints(ipoint,m) += A_center(m) * integral(ipoint) * coef + enddo + + enddo + enddo + enddo + deallocate(integral) + +end subroutine NAI_pol_x_mult_erf_ao_v + +! --- subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center, ints) BEGIN_DOC @@ -289,7 +364,7 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen ! END_DOC - include 'utils/constants.include.F' + include 'utils/constants.include.F' implicit none @@ -333,7 +408,7 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen do j = 1, ao_prim_num(j_ao) alphaj = ao_expo_ordered_transp (j,j_ao) - coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) ! First term = (x-Ax)**(ax+1) integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj & @@ -351,6 +426,91 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen end subroutine NAI_pol_x_mult_erf_ao_with1s +!-- + +subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_center, ints, n_points) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao, n_points + double precision, intent(in) :: beta, B_center(n_points,3), mu_in, C_center(n_points,3) + double precision, intent(out) :: ints(n_points,3) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m + double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi + + integer :: ipoint + double precision, allocatable :: integral(:) + + if(beta .lt. 1d-10) then + call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) + return + endif + + ints(:,:) = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then + return + endif + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + n_pt_in = n_pt_max_integrals + + allocate(integral(n_points)) + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do m = 1, 3 + + ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax + power_xA = power_Ai + power_xA(m) += 1 + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + ! First term = (x-Ax)**(ax+1) + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, & + alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points) + do ipoint = 1, n_points + ints(ipoint,m) += integral(ipoint) * coef + enddo + + ! Second term = Ax * (x-Ax)**(ax) + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, & + alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points) + do ipoint = 1, n_points + ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef + enddo + + enddo + enddo + enddo + deallocate(integral) + +end subroutine NAI_pol_x_mult_erf_ao_with1s + + ! --- subroutine NAI_pol_x_specify_mult_erf_ao(i_ao,j_ao,mu_in,C_center,m,ints) @@ -361,7 +521,7 @@ subroutine NAI_pol_x_specify_mult_erf_ao(i_ao,j_ao,mu_in,C_center,m,ints) ! ! if m == 1 X(m) = x, m == 1 X(m) = y, m == 1 X(m) = z END_DOC - include 'utils/constants.include.F' + include 'utils/constants.include.F' integer, intent(in) :: i_ao,j_ao,m double precision, intent(in) :: mu_in, C_center(3) double precision, intent(out):: ints diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index d049ec22..cdc86456 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -175,76 +175,95 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p END_DOC implicit none - integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), int_fit(3), expo_fit, coef_fit - double precision :: coef, beta, B_center(3), dist - double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp - double precision :: tmp_x, tmp_y, tmp_z - double precision :: wall0, wall1 + integer :: i, j, ipoint, i_1s, i_fit + double precision :: r(3), expo_fit, coef_fit + double precision :: coef, beta, B_center(3) + double precision :: alpha_1s, alpha_1s_inv, expo_coef_1s, coef_tmp + double precision :: tmp_x, tmp_y, tmp_z + double precision :: wall0, wall1 + double precision, allocatable :: int_fit_v(:,:), dist(:), centr_1s(:,:) provide mu_erf final_grid_points_transp j1b_pen call wall_time(wall0) - int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, alpha_1s, dist, & - !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & - !$OMP tmp_x, tmp_y, tmp_z) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points_transp, n_max_fit_slat, & - !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) - !$OMP DO + allocate(dist(n_points_final_grid), centr_1s(n_points_final_grid,3)) do ipoint = 1, n_points_final_grid r(1) = final_grid_points_transp(ipoint,1) r(2) = final_grid_points_transp(ipoint,2) r(3) = final_grid_points_transp(ipoint,3) - do i = 1, ao_num - do j = i, ao_num + dist(ipoint) = (B_center(1) - r(1)) * (B_center(1) - r(1)) & + + (B_center(2) - r(2)) * (B_center(2) - r(2)) & + + (B_center(3) - r(3)) * (B_center(3) - r(3)) + enddo - do i_1s = 1, List_all_comb_b3_size + int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0 - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) - dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) & - + (B_center(2) - r(2)) * (B_center(2) - r(2)) & - + (B_center(3) - r(3)) * (B_center(3) - r(3)) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& + !$OMP coef_fit, expo_fit, int_fit_v, alpha_1s, & + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & + !$OMP tmp_x, tmp_y, tmp_z) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& + !$OMP final_grid_points_transp, n_max_fit_slat, dist, & + !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) + allocate(int_fit_v(n_points_final_grid,3)) - do i_fit = 1, n_max_fit_slat + do i_1s = 1, List_all_comb_b3_size - expo_fit = expo_gauss_j_mu_1_erf(i_fit) - coef_fit = coef_gauss_j_mu_1_erf(i_fit) + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) - alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s + do i_fit = 1, n_max_fit_slat - centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) - centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) - centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) + expo_fit = expo_gauss_j_mu_1_erf(i_fit) + coef_fit = coef_gauss_j_mu_1_erf(i_fit) * coef - expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist - coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) + alpha_1s = beta + expo_fit + alpha_1s_inv = 1.d0 / alpha_1s - call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points_transp(ipoint,1) + r(2) = final_grid_points_transp(ipoint,2) + r(3) = final_grid_points_transp(ipoint,3) - int2_u_grad1u_x_j1b2(1,j,i,ipoint) += coef_tmp * int_fit(1) - int2_u_grad1u_x_j1b2(2,j,i,ipoint) += coef_tmp * int_fit(2) - int2_u_grad1u_x_j1b2(3,j,i,ipoint) += coef_tmp * int_fit(3) + centr_1s(ipoint,1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) + centr_1s(ipoint,2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) + centr_1s(ipoint,3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) + enddo + + expo_coef_1s = beta * expo_fit * alpha_1s_inv + !$OMP BARRIER + !$OMP DO SCHEDULE(dynamic) + do i = 1, ao_num + do j = i, ao_num + call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s,& + 1.d+9, final_grid_points_transp, int_fit_v, n_points_final_grid) + + do ipoint = 1, n_points_final_grid + coef_tmp = coef_fit * dexp(-expo_coef_1s* dist(ipoint)) + int2_u_grad1u_x_j1b2(1,j,i,ipoint) = & + int2_u_grad1u_x_j1b2(1,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,1) + int2_u_grad1u_x_j1b2(2,j,i,ipoint) = & + int2_u_grad1u_x_j1b2(2,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,2) + int2_u_grad1u_x_j1b2(3,j,i,ipoint) = & + int2_u_grad1u_x_j1b2(3,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,3) enddo enddo - enddo + !$OMP END DO NOWAIT + enddo enddo - !$OMP END DO - !$OMP END PARALLEL + deallocate(int_fit_v) + !$OMP END PARALLEL + + deallocate(dist) do ipoint = 1, n_points_final_grid do i = 2, ao_num diff --git a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f index 263e9845..96625df5 100644 --- a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f @@ -124,7 +124,7 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, ! Computes the following integral : ! ! .. math:: - ! + ! ! \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) ! \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$. ! @@ -197,6 +197,92 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, end function NAI_pol_mult_erf +! --- +subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, res_v, n_points) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! .. math:: + ! + ! \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) + ! \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + integer, intent(in) :: n_pt_in, n_points + integer, intent(in) :: power_A(3), power_B(3) + double precision, intent(in) :: C_center(n_points,3), A_center(3), B_center(3), alpha, beta, mu_in + double precision, intent(out) :: res_v(n_points) + + integer :: i, n_pt, n_pt_out, ipoint + double precision :: P_center(3) + double precision :: d(0:n_pt_in), coeff, dist, const, factor + double precision :: const_factor, dist_integral + double precision :: accu, p_inv, p, rho, p_inv_2 + double precision :: p_new + + double precision :: rint + + p = alpha + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha * beta * p_inv + p_new = mu_in / dsqrt(p + mu_in * mu_in) + + dist = 0.d0 + do i = 1, 3 + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) + enddo + + do ipoint=1,n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const_factor = dist * rho + if(const_factor > 80.d0) then + res_V(ipoint) = 0.d0 + cycle + endif + + factor = dexp(-const_factor) + coeff = dtwo_pi * factor * p_inv * p_new + + n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) + const = p * dist_integral * p_new * p_new + if(n_pt == 0) then + res_v(ipoint) = coeff * rint(0, const) + cycle + endif + + do i = 0, n_pt_in + d(i) = 0.d0 + enddo + p_new = p_new * p_new + call give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center(ipoint,1:3)& + , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) + + if(n_pt_out < 0) then + res_v(ipoint) = 0.d0 + cycle + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + res_v(ipoint) = accu * coeff + enddo + +end + ! --- double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & @@ -207,7 +293,7 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A ! Computes the following integral : ! ! .. math:: - ! + ! ! \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2) ! \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2) ! \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2) @@ -312,6 +398,131 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A end function NAI_pol_mult_erf_with1s +!-- + +subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2& + , beta, B_center, C_center, n_pt_in, mu_in, res_v, n_points) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! .. math :: + ! + ! \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2) + ! \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2) + ! \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2) + ! \exp(-\beta (r - B)^2) + ! \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + integer, intent(in) :: n_pt_in, n_points + integer, intent(in) :: power_A1(3), power_A2(3) + double precision, intent(in) :: C_center(n_points,3), A1_center(3), A2_center(3), B_center(n_points,3) + double precision, intent(in) :: alpha1, alpha2, beta, mu_in + double precision, intent(out) :: res_v(n_points) + + integer :: i, n_pt, n_pt_out, ipoint + double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 + double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor + double precision :: dist_integral + double precision :: d(0:n_pt_in), coeff, const, factor + double precision :: accu + double precision :: p_new, p_new2 + + double precision :: rint + + + ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2} + alpha12 = alpha1 + alpha2 + alpha12_inv = 1.d0 / alpha12 + alpha12_inv_2 = 0.5d0 * alpha12_inv + rho12 = alpha1 * alpha2 * alpha12_inv + A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv + A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv + A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv + dist12 = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1))& + + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))& + + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) + + const_factor12 = dist12 * rho12 + + if(const_factor12 > 80.d0) then + res_v(:) = 0.d0 + return + endif + + ! --- + + ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2} + p = alpha12 + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha12 * beta * p_inv + p_new = mu_in / dsqrt(p + mu_in * mu_in) + p_new2 = p_new * p_new + n_pt = 2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) & + + power_A1(3) + power_A2(3) ) + + do ipoint=1,n_points + + P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv + P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv + P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv + dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))& + + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))& + + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3)) + + const_factor = const_factor12 + dist * rho + if(const_factor > 80.d0) then + res_v(ipoint) = 0.d0 + cycle + endif + + dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))& + + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))& + + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3)) + + ! --- + + factor = dexp(-const_factor) + coeff = dtwo_pi * factor * p_inv * p_new + + const = p * dist_integral * p_new2 + if(n_pt == 0) then + res_v(ipoint) = coeff * rint(0, const) + cycle + endif + + do i = 0, n_pt_in + d(i) = 0.d0 + enddo + + !TODO: VECTORIZE HERE + call give_polynomial_mult_center_one_e_erf_opt( & + A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3)& + , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center,1) + + if(n_pt_out < 0) then + res_v(ipoint) = 0.d0 + cycle + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + res_v(ipoint) = accu * coeff + end do + +end + +! --- ! --- subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center & @@ -432,10 +643,11 @@ end subroutine give_polynomial_mult_center_one_e_erf_opt ! --- + subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,& power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in) BEGIN_DOC - ! Returns the explicit polynomial in terms of the $t$ variable of the + ! Returns the explicit polynomial in terms of the $t$ variable of the ! following polynomial: ! ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$.