diff --git a/configure b/configure index b3ed7758..7d063b1d 100755 --- a/configure +++ b/configure @@ -369,7 +369,7 @@ else echo "" echo "${QP_ROOT}/build.ninja does not exist," echo "you need to specify the COMPILATION configuration file." - echo "See ./configure --help for more details." + echo "See ./configure -h for more details." echo "" fi diff --git a/ocaml/Message.ml b/ocaml/Message.ml index b7d77430..049203d7 100644 --- a/ocaml/Message.ml +++ b/ocaml/Message.ml @@ -63,11 +63,11 @@ end module Connect_msg : sig type t = Tcp | Inproc | Ipc - val create : typ:string -> t + val create : string -> t val to_string : t -> string end = struct type t = Tcp | Inproc | Ipc - let create ~typ = + let create typ = match typ with | "tcp" -> Tcp | "inproc" -> Inproc @@ -515,9 +515,9 @@ let of_string s = | Connect_ socket -> Connect (Connect_msg.create socket) | NewJob_ { state ; push_address_tcp ; push_address_inproc } -> - Newjob (Newjob_msg.create push_address_tcp push_address_inproc state) + Newjob (Newjob_msg.create ~address_tcp:push_address_tcp ~address_inproc:push_address_inproc ~state) | EndJob_ state -> - Endjob (Endjob_msg.create state) + Endjob (Endjob_msg.create ~state) | GetData_ { state ; client_id ; key } -> GetData (GetData_msg.create ~client_id ~state ~key) | PutData_ { state ; client_id ; key } -> diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 92a6f5ca..ad827316 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -776,7 +776,7 @@ let run ~port = Zmq.Socket.create zmq_context Zmq.Socket.rep in Zmq.Socket.set_linger_period rep_socket 1_000_000; - bind_socket "REP" rep_socket port; + bind_socket ~socket_type:"REP" ~socket:rep_socket ~port; let initial_program_state = { queue = Queuing_system.create () ; diff --git a/src/ao_one_e_ints/pseudopot.f90 b/src/ao_one_e_ints/pseudopot.f90 index 141292d8..67dda7b5 100644 --- a/src/ao_one_e_ints/pseudopot.f90 +++ b/src/ao_one_e_ints/pseudopot.f90 @@ -1097,7 +1097,7 @@ implicit none integer nptsgridmax,nptsgrid double precision coefs_pseudo,ptsgrid parameter(nptsgridmax=50) -common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) +double precision common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) !!!!! integer npower_orb(3),l,m,i double precision x,g_orb,two_pi,dx,dphi,term,orb_phi,ylm_real,sintheta,r_orb,phi,center_orb(3) @@ -1238,7 +1238,7 @@ end double precision coefs_pseudo,ptsgrid double precision p,q,r,s parameter(nptsgridmax=50) - common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) + double precision common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) p=1.d0/dsqrt(2.d0) q=1.d0/dsqrt(3.d0) diff --git a/src/bitmask/core_inact_act_virt.irp.f b/src/bitmask/core_inact_act_virt.irp.f index d83d69e9..f3eaff32 100644 --- a/src/bitmask/core_inact_act_virt.irp.f +++ b/src/bitmask/core_inact_act_virt.irp.f @@ -173,10 +173,7 @@ BEGIN_PROVIDER [integer, n_core_inact_act_orb ] END_DOC n_core_inact_act_orb = (n_core_orb + n_inact_orb + n_act_orb) END_PROVIDER - - - BEGIN_PROVIDER [ integer(bit_kind), core_bitmask , (N_int,2) ] implicit none BEGIN_DOC @@ -443,5 +440,4 @@ BEGIN_PROVIDER [integer, list_all_but_del_orb, (n_all_but_del_orb)] endif enddo -END_PROVIDER - +END_PROVIDER diff --git a/src/csf/cfgCI_interface.f90 b/src/csf/cfgCI_interface.f90 index b701f0ec..73bd600d 100644 --- a/src/csf/cfgCI_interface.f90 +++ b/src/csf/cfgCI_interface.f90 @@ -46,6 +46,24 @@ module cfunctions real (kind=C_DOUBLE ),intent(out) :: csftodetmatrix(rowsmax,colsmax) end subroutine getCSFtoDETTransformationMatrix end interface + interface + subroutine gramSchmidt(A, m, n, B) bind(C, name='gramSchmidt') + import C_INT32_T, C_INT64_T, C_DOUBLE + integer(kind=C_INT32_T),value,intent(in) :: m + integer(kind=C_INT32_T),value,intent(in) :: n + real (kind=C_DOUBLE ),intent(in) :: A(m,n) + real (kind=C_DOUBLE ),intent(out) :: B(m,n) + end subroutine gramSchmidt + end interface + interface + subroutine gramSchmidt_qp(A, m, n, B) bind(C, name='gramSchmidt_qp') + import C_INT32_T, C_INT64_T, C_DOUBLE + integer(kind=C_INT32_T),value,intent(in) :: m + integer(kind=C_INT32_T),value,intent(in) :: n + real (kind=C_DOUBLE ),intent(in) :: A(m,n) + real (kind=C_DOUBLE ),intent(out) :: B(m,n) + end subroutine gramSchmidt_qp + end interface end module cfunctions subroutine f_dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC) & diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index 746de04e..3510db37 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -1,5 +1,6 @@ #include #include +#include #include "tree_utils.h" void int_to_bin_digit(int64_t in, int count, int* out) @@ -28,19 +29,19 @@ void getncsfs1(int *inpnsomo, int *inpms, int *outncsfs){ int nsomo = *inpnsomo; int ms = *inpms; int nparcoupl = (nsomo + ms)/2; - *outncsfs = binom(nsomo, nparcoupl); + *outncsfs = binom((double)nsomo, (double)nparcoupl); } void getncsfs(int NSOMO, int MS, int *outncsfs){ - int nparcoupl = (NSOMO + MS)/2; - int nparcouplp1 = ((NSOMO + MS)/2)+1; + int nparcoupl = (NSOMO + MS)/2; // n_alpha + int nparcouplp1 = ((NSOMO + MS)/2)+1; // n_alpha + 1 double tmpndets=0.0; if(NSOMO == 0){ (*outncsfs) = 1; return; } - tmpndets = binom(NSOMO, nparcoupl); - (*outncsfs) = round(tmpndets - binom(NSOMO, nparcouplp1)); + tmpndets = binom((double)NSOMO, (double)nparcoupl); + (*outncsfs) = round(tmpndets - binom((double)NSOMO, (double)nparcouplp1)); } #include @@ -252,6 +253,27 @@ void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOM buildTreeDriver(bftree, *NSOMO, MS, NBF); } +void ortho_qr_csf(double *overlapMatrix, int lda, double *orthoMatrix, int rows, int cols); + +// QR to orthogonalize CSFs does not work +//void gramSchmidt_qp(double *overlapMatrix, int rows, int cols, double *orthoMatrix){ +// int i,j; +// //for(j=0;j 0){ + ndets = (int)binom((double)NSOMO, (double)nalpha); + } + else if(NSOMO == 0){ + ndets = 1; + } + else printf("Something is wrong in calcMEdetpair\n"); Tree dettree = (Tree){ .rootNode = NULL, .NBF = -1 }; dettree.rootNode = malloc(sizeof(Node)); @@ -1389,16 +1431,6 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int * } else{ - //int addr = -1; - //int inpdet[NSOMO]; - //inpdet[0] = 1; - //inpdet[1] = 1; - //inpdet[2] = 1; - //inpdet[3] = 0; - //inpdet[4] = 0; - //inpdet[5] = 0; - - //findAddofDetDriver(&dettree, NSOMO, inpdet, &addr); int detlist[ndets]; getDetlistDriver(&dettree, NSOMO, detlist); @@ -1411,6 +1443,9 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int * generateAllBFs(Isomo, MS, &bftree, &NBF, &NSOMO); // Initialize transformation matrix + //printf("MS=%d NBF=%d ndets=%d NSOMO=%d\n",MS,NBF,ndets,NSOMO); + assert( NBF > 0); + assert( ndets > 0); (*bftodetmatrixptr) = malloc(NBF*ndets*sizeof(double)); (*rows) = NBF; (*cols) = ndets; @@ -1465,9 +1500,10 @@ void convertBFtoDetBasisWithArrayDims(int64_t Isomo, int MS, int rowsmax, int co getSetBits(Isomo, &NSOMO); int ndets = 0; int NBF = 0; - double dNSOMO = NSOMO*1.0; - double nalpha = (NSOMO + MS)/2.0; - ndets = (int)binom(dNSOMO, nalpha); + //double dNSOMO = NSOMO*1.0; + //double nalpha = (NSOMO + MS)/2.0; + int nalpha = (NSOMO + MS)/2; + ndets = (int)binom((double)NSOMO, (double)nalpha); Tree dettree = (Tree){ .rootNode = NULL, .NBF = -1 }; dettree.rootNode = malloc(sizeof(Node)); @@ -1551,6 +1587,7 @@ void getApqIJMatrixDims(int64_t Isomo, int64_t Jsomo, int64_t MS, int32_t *rowso getncsfs(NSOMOJ, MS, &NBFJ); (*rowsout) = NBFI; (*colsout) = NBFJ; + //exit(0); } void getApqIJMatrixDriver(int64_t Isomo, int64_t Jsomo, int orbp, int orbq, int64_t MS, int64_t NMO, double **CSFICSFJApqIJptr, int *rowsout, int *colsout){ @@ -1669,6 +1706,7 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in int rowsbftodetI, colsbftodetI; + //printf(" 1Calling convertBFtoDetBasis Isomo=%ld MS=%ld\n",Isomo,MS); convertBFtoDetBasis(Isomo, MS, &bftodetmatrixI, &rowsbftodetI, &colsbftodetI); // Fill matrix @@ -1676,8 +1714,14 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in int colsI = 0; //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); - //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); + //printf("Isomo=%ld\n",Isomo); getOverlapMatrix_withDet(bftodetmatrixI, rowsbftodetI, colsbftodetI, Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); + if(Isomo == 0){ + rowsI = 1; + colsI = 1; + } + + //printf("Isomo=%ld\n",Isomo); orthoMatrixI = malloc(rowsI*colsI*sizeof(double)); @@ -1689,6 +1733,7 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in int rowsbftodetJ, colsbftodetJ; + //printf(" 2Calling convertBFtoDetBasis Jsomo=%ld MS=%ld\n",Jsomo,MS); convertBFtoDetBasis(Jsomo, MS, &bftodetmatrixJ, &rowsbftodetJ, &colsbftodetJ); int rowsJ = 0; @@ -1696,6 +1741,10 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in // Fill matrix //getOverlapMatrix(Jsomo, MS, &overlapMatrixJ, &rowsJ, &colsJ, &NSOMO); getOverlapMatrix_withDet(bftodetmatrixJ, rowsbftodetJ, colsbftodetJ, Jsomo, MS, &overlapMatrixJ, &rowsJ, &colsJ, &NSOMO); + if(Jsomo == 0){ + rowsJ = 1; + colsJ = 1; + } orthoMatrixJ = malloc(rowsJ*colsJ*sizeof(double)); @@ -1713,18 +1762,25 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in int transA=false; int transB=false; + //printf("1Calling blas\n"); + //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsbftodetI,colsbftodetI,rowsA,colsA); callBlasMatxMat(bftodetmatrixI, rowsbftodetI, colsbftodetI, ApqIJ, rowsA, colsA, bfIApqIJ, transA, transB); + //printf("done\n"); // now transform I in csf basis double *CSFIApqIJ = malloc(rowsI*colsA*sizeof(double)); transA = false; transB = false; + //printf("2Calling blas\n"); + //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,colsI,colsI,colsA); callBlasMatxMat(orthoMatrixI, rowsI, colsI, bfIApqIJ, colsI, colsA, CSFIApqIJ, transA, transB); // now transform J in BF basis double *CSFIbfJApqIJ = malloc(rowsI*rowsbftodetJ*sizeof(double)); transA = false; transB = true; + //printf("3Calling blas\n"); + //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,colsA,rowsbftodetJ,colsbftodetJ); callBlasMatxMat(CSFIApqIJ, rowsI, colsA, bftodetmatrixJ, rowsbftodetJ, colsbftodetJ, CSFIbfJApqIJ, transA, transB); // now transform J in CSF basis @@ -1735,13 +1791,14 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in double *tmpCSFICSFJApqIJ = malloc(rowsI*rowsJ*sizeof(double)); transA = false; transB = true; + //printf("4Calling blas\n"); + //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,rowsbftodetJ,rowsJ,colsJ); callBlasMatxMat(CSFIbfJApqIJ, rowsI, rowsbftodetJ, orthoMatrixJ, rowsJ, colsJ, tmpCSFICSFJApqIJ, transA, transB); // Transfer to actual buffer in Fortran order for(int i = 0; i < rowsI; i++) for(int j = 0; j < rowsJ; j++) CSFICSFJApqIJ[j*rowsI + i] = tmpCSFICSFJApqIJ[i*rowsJ + j]; - // Garbage collection free(overlapMatrixI); free(overlapMatrixJ); diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f index f73362eb..cea7640c 100644 --- a/src/csf/configuration_CI_sigma_helpers.irp.f +++ b/src/csf/configuration_CI_sigma_helpers.irp.f @@ -1,3 +1,592 @@ +use bitmasks + + BEGIN_PROVIDER [ integer(bit_kind), alphasIcfg_list , (N_int,2,N_configuration,mo_num*(mo_num))] +&BEGIN_PROVIDER [ integer, NalphaIcfg_list, (N_configuration) ] + implicit none + !use bitmasks + BEGIN_DOC + ! Documentation for alphasI + ! Returns the associated alpha's for + ! the input configuration Icfg. + END_DOC + + integer :: idxI ! The id of the Ith CFG + integer(bit_kind) :: Icfg(N_int,2) + integer :: NalphaIcfg + logical,dimension(:,:),allocatable :: tableUniqueAlphas + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: ndiffAll + integer :: i,ii + integer :: j,jj + integer :: k,kk + integer :: kstart + integer :: kend + integer :: Nsomo_I + integer :: hole + integer :: p + integer :: q + integer :: countalphas + logical :: pqAlreadyGenQ + logical :: pqExistsQ + logical :: ppExistsQ + integer*8 :: MS + + double precision :: t0, t1 + call wall_time(t0) + + MS = elec_alpha_num-elec_beta_num + + allocate(tableUniqueAlphas(mo_num,mo_num)) + NalphaIcfg_list = 0 + + do idxI = 1, N_configuration + + Icfg = psi_configuration(:,:,idxI) + + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) + + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + do ii = 1,n_act_orb + i = list_act(ii) + if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = i + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do ii = 1,n_act_orb + i = list_act(ii) + if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = i + holetype(nholes) = 2 + endif + end do + + ! find vmos + listvmos = -1 + vmotype = -1 + nvmos = 0 + do ii = 1,n_act_orb + i = list_act(ii) + if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then + if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = i + vmotype(nvmos) = 1 + else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then + nvmos += 1 + listvmos(nvmos) = i + vmotype(nvmos) = 2 + end if + end if + end do + + tableUniqueAlphas = .FALSE. + + ! Now find the allowed (p,q) excitations + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) + Nsomo_I = POPCNT(Isomo) + if(Nsomo_I .EQ. 0) then + kstart = 1 + else + kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)) + endif + kend = idxI-1 + + do i = 1,nholes + p = listholes(i) + do j = 1,nvmos + q = listvmos(j) + if(p .EQ. q) cycle + if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then + ! SOMO -> VMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = Idomo + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) + kend = idxI-1 + else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then + ! SOMO -> SOMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBSET(Idomo,q-1) + ! Check for Minimal alpha electrons (MS) + if(POPCNT(Jsomo).ge.MS)then + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) + kend = idxI-1 + else + cycle + endif + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then + ! DOMO -> VMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + kstart = cfg_seniority_index(Nsomo_I) + kend = idxI-1 + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then + ! DOMO -> SOMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + Jdomo = IBSET(Jdomo,q-1) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) + kend = idxI-1 + else + print*,"Something went wrong in obtain_associated_alphaI" + endif + ! Check for Minimal alpha electrons (MS) + if(POPCNT(Jsomo).lt.MS)then + cycle + endif + + ! Again, we don't have to search from 1 + ! we just use seniority to find the + ! first index with NSOMO - 2 to NSOMO + 2 + ! this is what is done in kstart, kend + + pqAlreadyGenQ = .FALSE. + ! First check if it can be generated before + do k = kstart, kend + diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k))) + ndiffSOMO = POPCNT(diffSOMO) + if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle + diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then + if((ndiffSOMO+ndiffDOMO) .EQ. 0) then + pqAlreadyGenQ = .TRUE. + ppExistsQ = .TRUE. + EXIT + endif + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + pqAlreadyGenQ = .TRUE. + EXIT + endif + end do + + if(pqAlreadyGenQ) cycle + + pqExistsQ = .FALSE. + + if(.NOT. pqExistsQ) then + tableUniqueAlphas(p,q) = .TRUE. + endif + end do + end do + + !print *,tableUniqueAlphas(:,:) + + ! prune list of alphas + Isomo = Icfg(1,1) + Idomo = Icfg(1,2) + Jsomo = Icfg(1,1) + Jdomo = Icfg(1,2) + NalphaIcfg = 0 + do i = 1, nholes + p = listholes(i) + do j = 1, nvmos + q = listvmos(j) + if(p .EQ. q) cycle + if(tableUniqueAlphas(p,q)) then + if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then + ! SOMO -> VMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = Idomo + else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then + ! SOMO -> SOMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBSET(Idomo,q-1) + if(POPCNT(Jsomo).ge.MS)then + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) + kend = idxI-1 + else + cycle + endif + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then + ! DOMO -> VMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then + ! DOMO -> SOMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + Jdomo = IBSET(Jdomo,q-1) + else + print*,"Something went wrong in obtain_associated_alphaI" + endif + + ! SOMO + !print *,i,j,"|",NalphaIcfg, Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + if(POPCNT(Jsomo) .ge. NSOMOMin) then + NalphaIcfg += 1 + alphasIcfg_list(1,1,idxI,NalphaIcfg) = Jsomo + alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + NalphaIcfg_list(idxI) = NalphaIcfg + endif + endif + end do + end do + + ! Check if this Icfg has been previously generated as a mono + ppExistsQ = .False. + Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) + Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) + do k = kstart, idxI-1 + diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k))) + ndiffSOMO = POPCNT(diffSOMO) + if (ndiffSOMO /= 2) cycle + diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4)) then + ppExistsQ = .TRUE. + EXIT + endif + end do + ! Diagonal part (pp,qq) + if(nholes > 0 .AND. (.NOT. ppExistsQ))then + ! SOMO + if(POPCNT(Jsomo) .ge. NSOMOMin) then + NalphaIcfg += 1 + alphasIcfg_list(1,1,idxI,NalphaIcfg) = Icfg(1,1) + alphasIcfg_list(1,2,idxI,NalphaIcfg) = Icfg(1,2) + NalphaIcfg_list(idxI) = NalphaIcfg + endif + endif + + NalphaIcfg = 0 + enddo ! end loop idxI + call wall_time(t1) + print *, 'Preparation : ', t1 - t0 + +END_PROVIDER + + subroutine obtain_associated_alphaI(idxI, Icfg, alphasIcfg, NalphaIcfg) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for alphasI + ! Returns the associated alpha's for + ! the input configuration Icfg. + END_DOC + + integer,intent(in) :: idxI ! The id of the Ith CFG + integer(bit_kind),intent(in) :: Icfg(N_int,2) + integer,intent(out) :: NalphaIcfg + integer(bit_kind),intent(out) :: alphasIcfg(N_int,2,*) + logical,dimension(:,:),allocatable :: tableUniqueAlphas + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: ndiffAll + integer :: i, ii + integer :: j, jj + integer :: k, kk + integer :: kstart + integer :: kend + integer :: Nsomo_I + integer :: hole + integer :: p + integer :: q + integer :: countalphas + logical :: pqAlreadyGenQ + logical :: pqExistsQ + logical :: ppExistsQ + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) + !print*,"Input cfg" + !call debug_spindet(Isomo,1) + !call debug_spindet(Idomo,1) + + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + do ii = 1,n_act_orb + i = list_act(ii) + if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = i + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do ii = 1,n_act_orb + i = list_act(ii) + if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = i + holetype(nholes) = 2 + endif + end do + + ! find vmos + listvmos = -1 + vmotype = -1 + nvmos = 0 + do ii = 1,n_act_orb + i = list_act(ii) + !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) + if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = i + vmotype(nvmos) = 1 + else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0 ) then + nvmos += 1 + listvmos(nvmos) = i + vmotype(nvmos) = 2 + end if + end do + + !print *,"Nvmo=",nvmos + !print *,listvmos + !print *,vmotype + + allocate(tableUniqueAlphas(mo_num,mo_num)) + tableUniqueAlphas = .FALSE. + + ! Now find the allowed (p,q) excitations + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) + Nsomo_I = POPCNT(Isomo) + if(Nsomo_I .EQ. 0) then + kstart = 1 + else + kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)) + endif + kend = idxI-1 + !print *,"Isomo" + !call debug_spindet(Isomo,1) + !call debug_spindet(Idomo,1) + + !print *,"Nholes=",nholes," Nvmos=",nvmos, " idxi=",idxI + !do i = 1,nholes + ! print *,i,"->",listholes(i) + !enddo + !do i = 1,nvmos + ! print *,i,"->",listvmos(i) + !enddo + + do i = 1,nholes + p = listholes(i) + do j = 1,nvmos + q = listvmos(j) + if(p .EQ. q) cycle + if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then + ! SOMO -> VMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = Idomo + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) + kend = idxI-1 + else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then + ! SOMO -> SOMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBSET(Idomo,q-1) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) + kend = idxI-1 + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then + ! DOMO -> VMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + kstart = cfg_seniority_index(Nsomo_I) + kend = idxI-1 + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then + ! DOMO -> SOMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + Jdomo = IBSET(Jdomo,q-1) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) + kend = idxI-1 + else + print*,"Something went wrong in obtain_associated_alphaI" + endif + + ! Again, we don't have to search from 1 + ! we just use seniortiy to find the + ! first index with NSOMO - 2 to NSOMO + 2 + ! this is what is done in kstart, kend + + pqAlreadyGenQ = .FALSE. + ! First check if it can be generated before + do k = kstart, kend + diffSOMO = IEOR(Jsomo,iand(act_bitmask(1,1),psi_configuration(1,1,k))) + ndiffSOMO = POPCNT(diffSOMO) + if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle + diffDOMO = IEOR(Jdomo,iand(act_bitmask(1,1),psi_configuration(1,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then + if((ndiffSOMO+ndiffDOMO) .EQ. 0) then + pqAlreadyGenQ = .TRUE. + ppExistsQ = .TRUE. + EXIT + endif + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + pqAlreadyGenQ = .TRUE. + !EXIT + !ppExistsQ = .TRUE. + !print *,i,k,ndiffSOMO,ndiffDOMO + !call debug_spindet(Jsomo,1) + !call debug_spindet(Jdomo,1) + !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)),1) + !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)),1) + EXIT + endif + end do + + !print *,"(,",p,",",q,")",pqAlreadyGenQ + + if(pqAlreadyGenQ) cycle + + pqExistsQ = .FALSE. + ! now check if this exists in the selected list + !do k = idxI+1, N_configuration + ! diffSOMO = IEOR(OR(reunion_of_act_virt_bitmask(1,1),Jsomo),psi_configuration(1,1,k)) + ! diffDOMO = IEOR(OR(reunion_of_act_virt_bitmask(1,1),Jdomo),psi_configuration(1,2,k)) + ! ndiffSOMO = POPCNT(diffSOMO) + ! ndiffDOMO = POPCNT(diffDOMO) + ! if((ndiffSOMO + ndiffDOMO) .EQ. 0) then + ! pqExistsQ = .TRUE. + ! EXIT + ! endif + !end do + + if(.NOT. pqExistsQ) then + tableUniqueAlphas(p,q) = .TRUE. + !print *,p,q + !call debug_spindet(Jsomo,1) + !call debug_spindet(Jdomo,1) + endif + end do + end do + + !print *,tableUniqueAlphas(:,:) + + ! prune list of alphas + Isomo = Icfg(1,1) + Idomo = Icfg(1,2) + Jsomo = Icfg(1,1) + Jdomo = Icfg(1,2) + NalphaIcfg = 0 + do i = 1, nholes + p = listholes(i) + do j = 1, nvmos + q = listvmos(j) + if(p .EQ. q) cycle + if(tableUniqueAlphas(p,q)) then + if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then + ! SOMO -> VMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = Idomo + else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then + ! SOMO -> SOMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBSET(Idomo,q-1) + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then + ! DOMO -> VMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then + ! DOMO -> SOMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + Jdomo = IBSET(Jdomo,q-1) + else + print*,"Something went wrong in obtain_associated_alphaI" + endif + + ! SOMO + NalphaIcfg += 1 + !print *,i,j,"|",NalphaIcfg + alphasIcfg(1,1,NalphaIcfg) = Jsomo + alphasIcfg(1,2,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + !print *,"I = ",idxI, " Na=",NalphaIcfg," - ",Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + endif + end do + end do + + ! Check if this Icfg has been previously generated as a mono + ppExistsQ = .False. + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) + do k = 1, idxI-1 + diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k))) + diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffSOMO = POPCNT(diffSOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + ppExistsQ = .TRUE. + EXIT + endif + end do + ! Diagonal part (pp,qq) + if(nholes > 0 .AND. (.NOT. ppExistsQ))then + ! SOMO + NalphaIcfg += 1 + !print *,p,q,"|",holetype(i),vmotype(j),NalphaIcfg + !call debug_spindet(Idomo,1) + !call debug_spindet(Jdomo,1) + alphasIcfg(1,1,NalphaIcfg) = Icfg(1,1) + alphasIcfg(1,2,NalphaIcfg) = Icfg(1,2) + endif + + end subroutine + function getNSOMO(Icfg) result(NSOMO) implicit none integer(bit_kind),intent(in) :: Icfg(N_int,2) @@ -8,98 +597,3 @@ NSOMO += POPCNT(Icfg(i,1)) enddo end function getNSOMO - -subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmodel) - implicit none - BEGIN_DOC - ! This function converts the orbital ids - ! in real space to those used in model space - ! in order to identify the matrices required - ! for the calculation of MEs. - ! - ! The type of excitations are ordered as follows: - ! Type 1 - SOMO -> SOMO - ! Type 2 - DOMO -> VMO - ! Type 3 - SOMO -> VMO - ! Type 4 - DOMO -> SOMO - END_DOC - integer(bit_kind),intent(in) :: Ialpha(N_int,2) - integer(bit_kind),intent(in) :: Jcfg(N_int,2) - integer,intent(in) :: p,q - integer,intent(in) :: extype - integer,intent(out) :: pmodel,qmodel - integer*8 :: Isomo - integer*8 :: Idomo - integer*8 :: Jsomo - integer*8 :: Jdomo - integer*8 :: mask - integer*8 :: Isomotmp - integer*8 :: Jsomotmp - integer :: pos0,pos0prev - - ! TODO Flag (print) when model space indices is > 64 - Isomo = Ialpha(1,1) - Idomo = Ialpha(1,2) - Jsomo = Jcfg(1,1) - Jdomo = Jcfg(1,2) - pos0prev = 0 - pmodel = p - qmodel = q - - if(p .EQ. q) then - pmodel = 1 - qmodel = 1 - else - !print *,"input pq=",p,q,"extype=",extype - !call debug_spindet(Isomo,1) - !call debug_spindet(Idomo,1) - !call debug_spindet(Jsomo,1) - !call debug_spindet(Jdomo,1) - select case(extype) - case (1) - ! SOMO -> SOMO - ! remove all domos - !print *,"type -> SOMO -> SOMO" - mask = ISHFT(1_8,p) - 1 - Isomotmp = IAND(Isomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) - mask = ISHFT(1_8,q) - 1 - Isomotmp = IAND(Isomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) - case (2) - ! DOMO -> VMO - ! remove all domos except one at p - !print *,"type -> DOMO -> VMO" - mask = ISHFT(1_8,p) - 1 - Jsomotmp = IAND(Jsomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) - mask = ISHFT(1_8,q) - 1 - Jsomotmp = IAND(Jsomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) - case (3) - ! SOMO -> VMO - !print *,"type -> SOMO -> VMO" - !Isomo = IEOR(Isomo,Jsomo) - mask = ISHFT(1_8,p) - 1 - Isomo = IAND(Isomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) - mask = ISHFT(1_8,q) - 1 - Jsomo = IAND(Jsomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) - case (4) - ! DOMO -> SOMO - ! remove all domos except one at p - !print *,"type -> DOMO -> SOMO" - !Isomo = IEOR(Isomo,Jsomo) - mask = ISHFT(1_8,p) - 1 - Jsomo = IAND(Jsomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) - mask = ISHFT(1_8,q) - 1 - Isomo = IAND(Isomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) - case default - print *,"something is wrong in convertOrbIdsToModelSpaceIds" - end select - endif - !print *,p,q,"model ids=",pmodel,qmodel -end subroutine convertOrbIdsToModelSpaceIds diff --git a/src/csf/configurations.irp.f b/src/csf/configurations.irp.f index 8e2a513c..c11a49a4 100644 --- a/src/csf/configurations.irp.f +++ b/src/csf/configurations.irp.f @@ -458,8 +458,9 @@ end END_PROVIDER - BEGIN_PROVIDER [ integer, cfg_seniority_index, (0:elec_num) ] + BEGIN_PROVIDER [ integer, cfg_seniority_index, (0:elec_num+2) ] &BEGIN_PROVIDER [ integer, cfg_nsomo_max ] +&BEGIN_PROVIDER [ integer, cfg_nsomo_min ] implicit none BEGIN_DOC ! Returns the index in psi_configuration of the first cfg with @@ -467,9 +468,10 @@ END_PROVIDER ! ! cfg_nsomo_max : Max number of SOMO in the current wave function END_DOC - integer :: i, k, s, sold + integer :: i, k, s, sold, soldmin cfg_seniority_index(:) = -1 sold = -1 + soldmin = 2000 cfg_nsomo_max = 0 do i=1,N_configuration s = 0 @@ -482,6 +484,10 @@ END_PROVIDER cfg_seniority_index(s) = i cfg_nsomo_max = s endif + if (soldmin .GT. s ) then + soldmin = s + cfg_nsomo_min = s + endif enddo END_PROVIDER @@ -743,41 +749,112 @@ BEGIN_PROVIDER [ integer(bit_kind), dominant_dets_of_cfgs, (N_int,2,N_dominant_d enddo END_PROVIDER -subroutine binary_search_cfg(cfgInp,addcfg) +subroutine binary_search_cfg(cfgInp,addcfg,bit_tmp) use bitmasks implicit none BEGIN_DOC ! Documentation for binary_search - ! - ! Does a binary search to find + ! + ! Does a binary search to find ! the address of a configuration in a list of ! configurations. END_DOC integer(bit_kind), intent(in) :: cfgInp(N_int,2) integer , intent(out) :: addcfg - integer :: i,j,k,r,l - integer*8 :: key, key2 - logical :: found - !integer*8, allocatable :: bit_tmp(:) - !integer*8, external :: configuration_search_key + integer*8, intent(in) :: bit_tmp(0:N_configuration+1) - !allocate(bit_tmp(0:N_configuration)) - !bit_tmp(0) = 0 - do i=1,N_configuration - !bit_tmp(i) = configuration_search_key(psi_configuration(1,1,i),N_int) - found = .True. - do k=1,N_int - found = found .and. (psi_configuration(k,1,i) == cfgInp(k,1)) & - .and. (psi_configuration(k,2,i) == cfgInp(k,2)) - enddo - if (found) then - addcfg = i - exit + logical :: found + integer :: l, r, j, k + integer*8 :: key + + integer*8, external :: configuration_search_key + + key = configuration_search_key(cfgInp,N_int) + + ! Binary search + l = 0 + r = N_configuration+1 +IRP_IF WITHOUT_SHIFTRL + j = ishft(r-l,-1) +IRP_ELSE + j = shiftr(r-l,1) +IRP_ENDIF + do while (j>=1) + j = j+l + if (bit_tmp(j) == key) then + ! Find 1st element which matches the key + if (j > 1) then + do while (j>1 .and. bit_tmp(j-1) == key) + j = j-1 + enddo + endif + ! Find correct element matching the key + do while (bit_tmp(j) == key) + found = .True. + do k=1,N_int + found = found .and. (psi_configuration(k,1,j) == cfgInp(k,1))& + .and. (psi_configuration(k,2,j) == cfgInp(k,2)) + enddo + if (found) then + addcfg = j + return + endif + j = j+1 + enddo + addcfg = -1 + return + else if (bit_tmp(j) > key) then + r = j + else + l = j endif +IRP_IF WITHOUT_SHIFTRL + j = ishft(r-l,-1) +IRP_ELSE + j = shiftr(r-l,1) +IRP_ENDIF enddo + addcfg = -1 + return + end subroutine +!subroutine binary_search_cfg(cfgInp,addcfg) +! use bitmasks +! implicit none +! BEGIN_DOC +! ! Documentation for binary_search +! ! +! ! Does a binary search to find +! ! the address of a configuration in a list of +! ! configurations. +! END_DOC +! integer(bit_kind), intent(in) :: cfgInp(N_int,2) +! integer , intent(out) :: addcfg +! integer :: i,j,k,r,l +! integer*8 :: key, key2 +! logical :: found +! !integer*8, allocatable :: bit_tmp(:) +! !integer*8, external :: configuration_search_key +! +! !allocate(bit_tmp(0:N_configuration)) +! !bit_tmp(0) = 0 +! do i=1,N_configuration +! !bit_tmp(i) = configuration_search_key(psi_configuration(1,1,i),N_int) +! found = .True. +! do k=1,N_int +! found = found .and. (psi_configuration(k,1,i) == cfgInp(k,1)) & +! .and. (psi_configuration(k,2,i) == cfgInp(k,2)) +! enddo +! if (found) then +! addcfg = i +! exit +! endif +! enddo +! +!end subroutine +! BEGIN_PROVIDER [ integer, psi_configuration_to_psi_det, (2,N_configuration) ] &BEGIN_PROVIDER [ integer, psi_configuration_to_psi_det_data, (N_det) ] diff --git a/src/csf/conversion.irp.f b/src/csf/conversion.irp.f index fecc6123..bdd8b327 100644 --- a/src/csf/conversion.irp.f +++ b/src/csf/conversion.irp.f @@ -1,3 +1,16 @@ +BEGIN_PROVIDER [ double precision, psi_csf_coef, (N_csf, N_states) ] + implicit none + BEGIN_DOC + ! Wafe function in CSF basis + END_DOC + + double precision, allocatable :: buffer(:,:) + allocate ( buffer(N_det, N_states) ) + buffer(1:N_det, 1:N_states) = psi_coef(1:N_det, 1:N_states) + call convertWFfromDETtoCSF(N_states, buffer, psi_csf_coef) +END_PROVIDER + + subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out) use cfunctions use bitmasks @@ -12,7 +25,7 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out) double precision, intent(out) :: psi_coef_cfg_out(n_CSF,N_st) integer*8 :: Isomo, Idomo, mask integer(bit_kind) :: Ialpha(N_int) ,Ibeta(N_int) - integer :: rows, cols, i, j, k + integer :: rows, cols, i, j, k, salpha integer :: startdet, enddet integer :: ndetI integer :: getNSOMO @@ -26,6 +39,8 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out) integer s, bfIcfg integer countcsf + integer MS + MS = elec_alpha_num-elec_beta_num countcsf = 0 phasedet = 1.0d0 do i = 1,N_configuration @@ -44,12 +59,19 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out) enddo enddo - s = 0 + s = 0 ! s == total number of SOMOs do k=1,N_int if (psi_configuration(k,1,i) == 0_bit_kind) cycle s = s + popcnt(psi_configuration(k,1,i)) enddo - bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) + + if(iand(s,1) .EQ. 0) then + salpha = (s + MS)/2 + bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) + else + salpha = (s + MS)/2 + bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) + endif ! perhaps blocking with CFGs of same seniority ! can be more efficient @@ -80,7 +102,7 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det) double precision,intent(in) :: psi_coef_cfg_in(n_CSF,N_st) double precision,intent(out) :: psi_coef_det(N_det,N_st) double precision :: tmp_psi_coef_det(maxDetDimPerBF,N_st) - integer :: s, bfIcfg + integer :: s, bfIcfg, salpha integer :: countcsf integer(bit_kind) :: Ialpha(N_int), Ibeta(N_int) integer :: rows, cols, i, j, k @@ -91,6 +113,8 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det) double precision,allocatable :: tempCoeff (:,:) double precision :: phasedet integer :: idx + integer MS + MS = elec_alpha_num-elec_beta_num countcsf = 0 @@ -104,7 +128,8 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det) if (psi_configuration(k,1,i) == 0_bit_kind) cycle s = s + popcnt(psi_configuration(k,1,i)) enddo - bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) + salpha = (s + MS)/2 + bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) allocate(tempCoeff(bfIcfg,N_st)) diff --git a/src/csf/create_excitations.irp.f b/src/csf/create_excitations.irp.f index c1560148..1c59d579 100644 --- a/src/csf/create_excitations.irp.f +++ b/src/csf/create_excitations.irp.f @@ -226,7 +226,7 @@ subroutine generate_all_singles_cfg(cfg,singles,n_singles,Nint) enddo end -subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint) +subroutine generate_all_singles_cfg_with_type(bit_tmp,cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint) implicit none use bitmasks BEGIN_DOC @@ -238,6 +238,7 @@ subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_sin ! ex_type_singles : on output contains type of excitations : ! END_DOC + integer*8, intent(in) :: bit_tmp(0:N_configuration+1) integer, intent(in) :: Nint integer, intent(inout) :: n_singles integer, intent(out) :: idxs_singles(*) @@ -248,20 +249,26 @@ subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_sin integer(bit_kind) :: Jdet(Nint,2) integer :: i,k, n_singles_ma, i_hole, i_particle, ex_type, addcfg + integer :: ii,kk integer(bit_kind) :: single(Nint,2) logical :: i_ok + n_singles = 0 !TODO !Make list of Somo and Domo for holes !Make list of Unocc and Somo for particles - do i_hole = 1+n_core_orb, n_core_orb + n_act_orb - do i_particle = 1+n_core_orb, n_core_orb + n_act_orb + !do i_hole = 1+n_core_orb, n_core_orb + n_act_orb + do ii = 1, n_act_orb + i_hole = list_act(ii) + !do i_particle = 1+n_core_orb, n_core_orb + n_act_orb + do kk = 1, n_act_orb + i_particle = list_act(kk) if(i_hole .EQ. i_particle) cycle addcfg = -1 call do_single_excitation_cfg_with_type(cfgInp,single,i_hole,i_particle,ex_type,i_ok) if (i_ok) then - call binary_search_cfg(single,addcfg) + call binary_search_cfg(single,addcfg,bit_tmp) if(addcfg .EQ. -1) cycle n_singles = n_singles + 1 do k=1,Nint diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f new file mode 100644 index 00000000..7d7ae09b --- /dev/null +++ b/src/csf/obtain_I_foralpha.irp.f @@ -0,0 +1,397 @@ +subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, nconnectedI,ntotalconnectedI) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for obtain_connected_I_foralpha + ! This function returns all those selected configurations + ! which are connected to the input configuration + ! givenI by a single excitation. + ! + ! The type of excitations are ordered as follows: + ! Type 1 - SOMO -> SOMO + ! Type 2 - DOMO -> VMO + ! Type 3 - SOMO -> VMO + ! Type 4 - DOMO -> SOMO + ! + ! Order of operators + ! \alpha> = a^\dag_p a_q |I> = E_pq |I> + END_DOC + integer ,intent(in) :: idxI + integer(bit_kind),intent(in) :: givenI(N_int,2) + integer(bit_kind),intent(out) :: connectedI(N_int,2,*) + integer ,intent(out) :: idxs_connectedI(*) + integer,intent(out) :: nconnectedI + integer,intent(out) :: ntotalconnectedI + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: IJsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes + integer :: listholes(mo_num) + integer :: holetype(mo_num) + integer :: end_index + integer :: Nsomo_I + + ! + ! 2 2 1 1 0 0 : 1 1 0 0 0 0 + ! 0 0 1 1 0 0 + ! + ! 2 1 1 1 1 0 : 1 0 0 0 0 0 + ! 0 1 1 1 1 0 + !xorS 0 1 0 0 1 0 : 2 + !xorD 0 1 0 0 0 0 : 1 + !xorSD 0 0 0 0 1 0 : 1 + ! ----- + ! 4 + ! 1 1 1 1 1 1 : 0 0 0 0 0 0 + ! 1 1 1 1 1 1 + ! 1 1 0 0 1 1 : 4 + ! 1 1 0 0 0 0 : 2 + ! 0 0 0 0 1 1 : 2 + ! ----- + ! 8 + ! + + nconnectedI = 0 + ntotalconnectedI = 0 + end_index = N_configuration + + ! Since CFGs are sorted wrt to seniority + ! we don't have to search the full CFG list + Isomo = givenI(1,1) + Idomo = givenI(1,2) + Nsomo_I = POPCNT(Isomo) + end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_I+6,elec_num))-1) + if(end_index .LT. 0) end_index= N_configuration + !end_index = N_configuration + !print *,"Start and End = ",idxI, end_index + + + p = 0 + q = 0 + do i=idxI,end_index + !if(.True.) then + ! nconnectedI += 1 + ! connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + ! idxs_connectedI(nconnectedI)=i + ! cycle + !endif + Isomo = givenI(1,1) + Idomo = givenI(1,2) + Jsomo = psi_configuration(1,1,i) + Jdomo = psi_configuration(1,2,i) + diffSOMO = IEOR(Isomo,Jsomo) + ndiffSOMO = POPCNT(diffSOMO) + diffDOMO = IEOR(Idomo,Jdomo) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + !------- + ! MONO | + !------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((nxordiffSOMODOMO .EQ. 8) .AND. ndiffSOMO .EQ. 4) then + !---------------------------- + ! DOMO -> VMO + DOMO -> VMO | + !---------------------------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((nxordiffSOMODOMO .EQ. 6) .AND. ndiffSOMO .EQ. 2) then + !---------------------------- + ! DOUBLE + !---------------------------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((nxordiffSOMODOMO .EQ. 2) .AND. ndiffSOMO .EQ. 3) then + !----------------- + ! DOUBLE + !----------------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 0) then + !----------------- + ! DOUBLE + !----------------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then + !-------- + ! I = I | + !-------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)= i + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + Isomo = psi_configuration(1,1,i) + Idomo = psi_configuration(1,2,i) + do iii = 1,n_act_orb + ii = list_act(iii) + if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do iii = 1,n_act_orb + ii = list_act(iii) + if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 2 + endif + end do + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)*nholes) + endif + end do + +end subroutine obtain_connected_J_givenI + +subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for obtain_connected_I_foralpha + ! This function returns all those selected configurations + ! which are connected to the input configuration + ! Ialpha by a single excitation. + ! + ! The type of excitations are ordered as follows: + ! Type 1 - SOMO -> SOMO + ! Type 2 - DOMO -> VMO + ! Type 3 - SOMO -> VMO + ! Type 4 - DOMO -> SOMO + ! + ! Order of operators + ! \alpha> = a^\dag_p a_q |I> = E_pq |I> + END_DOC + integer ,intent(in) :: idxI + integer(bit_kind),intent(in) :: Ialpha(N_int,2) + integer(bit_kind),intent(out) :: connectedI(N_int,2,*) + integer ,intent(out) :: idxs_connectedI(*) + integer,intent(out) :: nconnectedI + integer,intent(out) :: excitationIds(2,*) + integer,intent(out) :: excitationTypes(*) + real*8 ,intent(out) :: diagfactors(*) + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: IJsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes + integer :: listholes(mo_num) + integer :: holetype(mo_num) + integer :: end_index + integer :: Nsomo_alpha + integer*8 :: MS + MS = elec_alpha_num-elec_beta_num + + nconnectedI = 0 + end_index = N_configuration + + ! Since CFGs are sorted wrt to seniority + ! we don't have to search the full CFG list + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Nsomo_alpha = POPCNT(Isomo) + end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1) + if(end_index .LT. 0) end_index= N_configuration + end_index = N_configuration + + + p = 0 + q = 0 + if (N_int > 1) stop 'obtain_connected_i_foralpha : N_int > 1' + do i=idxI,end_index + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Jsomo = psi_configuration(1,1,i) + Jdomo = psi_configuration(1,2,i) + ! Check for Minimal alpha electrons (MS) + if(POPCNT(Isomo).lt.MS)then + cycle + endif + diffSOMO = IEOR(Isomo,Jsomo) + ndiffSOMO = POPCNT(diffSOMO) + !if(idxI.eq.1)then + ! print *," \t idxI=",i," diffS=",ndiffSOMO," popJs=", POPCNT(Jsomo)," popIs=",POPCNT(Isomo) + !endif + diffDOMO = IEOR(Idomo,Jdomo) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + select case(ndiffDOMO) + case (0) + ! SOMO -> VMO + !print *,"obt SOMO -> VMO" + extyp = 3 + IJsomo = IEOR(Isomo, Jsomo) +!IRP_IF WITHOUT_TRAILZ +! p = (popcnt(ieor( IAND(Isomo,IJsomo) , IAND(Isomo,IJsomo) -1))-1) + 1 +!IRP_ELSE + p = TRAILZ(IAND(Isomo,IJsomo)) + 1 +!IRP_ENDIF + IJsomo = IBCLR(IJsomo,p-1) +!IRP_IF WITHOUT_TRAILZ +! q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1 +!IRP_ELSE + q = TRAILZ(IJsomo) + 1 +!IRP_ENDIF + case (1) + ! DOMO -> VMO + ! or + ! SOMO -> SOMO + nsomoJ = POPCNT(Jsomo) + nsomoalpha = POPCNT(Isomo) + if(nsomoJ .GT. nsomoalpha) then + ! DOMO -> VMO + !print *,"obt DOMO -> VMO" + extyp = 2 +!IRP_IF WITHOUT_TRAILZ +! p = (popcnt(ieor( IEOR(Idomo,Jdomo),IEOR(Idomo,Jdomo) -1))-1) + 1 +!IRP_ELSE + p = TRAILZ(IEOR(Idomo,Jdomo)) + 1 +!IRP_ENDIF + Isomo = IEOR(Isomo, Jsomo) + Isomo = IBCLR(Isomo,p-1) +!IRP_IF WITHOUT_TRAILZ +! q = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 +!IRP_ELSE + q = TRAILZ(Isomo) + 1 +!IRP_ENDIF + else + ! SOMO -> SOMO + !print *,"obt SOMO -> SOMO" + extyp = 1 +!IRP_IF WITHOUT_TRAILZ +! q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1 +!IRP_ELSE + q = TRAILZ(IEOR(Idomo,Jdomo)) + 1 +!IRP_ENDIF + Isomo = IEOR(Isomo, Jsomo) + Isomo = IBCLR(Isomo,q-1) +!IRP_IF WITHOUT_TRAILZ +! p = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 +!IRP_ELSE + p = TRAILZ(Isomo) + 1 +!IRP_ENDIF + ! Check for Minimal alpha electrons (MS) + !if(POPCNT(Isomo).lt.MS)then + ! cycle + !endif + end if + case (2) + ! DOMO -> SOMO + !print *,"obt DOMO -> SOMO" + extyp = 4 + IJsomo = IEOR(Isomo, Jsomo) +!IRP_IF WITHOUT_TRAILZ +! p = (popcnt(ieor( IAND(Jsomo,IJsomo), IAND(Jsomo,IJsomo)-1))-1) + 1 +!IRP_ELSE + p = TRAILZ(IAND(Jsomo,IJsomo)) + 1 +!IRP_ENDIF + IJsomo = IBCLR(IJsomo,p-1) +!IRP_IF WITHOUT_TRAILZ +! q = (popcnt(ieor( IJsomo , IJsomo -1))-1) + 1 +!IRP_ELSE + q = TRAILZ(IJsomo) + 1 +!IRP_ENDIF + case default + print *,"something went wront in get connectedI" + end select + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + nconnectedI += 1 + do k=1,N_int + connectedI(k,1,nconnectedI) = psi_configuration(k,1,i) + connectedI(k,2,nconnectedI) = psi_configuration(k,2,i) + enddo + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 1.0d0 + else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + Isomo = psi_configuration(1,1,i) + Idomo = psi_configuration(1,2,i) + do iii = 1,n_act_orb + ii = list_act(iii) + if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do iii = 1,n_act_orb + ii = list_act(iii) + if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 2 + endif + end do + + do k=1,nholes + p = listholes(k) + q = p + extyp = 1 + if(holetype(k) .EQ. 1) then + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 1.0d0 + else + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 2.0d0 + endif + enddo + endif + end do + +end subroutine obtain_connected_I_foralpha diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 85ed5f84..5c3c8fe1 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -1,10 +1,13 @@ real*8 function logabsgamma(x) implicit none real*8, intent(in) :: x - logabsgamma = log(abs(gamma(x))) + logabsgamma = 1.d32 ! Avoid floating point exception + if (x>0.d0) then + logabsgamma = log(abs(gamma(x))) + endif end function logabsgamma - BEGIN_PROVIDER [ integer, NSOMOMax] + &BEGIN_PROVIDER [ integer, NSOMOMin] &BEGIN_PROVIDER [ integer, NCSFMax] &BEGIN_PROVIDER [ integer*8, NMO] &BEGIN_PROVIDER [ integer, NBFMax] @@ -16,11 +19,21 @@ ! The maximum number of SOMOs for the current calculation. ! required for the calculation of prototype arrays. END_DOC + integer MS, ialpha + MS = elec_alpha_num-elec_beta_num NSOMOMax = min(elec_num, cfg_nsomo_max + 2) + if(AND(cfg_nsomo_min , 1) .eq. 0)then + NSOMOMin = max(0,cfg_nsomo_min-2) + else + NSOMOMin = max(1,cfg_nsomo_min-2) + else + NSOMOMin = max(1,cfg_nsomo_min-2) + endif ! Note that here we need NSOMOMax + 2 sizes - NCSFMax = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)-binom(NSOMOMax,((NSOMOMax+1)/2)+1)))) ! TODO: NCSFs for MS=0 + ialpha = (NSOMOMax + MS)/2 + NCSFMax = max(1,nint((binom(NSOMOMax,ialpha)-binom(NSOMOMax,ialpha+1)))) ! TODO: NCSFs for MS=0 (CHECK) NBFMax = NCSFMax - maxDetDimPerBF = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)))) + maxDetDimPerBF = max(1,nint((binom(NSOMOMax,ialpha)))) NMO = n_act_orb integer i,j,k,l integer startdet,enddet @@ -29,63 +42,113 @@ integer dimcsfpercfg integer detDimperBF real*8 :: coeff, binom1, binom2 - integer MS integer ncfgpersomo real*8, external :: logabsgamma detDimperBF = 0 - MS = elec_alpha_num-elec_beta_num ! number of cfgs = number of dets for 0 somos + n_CSF = cfg_seniority_index(NSOMOMin)-1 + ncfgprev = cfg_seniority_index(NSOMOMin) + !do i = 0-iand(MS,1)+2, NSOMOMax,2 + !!print *," i=",0," dimcsf=",1," ncfg=",ncfgprev, " senor=",cfg_seniority_index(0) + !!do i = NSOMOMin+2, NSOMOMax,2 + !! if(cfg_seniority_index(i) .EQ. -1)then + !! ncfgpersomo = N_configuration + 1 + !! else + !! ncfgpersomo = cfg_seniority_index(i) + !! endif + !!ncfg = ncfgpersomo - ncfgprev + !!!detDimperBF = max(1,nint((binom(i,(i+1)/2)))) + !!!dimcsfpercfg = max(1,nint((binom(i-2,(i-2+1)/2)-binom(i-2,((i-2+1)/2)+1)))) + !!n_CSF += ncfg * dimcsfpercfg + !!!if(cfg_seniority_index(i+2) == -1) EXIT + !!!if(detDimperBF > maxDetDimPerBF) maxDetDimPerBF = detDimperBF + !!ncfgprev = cfg_seniority_index(i) + !!print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " senor=",cfg_seniority_index(i) + !!enddo + !!print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration n_CSF = 0 - ncfgprev = cfg_seniority_index(0) - ncfgpersomo = ncfgprev - do i = iand(MS,1), NSOMOMax-2,2 - if(cfg_seniority_index(i) .EQ. -1) then - cycle - endif - if(cfg_seniority_index(i+2) .EQ. -1) then - ncfgpersomo = N_configuration + 1 + !ncfgprev = cfg_seniority_index(0) + !ncfgpersomo = ncfgprev + !do i = iand(MS,1), NSOMOMax-2,2 + ! if(cfg_seniority_index(i) .EQ. -1) then + ! cycle + ! endif + ! if(cfg_seniority_index(i+2) .EQ. -1) then + ! ncfgpersomo = N_configuration + 1 + ! else + ! if(cfg_seniority_index(i+2) > ncfgpersomo) then + ! ncfgpersomo = cfg_seniority_index(i+2) + ! else + ! k = 0 + ! do while(cfg_seniority_index(i+2+k) < ncfgpersomo) + ! k = k + 2 + ! ncfgpersomo = cfg_seniority_index(i+2+k) + ! enddo + ! endif + ! endif + ! ncfg = ncfgpersomo - ncfgprev + ! if(i .EQ. 0 .OR. i .EQ. 1) then + ! dimcsfpercfg = 1 + ! elseif( i .EQ. 3) then + ! dimcsfpercfg = 2 + ! else + ! if(iand(MS,1) .EQ. 0) then + ! dimcsfpercfg = max(1,nint((binom(i,i/2)-binom(i,i/2+1)))) + ! else + ! dimcsfpercfg = max(1,nint((binom(i,(i+1)/2)-binom(i,(i+3)/2)))) + ! endif + ! endif + ! n_CSF += ncfg * dimcsfpercfg + ! print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " senor=",cfg_seniority_index(i) + ! if(cfg_seniority_index(i+2) > ncfgprev) then + ! ncfgprev = cfg_seniority_index(i+2) + ! else + ! k = 0 + ! do while(cfg_seniority_index(i+2+k) < ncfgprev) + ! k = k + 2 + ! ncfgprev = cfg_seniority_index(i+2+k) + ! enddo + ! endif + !enddo + n_CSF = 0 + !print *," -9(((((((((((((( NSOMOMin=",NSOMOMin + ncfgprev = cfg_seniority_index(NSOMOMin) ! can be -1 + if(ncfgprev.eq.-1)then + ncfgprev=1 + endif + do i=NSOMOMin,NSOMOMax+2,2 + !k=0 + !do while((cfg_seniority_index(i+2+k) .eq. -1) .and. (k.le.NSOMOMax)) + ! k=k+2 + !end do + if(cfg_seniority_index(i).eq.-1)cycle + if(cfg_seniority_index(i+2).eq.-1)then + ncfg = N_configuration - ncfgprev + 1 + if(ncfg .eq. 0)then + ncfg=1 + endif else - if(cfg_seniority_index(i+2) > ncfgpersomo) then - ncfgpersomo = cfg_seniority_index(i+2) + ncfg = cfg_seniority_index(i+2) - ncfgprev + endif + if(i .EQ. 0 .OR. i .EQ. 1) then + dimcsfpercfg = 1 + elseif( i .EQ. 3) then + dimcsfpercfg = 2 + else + if(iand(MS,1) .EQ. 0) then + ialpha = (i + MS)/2 + dimcsfpercfg = max(1,nint((binom(i,ialpha)-binom(i,ialpha+1)))) else - k = 0 - do while(cfg_seniority_index(i+2+k) < ncfgpersomo) - k = k + 2 - ncfgpersomo = cfg_seniority_index(i+2+k) - enddo + ialpha = (i + MS)/2 + dimcsfpercfg = max(1,nint((binom(i,ialpha)-binom(i,ialpha+1)))) endif endif - ncfg = ncfgpersomo - ncfgprev - if(iand(MS,1) .EQ. 0) then - !dimcsfpercfg = max(1,nint((binom(i,i/2)-binom(i,i/2+1)))) - binom1 = dexp(logabsgamma(1.0d0*(i+1)) & - - logabsgamma(1.0d0*((i/2)+1)) & - - logabsgamma(1.0d0*(i-((i/2))+1))); - binom2 = dexp(logabsgamma(1.0d0*(i+1)) & - - logabsgamma(1.0d0*(((i/2)+1)+1)) & - - logabsgamma(1.0d0*(i-((i/2)+1)+1))); - dimcsfpercfg = max(1,nint(binom1 - binom2)) - else - !dimcsfpercfg = max(1,nint((binom(i,(i+1)/2)-binom(i,(i+3)/2)))) - binom1 = dexp(logabsgamma(1.0d0*(i+1)) & - - logabsgamma(1.0d0*(((i+1)/2)+1)) & - - logabsgamma(1.0d0*(i-(((i+1)/2))+1))); - binom2 = dexp(logabsgamma(1.0d0*(i+1)) & - - logabsgamma(1.0d0*((((i+3)/2)+1)+1)) & - - logabsgamma(1.0d0*(i-(((i+3)/2)+1)+1))); - dimcsfpercfg = max(1,nint(binom1 - binom2)) - endif - n_CSF += ncfg * dimcsfpercfg - if(cfg_seniority_index(i+2) > ncfgprev) then - ncfgprev = cfg_seniority_index(i+2) - else - k = 0 - do while(cfg_seniority_index(i+2+k) < ncfgprev) - k = k + 2 - ncfgprev = cfg_seniority_index(i+2+k) - enddo - endif - enddo + n_CSF += ncfg*dimcsfpercfg + !print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " ncfgprev=",ncfgprev, " senor=",cfg_seniority_index(i) + ncfgprev = cfg_seniority_index(i+2) + end do + !print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration + END_PROVIDER @@ -106,7 +169,7 @@ subroutine get_phase_qp_to_cfg(Ialpha, Ibeta, phaseout) real*8,intent(out) :: phaseout integer(bit_kind) :: mask, deta(N_int), detb(N_int) integer :: nbetas - integer :: k + integer :: count, k ! Initialize deta and detb deta = Ialpha @@ -146,7 +209,117 @@ end subroutine get_phase_qp_to_cfg - BEGIN_PROVIDER [ integer, AIJpqMatrixDimsList, (0:NSOMOMax,0:NSOMOMax,4,NSOMOMax,NSOMOMax,2)] + BEGIN_PROVIDER [ real*8, DetToCSFTransformationMatrix, (0:NSOMOMax,NBFMax,maxDetDimPerBF)] + &BEGIN_PROVIDER [ real*8, psi_coef_config, (n_CSF,1)] + &BEGIN_PROVIDER [ integer, psi_config_data, (N_configuration,2)] + &BEGIN_PROVIDER [ integer, psi_csf_to_config_data, (n_CSF)] + use cfunctions + implicit none + BEGIN_DOC + ! Documentation for DetToCSFTransformationMatrix + ! Provides the matrix of transformatons for the + ! conversion between determinant to CSF basis (in BFs) + END_DOC + integer*8 :: Isomo, Idomo + integer(bit_kind) :: Ialpha(N_int),Ibeta(N_int) + integer :: rows, cols, i, j, k + integer :: startdet, enddet, idx + integer*8 MS, salpha + integer ndetI + integer :: getNSOMO + real*8,dimension(:,:),allocatable :: tempBuffer + real*8,dimension(:),allocatable :: tempCoeff + real*8 :: norm_det1, phasedet + + integer :: nt + + + norm_det1 = 0.d0 + MS = elec_alpha_num - elec_beta_num + ! initialization + psi_coef_config = 0.d0 + DetToCSFTransformationMatrix(0,:,:) = 1.d0 + do i = 2-iand(MS,1_8), NSOMOMax,2 + Isomo = IBSET(0_8, i) - 1_8 + ! rows = Ncsfs + ! cols = Ndets + salpha = (i+MS)/2 + bfIcfg = max(1,nint((binom(i,salpha)-binom(i,salpha+1)))) + ndetI = max(1,nint((binom(i,salpha)))) + !bfIcfg = max(1,nint((binom(i,(i+1)/2)-binom(i,((i+1)/2)+1)))) + !ndetI = max(1,nint((binom(i,(i+1)/2)))) + + allocate(tempBuffer(bfIcfg,ndetI)) + call getCSFtoDETTransformationMatrix(Isomo, MS, NBFMax, maxDetDimPerBF, tempBuffer) + DetToCSFTransformationMatrix(i,1:bfIcfg,1:ndetI) = tempBuffer(1:bfIcfg,1:ndetI) + deallocate(tempBuffer) + enddo + + integer s, bfIcfg + integer countcsf + countcsf = 0 + integer countdet + countdet = 0 + integer istate + istate = 1 + psi_csf_to_config_data(1) = 1 + phasedet = 1.0d0 + call omp_set_max_active_levels(1) + !$OMP PARALLEL + !$OMP MASTER + do i = 1,N_configuration + startdet = psi_configuration_to_psi_det(1,i) + enddet = psi_configuration_to_psi_det(2,i) + ndetI = enddet-startdet+1 + + allocate(tempCoeff(ndetI)) + countdet = 1 + do j = startdet, enddet + idx = psi_configuration_to_psi_det_data(j) + Ialpha(:) = psi_det(:,1,idx) + Ibeta(:) = psi_det(:,2,idx) + call get_phase_qp_to_cfg(Ialpha, Ibeta, phasedet) + tempCoeff(countdet) = psi_coef(idx, istate)*phasedet + norm_det1 += tempCoeff(countdet)*tempCoeff(countdet) + countdet += 1 + enddo + + !print *,"dimcoef=",bfIcfg,norm_det1 + !call printMatrix(tempCoeff,ndetI,1) + + s = 0 + do k=1,N_int + if (psi_configuration(k,1,i) == 0_bit_kind) cycle + s = s + popcnt(psi_configuration(k,1,i)) + enddo + salpha = (s+MS)/2 + bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) + !bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) + + ! perhaps blocking with CFGs of same seniority + ! can be more efficient + allocate(tempBuffer(bfIcfg,ndetI)) + tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI) + + call dgemm('N','N', bfIcfg, 1, ndetI, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, size(tempCoeff,1), 0.d0, psi_coef_config(countcsf+1,1), size(psi_coef_config,1)) + !call dgemv('N', NBFMax, maxDetDimPerBF, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, 1, 0.d0, psi_coef_config(countcsf), 1) + + deallocate(tempCoeff) + deallocate(tempBuffer) + psi_config_data(i,1) = countcsf + 1 + do k=1,bfIcfg + psi_csf_to_config_data(countcsf+k) = i + enddo + countcsf += bfIcfg + psi_config_data(i,2) = countcsf + enddo + !$OMP END MASTER + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + END_PROVIDER + + BEGIN_PROVIDER [ integer, AIJpqMatrixDimsList, (NSOMOMin:NSOMOMax,4,NSOMOMax+1,NSOMOMax+1,2)] &BEGIN_PROVIDER [ integer, rowsmax] &BEGIN_PROVIDER [ integer, colsmax] use cfunctions @@ -165,14 +338,18 @@ end subroutine get_phase_qp_to_cfg cols = -1 integer*8 MS MS = elec_alpha_num-elec_beta_num - integer nsomomin nsomomin = elec_alpha_num-elec_beta_num rowsmax = 0 colsmax = 0 + !print *,"NSOMOMax = ",NSOMOMax + !print *,"NSOMOMin = ",NSOMOMin !allocate(AIJpqMatrixDimsList(NSOMOMax,NSOMOMax,4,NSOMOMax,NSOMOMax,2)) ! Type ! 1. SOMO -> SOMO - do i = 2-iand(nsomomin,1), NSOMOMax, 2 + !print *,"Doing SOMO->SOMO" + AIJpqMatrixDimsList(NSOMOMin,1,1,1,1) = 1 + AIJpqMatrixDimsList(NSOMOMin,1,1,1,2) = 1 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i-2,i-2, 2 Jsomo = ISHFT(1_8,j)-1 @@ -199,6 +376,7 @@ end subroutine get_phase_qp_to_cfg MS, & rows, & cols) + !print *, "SOMO->SOMO \t",i,j,k,l,">",Isomo,Jsomo,">",rows, cols if(rowsmax .LT. rows) then rowsmax = rows end if @@ -206,15 +384,18 @@ end subroutine get_phase_qp_to_cfg colsmax = cols end if ! i -> j - AIJpqMatrixDimsList(nsomoi,nsomoj,1,k,l,1) = rows - AIJpqMatrixDimsList(nsomoi,nsomoj,1,k,l,2) = cols + AIJpqMatrixDimsList(nsomoi,1,k,l,1) = rows + AIJpqMatrixDimsList(nsomoi,1,k,l,2) = cols end do end do end do end do ! Type ! 2. DOMO -> VMO - do i = 0+iand(nsomomin,1), NSOMOMax, 2 + !print *,"Doing DOMO->VMO" + AIJpqMatrixDimsList(NSOMOMin,2,1,1,1) = 1 + AIJpqMatrixDimsList(NSOMOMin,2,1,1,2) = 1 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 tmpsomo = ISHFT(1_8,i+2)-1 do j = i+2,i+2, 2 @@ -247,6 +428,7 @@ end subroutine get_phase_qp_to_cfg MS, & rows, & cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols if(rowsmax .LT. rows) then rowsmax = rows end if @@ -254,8 +436,8 @@ end subroutine get_phase_qp_to_cfg colsmax = cols end if ! i -> j - AIJpqMatrixDimsList(nsomoi,nsomoj,2,k,l,1) = rows - AIJpqMatrixDimsList(nsomoi,nsomoj,2,k,l,2) = cols + AIJpqMatrixDimsList(nsomoi,2,k,l,1) = rows + AIJpqMatrixDimsList(nsomoi,2,k,l,2) = cols end do end do end do @@ -263,15 +445,17 @@ end subroutine get_phase_qp_to_cfg ! Type ! 3. SOMO -> VMO !print *,"Doing SOMO->VMO" - do i = 2-iand(nsomomin,1), NSOMOMax, 2 + AIJpqMatrixDimsList(NSOMOMin,3,1,1,1) = 1 + AIJpqMatrixDimsList(NSOMOMin,3,1,1,2) = 1 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i,i, 2 Jsomo = ISHFT(1_8,j)-1 if(j .GT. NSOMOMax .OR. j .LE. 0) then cycle end if - do k = 1,i - do l = 1,i + do k = 1,i+1 + do l = 1,i+1 if(k .NE. l) then Isomo = ISHFT(1_8,i+1)-1 Isomo = IBCLR(Isomo,l-1) @@ -286,6 +470,7 @@ end subroutine get_phase_qp_to_cfg MS, & rows, & cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols if(rowsmax .LT. rows) then rowsmax = rows end if @@ -293,25 +478,27 @@ end subroutine get_phase_qp_to_cfg colsmax = cols end if ! i -> j - AIJpqMatrixDimsList(i,j,3,k,l,1) = rows - AIJpqMatrixDimsList(i,j,3,k,l,2) = cols + AIJpqMatrixDimsList(i,3,k,l,1) = rows + AIJpqMatrixDimsList(i,3,k,l,2) = cols end do end do end do end do ! Type - ! 4. DOMO -> VMO + ! 4. DOMO -> SOMO !print *,"Doing DOMO->SOMO" - do i = 2-iand(nsomomin,1), NSOMOMax, 2 + AIJpqMatrixDimsList(NSOMOMin,4,1,1,1) = 1 + AIJpqMatrixDimsList(NSOMOMin,4,1,1,2) = 1 + do i = NSOMOMin, NSOMOMax, 2 do j = i,i, 2 if(j .GT. NSOMOMax .OR. j .LE. 0) then cycle end if - do k = 1,i - do l = 1,i + do k = 1,i+1 + do l = 1,i+1 if(k .NE. l) then Isomo = ISHFT(1_8,i+1)-1 - Isomo = IBCLR(Isomo,k+1-1) + Isomo = IBCLR(Isomo,k-1) Jsomo = ISHFT(1_8,j+1)-1 Jsomo = IBCLR(Jsomo,l-1) else @@ -323,6 +510,7 @@ end subroutine get_phase_qp_to_cfg MS, & rows, & cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols if(rowsmax .LT. rows) then rowsmax = rows end if @@ -330,15 +518,16 @@ end subroutine get_phase_qp_to_cfg colsmax = cols end if ! i -> j - AIJpqMatrixDimsList(i,j,4,k,l,1) = rows - AIJpqMatrixDimsList(i,j,4,k,l,2) = cols + AIJpqMatrixDimsList(i,4,k,l,1) = rows + AIJpqMatrixDimsList(i,4,k,l,2) = cols end do end do end do end do + !print *,"Rowsmax=",rowsmax," Colsmax=",colsmax END_PROVIDER - BEGIN_PROVIDER [ real*8, AIJpqContainer, (0:NSOMOMax,0:NSOMOMax,4,NSOMOMax,NSOMOMax,NBFMax,NBFMax)] + BEGIN_PROVIDER [ real*8, AIJpqContainer, (NBFMax,NBFmax,NSOMOMax+1,NSOMOMax+1,4,NSOMOMin:NSOMOMax)] use cfunctions implicit none BEGIN_DOC @@ -368,70 +557,79 @@ end subroutine get_phase_qp_to_cfg rows = -1 cols = -1 integer*8 MS - MS = 0 - touch AIJpqMatrixDimsList + MS = elec_alpha_num-elec_beta_num real*8,dimension(:,:),allocatable :: meMatrix integer maxdim - !maxdim = max(rowsmax,colsmax) - ! allocate matrix - !allocate(AIJpqMatrixDimsList(NSOMOMax,NSOMOMax,4,NSOMOMax,NSOMOMax,2)) + ! Type ! 1. SOMO -> SOMO - do i = 2, NSOMOMax, 2 + AIJpqContainer = 0.d0 + AIJpqContainer(1,1,1,1,1,NSOMOMin) = 1.0d0 + integer :: rows_old, cols_old + rows_old = -1 + cols_old = -1 + allocate(meMatrix(1,1)) + do i = NSOMOMin+2, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 - do j = i-2,i-2, 2 - if(j .GT. NSOMOMax .OR. j .LT. 0) cycle - do k = 1,i - do l = 1,i + j=i-2 + if(j .GT. NSOMOMax .OR. j .LT. 0) cycle + nsomoi = i + do k = 1,i + orbp = k + do l = 1,i - ! Define Jsomo - if(k .NE. l) then - Jsomo = IBCLR(Isomo, k-1) - Jsomo = IBCLR(Jsomo, l-1) - nsomoi = i - nsomoj = j - else - Isomo = ISHFT(1_8,i)-1 - Jsomo = ISHFT(1_8,i)-1 - nsomoi = i - nsomoj = i - endif + ! Define Jsomo + if(k .NE. l) then + Jsomo = IBCLR(Isomo, k-1) + Jsomo = IBCLR(Jsomo, l-1) + nsomoj = j + else + Isomo = ISHFT(1_8,i)-1 + Jsomo = ISHFT(1_8,i)-1 + nsomoj = i + endif - AIJpqContainer(nsomoi,nsomoj,1,k,l,:,:) = 0.0d0 - call getApqIJMatrixDims(Isomo, & - Jsomo, & - MS, & - rows, & - cols) + call getApqIJMatrixDims(Isomo, & + Jsomo, & + MS, & + rows, & + cols) - orbp = k - orbq = l - allocate(meMatrix(rows,cols)) - meMatrix = 0.0d0 - ! fill matrix - call getApqIJMatrixDriver(Isomo, & - Jsomo, & - orbp, & - orbq, & - MS, & - NMO, & - meMatrix, & - rows, & - cols) - ! i -> j - do ri = 1,rows - do ci = 1,cols - AIJpqContainer(nsomoi,nsomoj,1,k,l,ri,ci) = meMatrix(ri, ci) - end do + orbq = l + if ((rows /= rows_old).or.(cols /= cols_old)) then + deallocate(meMatrix) + allocate(meMatrix(rows,cols)) + rows_old = rows + cols_old = cols + endif + meMatrix = 0.0d0 + ! fill matrix + call getApqIJMatrixDriver(Isomo, & + Jsomo, & + orbp, & + orbq, & + MS, & + NMO, & + meMatrix, & + rows, & + cols) + ! i -> j + do ri = 1,rows + do ci = 1,cols + AIJpqContainer(ri,ci,k,l,1,nsomoi) = meMatrix(ri, ci) end do - deallocate(meMatrix) end do end do end do end do + deallocate(meMatrix) + ! Type ! 2. DOMO -> VMO - do i = 0, NSOMOMax, 2 + !print *,"Doing DOMO -> VMO" + !AIJpqContainer(NSOMOMin,2,1,1,1,1) = 1.0d0 + AIJpqContainer(1,1,1,1,2,NSOMOMin) = 1.0d0 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 tmpsomo = ISHFT(1_8,i+2)-1 do j = i+2,i+2, 2 @@ -456,7 +654,12 @@ end subroutine get_phase_qp_to_cfg nsomoj = j endif - AIJpqContainer(nsomoi,nsomoj,2,k,l,:,:) = 0.0d0 + !print *,"k,l=",k,l + !call debug_spindet(Jsomo,1) + !call debug_spindet(Isomo,1) + + !AIJpqContainer(nsomoi,2,k,l,:,:) = 0.0d0 + AIJpqContainer(:,:,k,l,2,nsomoi) = 0.0d0 call getApqIJMatrixDims(Isomo, & Jsomo, & MS, & @@ -477,10 +680,13 @@ end subroutine get_phase_qp_to_cfg meMatrix, & rows, & cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax + !call printMatrix(meMatrix,rows,cols) ! i -> j do ri = 1,rows do ci = 1,cols - AIJpqContainer(nsomoi,nsomoj,2,k,l,ri,ci) = meMatrix(ri, ci) + !AIJpqContainer(nsomoi,2,k,l,ri,ci) = meMatrix(ri, ci) + AIJpqContainer(ri,ci,k,l,2,nsomoi) = meMatrix(ri, ci) end do end do deallocate(meMatrix) @@ -490,13 +696,16 @@ end subroutine get_phase_qp_to_cfg end do ! Type ! 3. SOMO -> VMO - do i = 2, NSOMOMax, 2 + !print *,"Doing SOMO -> VMO" + !AIJpqContainer(NSOMOMin,3,1,1,1,1) = 1.0d0 + AIJpqContainer(1,1,1:2,1:2,3,NSOMOMin) = 1.0d0 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i,i, 2 Jsomo = ISHFT(1_8,j)-1 if(j .GT. NSOMOMax .OR. j .LE. 0) cycle - do k = 1,i - do l = 1,i + do k = 1,i+1 + do l = 1,i+1 if(k .NE. l) then Isomo = ISHFT(1_8,i+1)-1 Isomo = IBCLR(Isomo,l-1) @@ -507,7 +716,12 @@ end subroutine get_phase_qp_to_cfg Jsomo = ISHFT(1_8,j)-1 endif - AIJpqContainer(i,j,3,k,l,:,:) = 0.0d0 + !print *,"k,l=",k,l + !call debug_spindet(Jsomo,1) + !call debug_spindet(Isomo,1) + + !AIJpqContainer(i,3,k,l,:,:) = 0.0d0 + AIJpqContainer(:,:,k,l,3,i) = 0.0d0 call getApqIJMatrixDims(Isomo, & Jsomo, & MS, & @@ -528,10 +742,13 @@ end subroutine get_phase_qp_to_cfg meMatrix, & rows, & cols) + !call printMatrix(meMatrix,rows,cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax ! i -> j do ri = 1,rows do ci = 1,cols - AIJpqContainer(i,j,3,k,l,ri,ci) = meMatrix(ri, ci) + !AIJpqContainer(i,3,k,l,ri,ci) = meMatrix(ri, ci) + AIJpqContainer(ri,ci,k,l,3,i) = meMatrix(ri, ci) end do end do deallocate(meMatrix) @@ -541,24 +758,35 @@ end subroutine get_phase_qp_to_cfg end do ! Type ! 4. DOMO -> SOMO - do i = 2, NSOMOMax, 2 + !print *,"Doing DOMO -> SOMO" + !AIJpqContainer(NSOMOMin,4,1,1,1,1) = 1.0d0 + AIJpqContainer(1,1,1,1,4,NSOMOMin) = 1.0d0 + AIJpqContainer(1,1,2,2,4,NSOMOMin) = 1.0d0 + AIJpqContainer(1,1,2,1,4,NSOMOMin) =-1.0d0 + AIJpqContainer(1,1,1,2,4,NSOMOMin) =-1.0d0 + do i = NSOMOMin+2, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i,i, 2 Jsomo = ISHFT(1_8,i)-1 if(j .GT. NSOMOMax .OR. j .LE. 0) cycle - do k = 1,i - do l = 1,i + do k = 1,i+1 + do l = 1,i+1 if(k .NE. l) then Isomo = ISHFT(1_8,i+1)-1 Isomo = IBCLR(Isomo,k-1) Jsomo = ISHFT(1_8,j+1)-1 - Jsomo = IBCLR(Jsomo,l+1-1) + Jsomo = IBCLR(Jsomo,l-1) else Isomo = ISHFT(1_8,i)-1 Jsomo = ISHFT(1_8,j)-1 endif - AIJpqContainer(i,j,4,k,l,:,:) = 0.0d0 + !print *,"k,l=",k,l + !call debug_spindet(Jsomo,1) + !call debug_spindet(Isomo,1) + + !AIJpqContainer(i,4,k,l,:,:) = 0.0d0 + AIJpqContainer(:,:,k,l,4,i) = 0.0d0 call getApqIJMatrixDims(Isomo, & Jsomo, & MS, & @@ -580,10 +808,13 @@ end subroutine get_phase_qp_to_cfg meMatrix, & rows, & cols) + !call printMatrix(meMatrix,rows,cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax ! i -> j do ri = 1,rows do ci = 1,cols - AIJpqContainer(i,j,4,k,l,ri,ci) = meMatrix(ri, ci) + !AIJpqContainer(i,4,k,l,ri,ci) = meMatrix(ri, ci) + AIJpqContainer(ri,ci,k,l,4,i) = meMatrix(ri, ci) end do end do deallocate(meMatrix) @@ -593,93 +824,1215 @@ end subroutine get_phase_qp_to_cfg end do END_PROVIDER - -!!!!!! - - BEGIN_PROVIDER [ real*8, DetToCSFTransformationMatrix, (0:NSOMOMax,NBFMax,maxDetDimPerBF)] - &BEGIN_PROVIDER [ real*8, psi_coef_config, (n_CSF)] - &BEGIN_PROVIDER [ integer, psi_config_data, (N_configuration,2)] - use cfunctions +subroutine calculate_preconditioner_cfg(diag_energies) + implicit none use bitmasks + BEGIN_DOC + ! Documentation for calculate_preconditioner + ! + ! Calculates the diagonal energies of + ! the configurations in psi_configuration + ! returns : diag_energies : + END_DOC + integer :: i,j,k,kk,l,p,q,noccp,noccq, ii, jj + real*8,intent(out) :: diag_energies(n_CSF) + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer :: NSOMOI + integer :: NSOMOJ + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: starti, endi, cnti, cntj, rows,cols + integer :: extype,pmodel,qmodel + integer(bit_kind) :: Icfg(N_INT,2) + integer(bit_kind) :: Jcfg(N_INT,2) + integer,external :: getNSOMO + real*8, external :: mo_two_e_integral + real*8 :: hpp + real*8 :: meCC + real*8 :: ecore + real*8 :: core_act_contrib + + !PROVIDE h_core_ri + PROVIDE core_fock_operator + PROVIDE h_act_ri + ! initialize energies + diag_energies = 0.d0 + !print *,"Core energy=",core_energy," nucler rep=",nuclear_repulsion, " n_core_orb=",n_core_orb," n_act_orb=",n_act_orb," mo_num=",mo_num + + ! calculate core energy + !call get_core_energy(ecore) + diag_energies = core_energy - nuclear_repulsion + + ! calculate the core energy + !print *,"Core 2energy=",ref_bitmask_energy + + do i=1,N_configuration + + Isomo = psi_configuration(1,1,i) + Idomo = psi_configuration(1,2,i) + Icfg(1,1) = psi_configuration(1,1,i) + Icfg(1,2) = psi_configuration(1,2,i) + NSOMOI = getNSOMO(psi_configuration(:,:,i)) + + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + + core_act_contrib = 0.0d0 + + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) + if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 1 + endif + enddo + ! holes in DOMO + !do k = n_core_orb+1,n_core_orb + n_act_orb + !do k = 1+n_core_inact_orb,n_core_orb+n_core_inact_act_orb + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) + if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 2 + endif + enddo + + ! find vmos + listvmos = -1 + vmotype = -1 + nvmos = 0 + !do k = n_core_orb+1,n_core_orb + n_act_orb + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) + !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) + if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 0 + else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 1 + end if + enddo + !print *,"I=",i + !call debug_spindet(psi_configuration(1,1,i),N_int) + !call debug_spindet(psi_configuration(1,2,i),N_int) + + do k=1,nholes + p = listholes(k) + noccp = holetype(k) + + + ! core-virtual + do l = 1, n_core_orb + jj = list_core(l) + core_act_contrib += noccp * (2.d0 * mo_two_e_integrals_jj(jj,p) - mo_two_e_integrals_jj_exchange(jj,p)) + enddo + + ! Calculate one-electron + ! and two-electron coulomb terms + do l=1,nholes + q = listholes(l) + noccq = holetype(l) + !print *,"--------------- K=",p," L=",q + + ! one-electron term + if(p.EQ.q) then + hpp = noccq * h_act_ri(p,q)!mo_one_e_integrals(q,q) + else + hpp = 0.d0 + endif + + + do j=starti,endi + ! coulomb term + ! (pp,qq) = + if(p.EQ.q) then + diag_energies(j) += hpp !+ 0.5d0 * (noccp * noccq * mo_two_e_integral(p,q,p,q)) + !print *,"hpp=",hpp,"diga= ",diag_energies(j) +! else +! diag_energies(j) += ! 0.5d0 * noccp * noccq * mo_two_e_integral(p,q,p,q) +! print *,"diga= ",diag_energies(j) + endif + enddo + enddo + + enddo + !print *,"I=",i," core_act=",core_act_contrib + do j=starti,endi + diag_energies(j) += core_act_contrib + end do + enddo + +end subroutine calculate_preconditioner_cfg + +subroutine obtain_connected_I_foralpha_fromfilterdlist(idxI, nconnectedJ, idslistconnectedJ, listconnectedJ, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for obtain_connected_I_foralpha + ! This function returns all those selected configurations + ! which are connected to the input configuration + ! Ialpha by a single excitation. + ! + ! The type of excitations are ordered as follows: + ! Type 1 - SOMO -> SOMO + ! Type 2 - DOMO -> VMO + ! Type 3 - SOMO -> VMO + ! Type 4 - DOMO -> SOMO + ! + ! Order of operators + ! \alpha> = a^\dag_p a_q |I> = E_pq |I> + END_DOC + integer ,intent(in) :: idxI + integer ,intent(in) :: nconnectedJ + integer(bit_kind),intent(in) :: listconnectedJ(N_int,2,*) + integer(bit_kind),intent(in) :: Ialpha(N_int,2) + integer(bit_kind),intent(out) :: connectedI(N_int,2,*) + integer ,intent(in) :: idslistconnectedJ(*) + integer ,intent(out) :: idxs_connectedI(*) + integer,intent(out) :: nconnectedI + integer,intent(out) :: excitationIds(2,*) + integer,intent(out) :: excitationTypes(*) + real*8 ,intent(out) :: diagfactors(*) + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: IJsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: ii,i,j,k,kk,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes, idxJ + integer :: listholes(mo_num) + integer :: holetype(mo_num) + integer :: end_index + integer :: Nsomo_alpha + logical :: isOKlistJ + + PROVIDE DetToCSFTransformationMatrix + + isOKlistJ = .False. + + nconnectedI = 0 + end_index = N_configuration + + ! Since CFGs are sorted wrt to seniority + ! we don't have to search the full CFG list + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Nsomo_alpha = POPCNT(Isomo) + end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1) + if(end_index .LT. 0) end_index= N_configuration + !end_index = N_configuration + + + p = 0 + q = 0 + do i=1,nconnectedJ + idxJ = idslistconnectedJ(i) + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Jsomo = listconnectedJ(1,1,i) + Jdomo = listconnectedJ(1,2,i) + diffSOMO = IEOR(Isomo,Jsomo) + ndiffSOMO = POPCNT(diffSOMO) + diffDOMO = IEOR(Idomo,Jdomo) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + select case(ndiffDOMO) + case (0) + ! SOMO -> VMO + !print *,"obt SOMO -> VMO" + extyp = 3 + IJsomo = IEOR(Isomo, Jsomo) +IRP_IF WITHOUT_TRAILZ + p = (popcnt(ieor( IAND(Isomo,IJsomo), IAND(Isomo,IJsomo)-1)) -1) + 1 +IRP_ELSE + p = TRAILZ(IAND(Isomo,IJsomo)) + 1 +IRP_ENDIF + IJsomo = IBCLR(IJsomo,p-1) +IRP_IF WITHOUT_TRAILZ + q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1 +IRP_ELSE + q = TRAILZ(IJsomo) + 1 +IRP_ENDIF + case (1) + ! DOMO -> VMO + ! or + ! SOMO -> SOMO + nsomoJ = POPCNT(Jsomo) + nsomoalpha = POPCNT(Isomo) + if(nsomoJ .GT. nsomoalpha) then + ! DOMO -> VMO + !print *,"obt DOMO -> VMO" + extyp = 2 +IRP_IF WITHOUT_TRAILZ + p = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1 +IRP_ELSE + p = TRAILZ(IEOR(Idomo,Jdomo)) + 1 +IRP_ENDIF + Isomo = IEOR(Isomo, Jsomo) + Isomo = IBCLR(Isomo,p-1) +IRP_IF WITHOUT_TRAILZ + q = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 +IRP_ELSE + q = TRAILZ(Isomo) + 1 +IRP_ENDIF + else + ! SOMO -> SOMO + !print *,"obt SOMO -> SOMO" + extyp = 1 +IRP_IF WITHOUT_TRAILZ + q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1 +IRP_ELSE + q = TRAILZ(IEOR(Idomo,Jdomo)) + 1 +IRP_ENDIF + Isomo = IEOR(Isomo, Jsomo) + Isomo = IBCLR(Isomo,q-1) +IRP_IF WITHOUT_TRAILZ + p = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 +IRP_ELSE + p = TRAILZ(Isomo) + 1 +IRP_ENDIF + end if + case (2) + ! DOMO -> SOMO + !print *,"obt DOMO -> SOMO" + extyp = 4 + IJsomo = IEOR(Isomo, Jsomo) +IRP_IF WITHOUT_TRAILZ + p = (popcnt(ieor(IAND(Jsomo,IJsomo) ,IAND(Jsomo,IJsomo) -1))-1) + 1 +IRP_ELSE + p = TRAILZ(IAND(Jsomo,IJsomo)) + 1 +IRP_ENDIF + IJsomo = IBCLR(IJsomo,p-1) +IRP_IF WITHOUT_TRAILZ + q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1 +IRP_ELSE + q = TRAILZ(IJsomo) + 1 +IRP_ENDIF + case default + print *,"something went wront in get connectedI" + end select + starti = psi_config_data(idxJ,1) + endi = psi_config_data(idxJ,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 1.0d0 + else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + Isomo = listconnectedJ(1,1,i) + Idomo = listconnectedJ(1,2,i) + do ii = 1,mo_num + if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do ii = 1,mo_num + if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 2 + endif + end do + + do k=1,nholes + p = listholes(k) + q = p + extyp = 1 + if(holetype(k) .EQ. 1) then + starti = psi_config_data(idxJ,1) + endi = psi_config_data(idxJ,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 1.0d0 + else + starti = psi_config_data(idxJ,1) + endi = psi_config_data(idxJ,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 2.0d0 + endif + enddo + endif + end do + +end subroutine obtain_connected_I_foralpha_fromfilterdlist + + +subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmodel) implicit none BEGIN_DOC - ! Documentation for DetToCSFTransformationMatrix - ! Provides the matrix of transformatons for the - ! conversion between determinant to CSF basis (in BFs) + ! This function converts the orbital ids + ! in real space to those used in model space + ! in order to identify the matrices required + ! for the calculation of MEs. + ! + ! The type of excitations are ordered as follows: + ! Type 1 - SOMO -> SOMO + ! Type 2 - DOMO -> VMO + ! Type 3 - SOMO -> VMO + ! Type 4 - DOMO -> SOMO END_DOC - integer(bit_kind) :: mask(N_int), Ialpha(N_int),Ibeta(N_int) - integer :: rows, cols, i, j, k - integer :: startdet, enddet - integer*8 MS, Isomo, Idomo - integer ndetI - integer :: getNSOMO - real*8,dimension(:,:),allocatable :: tempBuffer - real*8,dimension(:),allocatable :: tempCoeff - real*8 :: norm_det1, phasedet - norm_det1 = 0.d0 - MS = elec_alpha_num - elec_beta_num - ! initialization - psi_coef_config = 0.d0 - DetToCSFTransformationMatrix(0,:,:) = 1.d0 - do i = 2-iand(elec_alpha_num-elec_beta_num,1), NSOMOMax,2 - Isomo = IBSET(0_8, i) - 1_8 - ! rows = Ncsfs - ! cols = Ndets - bfIcfg = max(1,nint((binom(i,(i+1)/2)-binom(i,((i+1)/2)+1)))) - ndetI = max(1,nint((binom(i,(i+1)/2)))) + integer(bit_kind),intent(in) :: Ialpha(N_int,2) + integer(bit_kind),intent(in) :: Jcfg(N_int,2) + integer,intent(in) :: p,q + integer,intent(in) :: extype + integer,intent(out) :: pmodel,qmodel + !integer(bit_kind) :: Isomo(N_int) + !integer(bit_kind) :: Idomo(N_int) + !integer(bit_kind) :: Jsomo(N_int) + !integer(bit_kind) :: Jdomo(N_int) + integer*8 :: Isomo + integer*8 :: Idomo + integer*8 :: Jsomo + integer*8 :: Jdomo + integer*8 :: mask + integer :: iint, ipos + !integer(bit_kind) :: Isomotmp(N_int) + !integer(bit_kind) :: Jsomotmp(N_int) + integer*8 :: Isomotmp + integer*8 :: Jsomotmp + integer :: pos0,pos0prev - allocate(tempBuffer(bfIcfg,ndetI)) - call getCSFtoDETTransformationMatrix(Isomo, MS, NBFMax, maxDetDimPerBF, tempBuffer) - DetToCSFTransformationMatrix(i,1:bfIcfg,1:ndetI) = tempBuffer(1:bfIcfg,1:ndetI) - deallocate(tempBuffer) + ! TODO Flag (print) when model space indices is > 64 + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Jsomo = Jcfg(1,1) + Jdomo = Jcfg(1,2) + pos0prev = 0 + pmodel = p + qmodel = q + + if(p .EQ. q) then + pmodel = 1 + qmodel = 1 + else + select case(extype) + case (1) + ! SOMO -> SOMO + ! remove all domos + !print *,"type -> SOMO -> SOMO" + mask = ISHFT(1_8,p) - 1 + Isomotmp = IAND(Isomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + mask = ISHFT(1_8,q) - 1 + Isomotmp = IAND(Isomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + case (2) + ! DOMO -> VMO + ! remove all domos except one at p + !print *,"type -> DOMO -> VMO" + mask = ISHFT(1_8,p) - 1 + Jsomotmp = IAND(Jsomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + mask = ISHFT(1_8,q) - 1 + Jsomotmp = IAND(Jsomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + case (3) + ! SOMO -> VMO + !print *,"type -> SOMO -> VMO" + !Isomo = IEOR(Isomo,Jsomo) + if(p.LT.q) then + mask = ISHFT(1_8,p) - 1 + Isomo = IAND(Isomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + mask = ISHFT(1_8,q) - 1 + Jsomo = IAND(Jsomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1 + else + mask = ISHFT(1_8,p) - 1 + Isomo = IAND(Isomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1 + mask = ISHFT(1_8,q) - 1 + Jsomo = IAND(Jsomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + endif + case (4) + ! DOMO -> SOMO + ! remove all domos except one at p + !print *,"type -> DOMO -> SOMO" + !Isomo = IEOR(Isomo,Jsomo) + if(p.LT.q) then + mask = ISHFT(1_8,p) - 1 + Jsomo = IAND(Jsomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + mask = ISHFT(1_8,q) - 1 + Isomo = IAND(Isomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1 + else + mask = ISHFT(1_8,p) - 1 + Jsomo = IAND(Jsomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1 + mask = ISHFT(1_8,q) - 1 + Isomo = IAND(Isomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + endif + case default + print *,"something is wrong in convertOrbIdsToModelSpaceIds" + end select + endif + !print *,p,q,"model ids=",pmodel,qmodel +end subroutine convertOrbIdsToModelSpaceIds + +subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep) + implicit none + use bitmasks + use omp_lib + BEGIN_DOC + ! Documentation for sigma-vector calculation + ! + ! Calculates the result of the + ! application of the hamiltonian to the + ! wavefunction in CFG basis once + ! TODO : Things prepare outside this routine + ! 1. Touch the providers for + ! a. ApqIJ containers + ! b. DET to CSF transformation matrices + ! 2. DET to CSF transcormation + ! 2. CSF to DET back transcormation + ! returns : psi_coef_out_det : + END_DOC + integer,intent(in) :: sze, istart,iend, istep, ishift, n_st + real*8,intent(in) :: psi_in(n_st,sze) + real*8,intent(out) :: psi_out(n_st,sze) + integer(bit_kind) :: Icfg(N_INT,2) + integer :: i,j,k,l,p,q,noccp,noccq, m, n, idxI, nocck,orbk + integer :: ii,jj,kk,ll,pp,qq + integer(bit_kind),dimension(:,:,:),allocatable :: listconnectedJ + integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg + integer(bit_kind),dimension(:,:,:),allocatable :: singlesI + integer(bit_kind),dimension(:,:,:),allocatable :: connectedI_alpha + integer,dimension(:),allocatable :: idxs_singlesI + integer,dimension(:),allocatable :: idxs_connectedI_alpha + integer,dimension(:,:),allocatable :: excitationIds_single + integer,dimension(:),allocatable :: excitationTypes_single + integer,dimension(:,:),allocatable :: excitationIds + integer,dimension(:),allocatable :: excitationTypes + integer,dimension(:),allocatable :: idslistconnectedJ + real*8,dimension(:),allocatable :: diagfactors + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer :: Nalphas_Icfg, nconnectedI, rowsikpq, colsikpq, nsinglesI + integer :: extype,NSOMOalpha,NSOMOI,NSOMOJ,pmodel,qmodel + integer :: getNSOMO + integer :: totcolsTKI + integer :: rowsTKI + integer :: noccpp + integer :: istart_cfg, iend_cfg, num_threads_max + integer :: nconnectedJ,nconnectedtotalmax,nconnectedmaxJ,maxnalphas,ntotJ + integer*8 :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta + integer :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk + real*8 :: norm_coef_cfg, fac2eints + real*8 :: norm_coef_det + real*8 :: meCC1, meCC2, diagfac + real*8,dimension(:,:,:),allocatable :: TKI + real*8,dimension(:,:),allocatable :: GIJpqrs + real*8,dimension(:,:,:),allocatable :: TKIGIJ + real*8,dimension(:),allocatable :: psi_out_tmp + real*8,dimension(:,:),allocatable :: CCmattmp + real*8, external :: mo_two_e_integral + real*8, external :: get_two_e_integral + real*8,dimension(:),allocatable:: diag_energies + real*8 :: tmpvar, tmptot + real*8 :: core_act_contrib + + integer(omp_lock_kind), allocatable :: lock(:) + call omp_set_max_active_levels(1) + + !print *," sze = ",sze + allocate(lock(sze)) + do i=1,sze + call omp_init_lock(lock(i)) + enddo + !do i=1,size(psi_config_data,1) + ! print *,"i=",i," psi_cfg_data_1=",psi_config_data(i,1)," psi_cfg_data_2=",psi_config_data(i,2) + !end do + + allocate(diag_energies(n_CSF)) + call calculate_preconditioner_cfg(diag_energies) + !print *," diag energy =",diag_energies(1) + + MS = 0 + norm_coef_cfg=0.d0 + + psi_out=0.d0 + + istart_cfg = psi_csf_to_config_data(istart) + iend_cfg = psi_csf_to_config_data(iend) + + !nconnectedtotalmax = 1000 + !nconnectedmaxJ = 1000 + maxnalphas = elec_num*mo_num + Icfg(1,1) = psi_configuration(1,1,1) + Icfg(1,2) = psi_configuration(1,2,1) + allocate(listconnectedJ(N_INT,2,max(sze,10000))) + allocate(idslistconnectedJ(max(sze,10000))) + call obtain_connected_J_givenI(1, Icfg, listconnectedJ, idslistconnectedJ, nconnectedmaxJ, nconnectedtotalmax) + deallocate(listconnectedJ) + deallocate(idslistconnectedJ) + + integer*8, allocatable :: bit_tmp(:) + integer*8, external :: configuration_search_key + double precision :: diagfactors_0 + allocate( bit_tmp(0:N_configuration+1)) + do j=1,N_configuration + bit_tmp(j) = configuration_search_key(psi_configuration(1,1,j),N_int) enddo - integer s, bfIcfg - integer countcsf - countcsf = 0 - integer countdet - countdet = 0 - integer idx - integer istate - istate = 1 - phasedet = 1.0d0 - do i = 1,N_configuration - startdet = psi_configuration_to_psi_det(1,i) - enddet = psi_configuration_to_psi_det(2,i) - ndetI = enddet-startdet+1 + call omp_set_max_active_levels(1) + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP private(i,icfg, isomo, idomo, NSOMOI, NSOMOJ, nholes, k, listholes,& + !$OMP holetype, vmotype, nvmos, listvmos, starti, endi, & + !$OMP nsinglesI, singlesI,idxs_singlesI,excitationIds_single,& + !$OMP excitationTypes_single, idxI, p, q, extype, pmodel, qmodel,& + !$OMP Jsomo, Jdomo, startj, endj, kk, jj, ii, cnti, cntj, meCC1,& + !$OMP nconnectedJ,listconnectedJ,idslistconnectedJ,ntotJ, & + !$OMP Nalphas_Icfg,alphas_Icfg,connectedI_alpha, & + !$OMP idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors,& + !$OMP totcolsTKI,rowsTKI,NSOMOalpha,rowsikpq, & + !$OMP colsikpq, GIJpqrs,TKIGIJ,j,l,m,TKI,CCmattmp, moi, moj, mok, mol,& + !$OMP diagfac, tmpvar, diagfactors_0) & + !$OMP shared(istart_cfg, iend_cfg, psi_configuration, mo_num, psi_config_data,& + !$OMP N_int, N_st, psi_out, psi_in, h_core_ri, core_energy, h_act_ri, AIJpqContainer,& + !$OMP pp, sze, NalphaIcfg_list,alphasIcfg_list, bit_tmp, & + !$OMP AIJpqMatrixDimsList, diag_energies, n_CSF, lock, NBFmax,nconnectedtotalmax, nconnectedmaxJ,maxnalphas,& + !$OMP n_core_orb, n_act_orb, list_act, n, list_core, list_core_is_built,core_act_contrib, num_threads_max,& + !$OMP n_core_orb_is_built, mo_integrals_map, mo_integrals_map_is_built) - allocate(tempCoeff(ndetI)) - countdet = 1 - do j = startdet, enddet - idx = psi_configuration_to_psi_det_data(j) - Ialpha(:) = psi_det(:,1,idx) - Ibeta(:) = psi_det(:,2,idx) - call get_phase_qp_to_cfg(Ialpha, Ibeta, phasedet) - tempCoeff(countdet) = psi_coef(idx, istate)*phasedet - norm_det1 += tempCoeff(countdet)*tempCoeff(countdet) - countdet += 1 + allocate(singlesI(N_INT,2,max(sze,10000))) + allocate(idxs_singlesI(max(sze,10000))) + allocate(excitationIds_single(2,max(sze,10000))) + allocate(excitationTypes_single(max(sze,10000))) +! + + !!!====================!!! + !!! Single Excitations !!! + !!!====================!!! + + !$OMP DO SCHEDULE(dynamic,16) + do i=istart_cfg,iend_cfg + + ! if Seniority_range > 8 then + ! continue + ! else + ! cycle + + Icfg(1,1) = psi_configuration(1,1,i) + Icfg(1,2) = psi_configuration(1,2,i) + Isomo = Icfg(1,1) + Idomo = Icfg(1,2) + NSOMOI = getNSOMO(Icfg) + + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + ! list_act + ! list_core + ! list_core_inact + ! bitmasks + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) + if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 1 + endif + enddo + ! holes in DOMO + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) + if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 2 + endif + enddo + + ! find vmos + listvmos = -1 + vmotype = -1 + nvmos = 0 + do kk = 1,n_act_orb + k = list_act(kk) + !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) + if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 0 + else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 1 + end if + enddo + + + ! Icsf ids + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + NSOMOI = getNSOMO(Icfg) + + call generate_all_singles_cfg_with_type(bit_tmp,Icfg,singlesI,idxs_singlesI,excitationIds_single,& + excitationTypes_single,nsinglesI,N_int) + + do j = 1,nsinglesI + idxI = idxs_singlesI(j) + NSOMOJ = getNSOMO(singlesI(1,1,j)) + p = excitationIds_single(1,j) + q = excitationIds_single(2,j) + extype = excitationTypes_single(j) + ! Off diagonal terms + call convertOrbIdsToModelSpaceIds(Icfg, singlesI(1,1,j), p, q, extype, pmodel, qmodel) + Jsomo = singlesI(1,1,j) + Jdomo = singlesI(1,2,j) + + ! Add the hole on J + if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes += 1 + listholes(nholes) = q + holetype(nholes) = 1 + endif + if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes += 1 + listholes(nholes) = q + holetype(nholes) = 2 + endif + + startj = psi_config_data(idxI,1) + endj = psi_config_data(idxI,2) + !print *,"i=",i," idxI=",idxI," startj=",startj," endj=",endj," sze=",sze + + !!! One-electron contribution !!! + do ii = starti, endi + cnti = ii-starti+1 + do jj = startj, endj + cntj = jj-startj+1 + !meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* h_core_ri(p,q) + core_act_contrib = 0.0d0 + if(p.ne.q)then + do pp=1,n_core_orb + n=list_core(pp) + core_act_contrib += 2.d0 * get_two_e_integral(p,n,q,n,mo_integrals_map) - get_two_e_integral(p,n,n,q,mo_integrals_map) + end do + endif + meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* (h_act_ri(p,q) + core_act_contrib) + !if(jj.eq.1.and.ii.eq.1)then + ! print *,"CC=",AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI), " p=",p," q=",q + !endif + call omp_set_lock(lock(jj)) + do kk = 1,n_st + psi_out(kk,jj) = psi_out(kk,jj) + meCC1 * psi_in(kk,ii) + enddo + call omp_unset_lock(lock(jj)) + enddo enddo - s = 0 - do k=1,N_int - if (psi_configuration(k,1,i) == 0_bit_kind) cycle - s = s + popcnt(psi_configuration(k,1,i)) - enddo - bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) - - ! perhaps blocking with CFGs of same seniority - ! can be more efficient - allocate(tempBuffer(bfIcfg,ndetI)) - tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI) - - call dgemm('N','N', bfIcfg, 1, ndetI, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, size(tempCoeff,1), 0.d0, psi_coef_config(countcsf+1), size(psi_coef_config,1)) - !call dgemv('N', NBFMax, maxDetDimPerBF, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, 1, 0.d0, psi_coef_config(countcsf), 1) - - deallocate(tempCoeff) - deallocate(tempBuffer) - psi_config_data(i,1) = countcsf + 1 - countcsf += bfIcfg - psi_config_data(i,2) = countcsf + ! Undo setting in listholes + if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes -= 1 + endif + if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes -= 1 + endif + enddo enddo + !$OMP END DO + deallocate(singlesI) + deallocate(idxs_singlesI) + deallocate(excitationIds_single) + deallocate(excitationTypes_single) - END_PROVIDER + !print *," singles part psi(1,1)=",psi_out(1,1) + + allocate(listconnectedJ(N_INT,2,max(sze,10000))) + allocate(alphas_Icfg(N_INT,2,max(sze,10000))) + allocate(connectedI_alpha(N_INT,2,max(sze,10000))) + allocate(idxs_connectedI_alpha(max(sze,10000))) + allocate(excitationIds(2,max(sze,10000))) + allocate(excitationTypes(max(sze,10000))) + allocate(diagfactors(max(sze,10000))) + allocate(idslistconnectedJ(max(sze,10000))) + allocate(CCmattmp(n_st,NBFmax)) + + !!!====================!!! + !!! Double Excitations !!! + !!!====================!!! + + ! Loop over all selected configurations + !$OMP DO SCHEDULE(static) + do i = istart_cfg,iend_cfg + + ! if Seniority_range > 8 then + ! continue + ! else + ! cycle + + Icfg(1,1) = psi_configuration(1,1,i) + Icfg(1,2) = psi_configuration(1,2,i) + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + + ! Returns all unique (checking the past) singly excited cfgs connected to I + Nalphas_Icfg = 0 + ! TODO: + ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate + + Nalphas_Icfg = NalphaIcfg_list(i) + alphas_Icfg(1:n_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg) + if(Nalphas_Icfg .GT. maxnalphas) then + print *,"Nalpha > maxnalpha" + endif + + call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ) + + ! TODO : remove doubly excited for return + !print *,"I=",i," isomo=",psi_configuration(1,1,i)," idomo=",psi_configuration(1,2,i), " psiout=",psi_out(1,5) + do k = 1,Nalphas_Icfg + ! Now generate all singly excited with respect to a given alpha CFG + + !call obtain_connected_I_foralpha_fromfilterdlist(i,nconnectedJ, idslistconnectedJ, & + ! listconnectedJ, alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI, & + ! excitationIds,excitationTypes,diagfactors) + + call obtain_connected_I_foralpha(i, alphas_Icfg(1,1,k), connectedI_alpha, idxs_connectedI_alpha, & + nconnectedI, excitationIds, excitationTypes, diagfactors) + + + if(nconnectedI .EQ. 0) then + cycle + endif + + !if(i .EQ. 1) then + ! print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' kcfgDOMO=',alphas_Icfg(1,2,k),' ',POPCNT(alphas_Icfg(1,2,k)) + !endif + + ! Here we do 2x the loop. One to count for the size of the matrix, then we compute. + totcolsTKI = 0 + rowsTKI = -1 + NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) + do j = 1,nconnectedI + NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + ! for E_pp E_rs and E_ppE_rr case + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + !print *,"j=",j," Nsomo=",NSOMOalpha," rowsikpq=",rowsikpq," colsikpq=",colsikpq, " p=",pmodel," q=",qmodel, " extyp=",extype + totcolsTKI += colsikpq + rowsTKI = rowsikpq + enddo + + allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF + ! Initialize the integral container + ! dims : (totcolsTKI, nconnectedI) + allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs + allocate(TKIGIJ(n_st,rowsTKI,nconnectedI)) ! TKI * gpqrs + !print *,"\t---rowsTKI=",rowsTKI," totCols=",totcolsTKI + TKI = 0.d0 + GIJpqrs = 0.d0 + TKIGIJ = 0.d0 + + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + rowsTKI = rowsikpq + !if(i.eq.1) then + ! print *,rowsTKI,colsikpq," | ",pmodel,qmodel,extype,NSOMOalpha + !endif + do m = 1,colsikpq + do l = 1,rowsTKI + do kk = 1,n_st + TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) & + * psi_in(kk,idxs_connectedI_alpha(j)+m-1) + enddo + !if(i.eq.1) then + ! print *,AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) + !endif + enddo + enddo + + diagfactors_0 = diagfactors(j)*0.5d0 + moi = excitationIds(1,j) ! p + mok = excitationIds(2,j) ! q + do l=1,nconnectedI + moj = excitationIds(2,l) ! s + mol = excitationIds(1,l) ! r + diagfac = diagfactors_0 * diagfactors(l)* mo_two_e_integral(mok,mol,moi,moj)! g(pq,sr) = + !print *,"p=",mok,"q=",mol,"r=",moi,"s=",moj + do m = 1,colsikpq + ! = (ik|jl) + GIJpqrs(totcolsTKI+m,l) = diagfac + enddo + enddo + totcolsTKI += colsikpq + enddo + + + ! Do big BLAS + call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0, & + TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0, & + TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) ) + + + ! Collect the result + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + rowsTKI = rowsikpq + CCmattmp = 0.d0 + + call dgemm('N','N', n_st, colsikpq, rowsTKI, 1.d0, & + TKIGIJ(1,1,j), size(TKIGIJ,1), & + AIJpqContainer(1,1,pmodel,qmodel,extype,NSOMOalpha), & + size(AIJpqContainer,1), 0.d0, & + CCmattmp, size(CCmattmp,1) ) + + do m = 1,colsikpq + call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1)) + do kk = 1,n_st + psi_out(kk,idxs_connectedI_alpha(j)+m-1) += CCmattmp(kk,m) + enddo + call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1)) + enddo + totcolsTKI += colsikpq + enddo + + deallocate(TKI) ! coefficients of CSF + deallocate(GIJpqrs) ! gpqrs + deallocate(TKIGIJ) ! gpqrs + + enddo ! loop over alphas + enddo ! loop over I + !$OMP END DO + call omp_set_max_active_levels(4) + deallocate(CCmattmp) + deallocate(connectedI_alpha) + deallocate(idxs_connectedI_alpha) + deallocate(excitationIds) + deallocate(excitationTypes) + deallocate(diagfactors) + + !print *," psi(1,823)=",psi_out(1,823), " g(1 8, 3 15)=",mo_two_e_integral(1,8,3,15), " ncore=",n_core_orb + !print *," psi(1,1)=",psi_out(1,1) + + ! Add the diagonal contribution + !$OMP DO + do i = 1,n_CSF + do kk=1,n_st + psi_out(kk,i) += diag_energies(i)*psi_in(kk,i) + enddo + enddo + !$OMP END DO + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + deallocate(diag_energies) + deallocate(bit_tmp) + +end subroutine calculate_sigma_vector_cfg_nst_naive_store + + + + +subroutine calculate_sigma_vector_cfg_nst(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for sigma-vector calculation + ! + ! Calculates the result of the + ! application of the hamiltonian to the + ! wavefunction in CFG basis once + ! TODO : Things prepare outside this routine + ! 1. Touch the providers for + ! a. ApqIJ containers + ! b. DET to CSF transformation matrices + ! 2. DET to CSF transcormation + ! 2. CSF to DET back transcormation + ! returns : psi_coef_out_det : + END_DOC + integer,intent(in) :: sze, istart,iend, istep, ishift, n_st + real*8,intent(in) :: psi_in(sze,n_st) + real*8,intent(out) :: psi_out(sze,n_st) + integer(bit_kind) :: Icfg(N_INT,2) + integer :: i,j,k,l,p,q,noccp,noccq, ii, jj, m, n, idxI, kk, nocck,orbk + integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg + integer(bit_kind),dimension(:,:,:),allocatable :: singlesI + integer(bit_kind),dimension(:,:,:),allocatable :: connectedI_alpha + integer,dimension(:),allocatable :: idxs_singlesI + integer,dimension(:),allocatable :: idxs_connectedI_alpha + integer,dimension(:,:),allocatable :: excitationIds_single + integer,dimension(:),allocatable :: excitationTypes_single + integer,dimension(:,:),allocatable :: excitationIds + integer,dimension(:),allocatable :: excitationTypes + real*8,dimension(:),allocatable :: diagfactors + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer :: Nalphas_Icfg, nconnectedI, rowsikpq, colsikpq, nsinglesI + integer :: extype,NSOMOalpha,NSOMOI,NSOMOJ,pmodel,qmodel + integer :: getNSOMO + integer :: totcolsTKI + integer :: rowsTKI + integer :: noccpp + integer :: istart_cfg, iend_cfg + integer*8 :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta + integer :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk + real*8 :: norm_coef_cfg, fac2eints + real*8 :: norm_coef_det + real*8 :: meCC1, meCC2, diagfac + real*8,dimension(:,:,:),allocatable :: TKI + real*8,dimension(:,:),allocatable :: GIJpqrs + real*8,dimension(:,:,:),allocatable :: TKIGIJ + real*8, external :: mo_two_e_integral + real*8, external :: get_two_e_integral + real*8 :: diag_energies(n_CSF) + + ! allocate + allocate(alphas_Icfg(N_INT,2,max(sze/2,100))) + allocate(singlesI(N_INT,2,max(sze/2,100))) + allocate(connectedI_alpha(N_INT,2,max(sze/2,100))) + allocate(idxs_singlesI(max(sze/2,100))) + allocate(idxs_connectedI_alpha(max(sze/2,100))) + allocate(excitationIds_single(2,max(sze/2,100))) + allocate(excitationTypes_single(max(sze/2,100))) + allocate(excitationIds(2,max(sze/2,100))) + allocate(excitationTypes(max(sze/2,100))) + allocate(diagfactors(max(sze/2,100))) + + + !print *," sze = ",sze + call calculate_preconditioner_cfg(diag_energies) + + MS = 0 + norm_coef_cfg=0.d0 + + psi_out=0.d0 + + istart_cfg = psi_csf_to_config_data(istart) + iend_cfg = psi_csf_to_config_data(iend) + + + !!! Single Excitations !!! + do i=istart_cfg,iend_cfg + print *,"I=",i + + ! if Seniority_range > 8 then + ! continue + ! else + ! cycle + + Icfg(1,1) = psi_configuration(1,1,i) + Icfg(1,2) = psi_configuration(1,2,i) + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + + ! Returns all unique (checking the past) singly excited cfgs connected to I + Nalphas_Icfg = 0 + ! TODO: + ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate + !call obtain_associated_alphaI(i, Icfg, alphas_Icfg, Nalphas_Icfg) + Nalphas_Icfg = NalphaIcfg_list(i) + alphas_Icfg(1:N_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg) + + ! TODO : remove doubly excited for return + ! Here we do 2x the loop. One to count for the size of the matrix, then we compute. + do k = 1,Nalphas_Icfg + ! Now generate all singly excited with respect to a given alpha CFG + call obtain_connected_I_foralpha(i,alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors) + + totcolsTKI = 0 + rowsTKI = -1 + do j = 1,nconnectedI + NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k)) + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + ! for E_pp E_rs and E_ppE_rr case + if(p.EQ.q) then + NSOMOalpha = NSOMOI + endif + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + totcolsTKI += colsikpq +! if(rowsTKI .LT. rowsikpq .AND. rowsTKI .NE. -1) then +! print *,">",j,"Something is wrong in sigma-vector", rowsTKI, rowsikpq, "(p,q)=",pmodel,qmodel,"ex=",extype,"na=",NSOMOalpha," nI=",NSOMOI +! !rowsTKI = rowsikpq +! else + rowsTKI = rowsikpq +! endif + enddo + + allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF + ! Initialize the inegral container + ! dims : (totcolsTKI, nconnectedI) + allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs + allocate(TKIGIJ(n_st,rowsTKI,nconnectedI)) ! TKI * gpqrs + + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k)) + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + do m = 1,colsikpq + do l = 1,rowsTKI + do kk = 1,n_st + TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * psi_in(kk,idxs_connectedI_alpha(j)+m-1) + enddo + enddo + enddo + do m = 1,colsikpq + do l = 1,nconnectedI + ! = (ik|jl) + moi = excitationIds(1,j) ! p + mok = excitationIds(2,j) ! q + moj = excitationIds(2,l) ! s + mol = excitationIds(1,l) ! r + if(moi.EQ.mok .AND. moj.EQ.mol)then + diagfac = diagfactors(j) + diagfac *= diagfactors(l) + !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac + GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = + else + diagfac = diagfactors(j)*diagfactors(l) + !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac + GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = + !endif + endif + enddo + enddo + totcolsTKI += colsikpq + enddo + + + + ! Do big BLAS + ! TODO TKI, size(TKI,1)*size(TKI,2) + call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0,& + TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0,& + TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) ) + + + ! Collect the result + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k)) + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + do m = 1,colsikpq + do l = 1,rowsTKI + do kk = 1,n_st + psi_out(kk,idxs_connectedI_alpha(j)+m-1) = psi_out(kk,idxs_connectedI_alpha(j)+m-1) + & + AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * TKIGIJ(kk,l,j) + enddo + enddo + enddo + totcolsTKI += colsikpq + enddo + + deallocate(TKI) ! coefficients of CSF + ! Initialize the inegral container + ! dims : (totcolsTKI, nconnectedI) + deallocate(GIJpqrs) ! gpqrs + deallocate(TKIGIJ) ! gpqrs + + enddo ! loop over alphas + enddo ! loop over I + deallocate(connectedI_alpha) + deallocate(idxs_connectedI_alpha) + deallocate(excitationIds) + deallocate(excitationTypes) + deallocate(diagfactors) + + + ! Add the diagonal contribution + do i = 1,n_CSF + do kk=1,n_st + psi_out(kk,i) += diag_energies(i)*psi_in(kk,i) + enddo + enddo + call omp_set_max_active_levels(4) + +end subroutine calculate_sigma_vector_cfg_nst_naive_store diff --git a/src/csf/tree_utils.c b/src/csf/tree_utils.c index 1266b890..91bae8b2 100644 --- a/src/csf/tree_utils.c +++ b/src/csf/tree_utils.c @@ -1,3 +1,4 @@ +#include #include "tree_utils.h" void buildTree(Tree *bftree, @@ -52,6 +53,7 @@ void buildTreeDriver(Tree *bftree, int NSOMO, int MS, int *NBF){ int icpl = 0; // keep track of the ith ms (cannot be -ve) int addr = 0; // Counts the total BF's + assert(bftree->rootNode->addr == 0); buildTree(bftree, &(bftree->rootNode), isomo, izeros, icpl, NSOMO, MS); *NBF = bftree->rootNode->addr; @@ -264,6 +266,8 @@ void genDetBasis(Tree *dettree, int Isomo, int MS, int *ndets){ int NSOMO=0; getSetBits(Isomo, &NSOMO); genDetsDriver(dettree, NSOMO, MS, ndets); + // Closed shell case + if(NSOMO==0) (*ndets) = 1; } @@ -311,3 +315,13 @@ void callBlasMatxMat(double *A, int rowA, int colA, double *B, int rowB, int col break; } } + +void printRealMatrix(double *orthoMatrix, int rows, int cols){ + int i,j; + for(i=0;i 0) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + PROVIDE mo_two_e_integrals_in_map + allocate(H_jj(sze)) + + H_jj(1) = diag_h_mat_elem(dets_in(1,1,1),Nint) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(sze,H_jj, dets_in,Nint) & + !$OMP PRIVATE(i) + !$OMP DO SCHEDULE(static) + do i=2,sze + H_jj(i) = diag_H_mat_elem(dets_in(1,1,i),Nint) + enddo + !$OMP END DO + !$OMP END PARALLEL + + if (dressing_state > 0) then + do k=1,N_st + do i=1,sze + H_jj(i) += u_in(i,k) * dressing_column_h(i,k) + enddo + enddo + endif + + call davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag,Nint,dressing_state,converged) + deallocate(H_jj) +end + + +subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag_in,Nint,dressing_state,converged) + use bitmasks + use mmap_module + implicit none + BEGIN_DOC + ! Davidson diagonalization with specific diagonal elements of the H matrix + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten + ! on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > sze + ! + END_DOC + integer, intent(in) :: dim_in, sze, sze_csf, N_st, N_st_diag_in, Nint + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(in) :: H_jj(sze) + integer, intent(in) :: dressing_state + double precision, intent(inout) :: u_in(dim_in,N_st_diag_in) + double precision, intent(out) :: energies(N_st_diag_in) + + integer :: iter, N_st_diag + integer :: i,j,k,l,m,kk,ii,ll + logical, intent(inout) :: converged + + double precision, external :: u_dot_v, u_dot_u + + integer :: k_pairs, kl + + integer :: iter2, itertot + double precision, allocatable :: y(:,:), h(:,:), lambda(:) + double precision, allocatable :: s_tmp(:,:) + double precision :: diag_h_mat_elem + double precision, allocatable :: residual_norm(:) + character*(16384) :: write_buffer + double precision :: to_print(2,N_st) + double precision :: cpu, wall + integer :: shift, shift2, itermax, istate + double precision :: r1, r2, alpha + logical :: state_ok(N_st_diag_in*davidson_sze_max) + integer :: nproc_target + integer :: order(N_st_diag_in) + double precision :: cmax + double precision, allocatable :: U(:,:), U_csf(:,:), overlap(:,:) + double precision, allocatable :: tmpU(:,:), tmpW(:,:) + double precision, pointer :: W(:,:), W_csf(:,:) + logical :: disk_based + double precision :: energy_shift(N_st_diag_in*davidson_sze_max) + + include 'constants.include.F' + + N_st_diag = N_st_diag_in + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda + if (N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_full to ', N_st_diag*3 + stop -1 + endif + + itermax = max(2,min(davidson_sze_max, sze/N_st_diag))+1 + itertot = 0 + + if (state_following) then + allocate(overlap(N_st_diag*itermax, N_st_diag*itermax)) + else + allocate(overlap(1,1)) ! avoid 'if' for deallocate + endif + overlap = 0.d0 + + PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse threshold_davidson_pt2 threshold_davidson_from_pt2 + + call write_time(6) + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + ! Find max number of cores to fit in memory + ! ----------------------------------------- + + nproc_target = nproc + double precision :: rss + integer :: maxab + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 + + m=1 + disk_based = .False. + call resident_memory(rss) + do + r1 = 8.d0 * &! bytes + ( dble(sze)*(N_st_diag) &! U + + dble(sze_csf)*(N_st_diag*itermax) &! U_csf + + dble(sze)*(N_st_diag) &! W + + dble(sze_csf)*(N_st_diag*itermax) &! W_csf + + 3.0d0*(N_st_diag*itermax)**2 &! h,y,s_tmp + + 1.d0*(N_st_diag*itermax) &! lambda + + 1.d0*(N_st_diag) &! residual_norm + ! In H_u_0_nstates_zmq + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on collector + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on slave + + 0.5d0*maxab &! idx0 in H_u_0_nstates_openmp_work_* + + nproc_target * &! In OMP section + ( 1.d0*(N_int*maxab) &! buffer + + 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx + ) / 1024.d0**3 + + if (nproc_target == 0) then + call check_mem(r1,irp_here) + nproc_target = 1 + exit + endif + + if (r1+rss < qp_max_mem) then + exit + endif + + if (itermax > 4) then + itermax = itermax - 1 + else if (m==1.and.disk_based_davidson) then + m=0 + disk_based = .True. + itermax = 6 + else + nproc_target = nproc_target - 1 + endif + + enddo + nthreads_davidson = nproc_target + TOUCH nthreads_davidson + call write_int(6,N_st,'Number of states') + call write_int(6,N_st_diag,'Number of states in diagonalization') + call write_int(6,sze,'Number of determinants') + call write_int(6,sze_csf,'Number of CSFs') + call write_int(6,nproc_target,'Number of threads for diagonalization') + call write_double(6, r1, 'Memory(Gb)') + if (disk_based) then + print *, 'Using swap space to reduce RAM' + endif + + !--------------- + + write(6,'(A)') '' + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = 'Iter' + do i=1,N_st + write_buffer = trim(write_buffer)//' Energy Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + + + if (disk_based) then + ! Create memory-mapped files for W and S + type(c_ptr) :: ptr_w, ptr_s + integer :: fd_s, fd_w + call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& + 8, fd_w, .False., ptr_w) + call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/)) + else + allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax)) + endif + + allocate( & + ! Large + U(sze,N_st_diag), & + U_csf(sze_csf,N_st_diag*itermax), & + + ! Small + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + residual_norm(N_st_diag), & + lambda(N_st_diag*itermax)) + + + h = 0.d0 + U = 0.d0 + y = 0.d0 + s_tmp = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + + ! Davidson iterations + ! =================== + + converged = .False. + call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),U_csf(1,1)) + do k=N_st+1,N_st_diag + do i=1,sze_csf + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + U_csf(i,k) = r1*dcos(r2) * u_csf(i,k-N_st) + enddo + U_csf(k,k) = u_csf(k,k) + 10.d0 + enddo + do k=1,N_st_diag + call normalize(U_csf(1,k),sze_csf) + enddo + call convertWFfromCSFtoDET(N_st_diag,U_csf(1,1),U(1,1)) + + do while (.not.converged) + itertot = itertot+1 + if (itertot == 8) then + exit + endif + + do iter=1,itermax-1 + + shift = N_st_diag*(iter-1) + shift2 = N_st_diag*iter + +! if ((iter > 1).or.(itertot == 1)) then + ! Compute |W_k> = \sum_i |i> + ! ----------------------------------- + + !call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift+1),U) + PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals + if ((sze > 100000).and.distributed_davidson) then + !call H_u_0_nstates_zmq (W,U,N_st_diag,sze) + allocate(tmpW(N_st_diag,sze_csf)) + allocate(tmpU(N_st_diag,sze_csf)) + do kk=1,N_st_diag + do ii=1,sze_csf + tmpU(kk,ii) = U_csf(ii,shift+kk) + enddo + enddo + call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) + do kk=1,N_st_diag + do ii=1,sze_csf + W_csf(ii,shift+kk)=tmpW(kk,ii) + enddo + enddo + deallocate(tmpW) + deallocate(tmpU) + else + !call H_u_0_nstates_openmp(W,U,N_st_diag,sze) + allocate(tmpW(N_st_diag,sze_csf)) + allocate(tmpU(N_st_diag,sze_csf)) + do kk=1,N_st_diag + do ii=1,sze_csf + tmpU(kk,ii) = U_csf(ii,shift+kk) + enddo + enddo + !tmpU =0.0d0 + !tmpU(1,2)=1.0d0 + double precision :: irp_rdtsc + double precision :: ticks_0, ticks_1 + integer*8 :: irp_imax + irp_imax = 1 + !ticks_0 = irp_rdtsc() + call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) + !ticks_1 = irp_rdtsc() + !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" + do kk=1,N_st_diag + do ii=1,sze_csf + W_csf(ii,shift+kk)=tmpW(kk,ii) + enddo + enddo + + !U_csf = 0.0d0 + !U_csf(1,1) = 1.0d0 + !u_in = 0.0d0 + !call convertWFfromCSFtoDET(N_st_diag,tmpU,U2) + !call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze) + !call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1)) + !do i=1,sze_csf + ! print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1)) + ! if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then + ! print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1)) + ! endif + !end do + !stop + deallocate(tmpW) + deallocate(tmpU) + endif +! else +! ! Already computed in update below +! continue +! endif + + if (dressing_state > 0) then + + if (N_st == 1) then + + l = dressed_column_idx(1) + double precision :: f + f = 1.0d0/psi_coef(l,1) + do istate=1,N_st_diag + do i=1,sze + W(i,istate) += dressing_column_h(i,1) *f * U(l,istate) + W(l,istate) += dressing_column_h(i,1) *f * U(i,istate) + enddo + + enddo + + else + + call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & + psi_coef, size(psi_coef,1), & + U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & + dressing_column_h, size(dressing_column_h,1), s_tmp, size(s_tmp,1), & + 1.d0, W(1,1), size(W,1)) + + + call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & + dressing_column_h, size(dressing_column_h,1), & + U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & + psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), & + 1.d0, W(1,1), size(W,1)) + + endif + endif + + !call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1)) + + ! Compute h_kl = = + ! ------------------------------------------- + + call dgemm('T','N', shift2, shift2, sze_csf, & + 1.d0, U_csf, size(U_csf,1), W_csf, size(W_csf,1), & + 0.d0, h, size(h,1)) + call dgemm('T','N', shift2, shift2, sze_csf, & + 1.d0, U_csf, size(U_csf,1), U_csf, size(U_csf,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + ! Diagonalize h + ! --------------- + + integer :: lwork, info + double precision, allocatable :: work(:) + + y = h + lwork = -1 + allocate(work(1)) + call dsygv(1,'V','U',shift2,y,size(y,1), & + s_tmp,size(s_tmp,1), lambda, work,lwork,info) + lwork = int(work(1)) + deallocate(work) + allocate(work(lwork)) + call dsygv(1,'V','U',shift2,y,size(y,1), & + s_tmp,size(s_tmp,1), lambda, work,lwork,info) + deallocate(work) + if (info /= 0) then + stop 'DSYGV Diagonalization failed' + endif + + ! Compute Energy for each eigenvector + ! ----------------------------------- + + call dgemm('N','N',shift2,shift2,shift2, & + 1.d0, h, size(h,1), y, size(y,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('T','N',shift2,shift2,shift2, & + 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & + 0.d0, h, size(h,1)) + + do k=1,shift2 + lambda(k) = h(k,k) + enddo + + if (state_following) then + + overlap = -1.d0 + do i=1,shift2 + do k=1,shift2 + overlap(k,i) = dabs(y(k,i)) + enddo + enddo + do k=1,N_st + cmax = -1.d0 + do i=1,N_st + if (overlap(i,k) > cmax) then + cmax = overlap(i,k) + order(k) = i + endif + enddo + do i=1,N_st_diag + overlap(order(k),i) = -1.d0 + enddo + enddo + overlap = y + do k=1,N_st + l = order(k) + if (k /= l) then + y(1:shift2,k) = overlap(1:shift2,l) + endif + enddo + do k=1,N_st + overlap(k,1) = lambda(k) + enddo + + endif + + + ! Express eigenvectors of h in the csf basis + ! ------------------------------------------ + + call dgemm('N','N', sze_csf, N_st_diag, shift2, & + 1.d0, U_csf, size(U_csf,1), y, size(y,1), 0.d0, U_csf(1,shift2+1), size(U_csf,1)) + call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift2+1),U) + + call dgemm('N','N', sze_csf, N_st_diag, shift2, & + 1.d0, W_csf, size(W_csf,1), y, size(y,1), 0.d0, W_csf(1,shift2+1), size(W_csf,1)) + call convertWFfromCSFtoDET(N_st_diag,W_csf(1,shift2+1),W) + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + !if (without_diagonal) then + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k=1,N_st_diag + do i=1,sze + U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) & + /max(H_jj(i) - lambda (k),1.d-2) + enddo + enddo + !$OMP END PARALLEL DO + !else + ! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + ! do k=1,N_st_diag + ! do i=1,sze + ! U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) + ! enddo + ! enddo + ! !$OMP END PARALLEL DO + !endif + + do k=1,N_st + residual_norm(k) = u_dot_u(U(1,k),sze) + to_print(1,k) = lambda(k) + nuclear_repulsion + to_print(2,k) = residual_norm(k) + enddo + call convertWFfromDETtoCSF(N_st_diag,U,U_csf(1,shift2+1)) + + if ((itertot>1).and.(iter == 1)) then + !don't print + continue + else + write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + endif + + ! Check convergence + if (iter > 1) then + if (threshold_davidson_from_pt2) then + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson_pt2 + else + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson + endif + endif + + do k=1,N_st + if (residual_norm(k) > 1.d8) then + print *, 'Davidson failed' + stop -1 + endif + enddo + if (converged) then + exit + endif + + logical, external :: qp_stop + if (qp_stop()) then + converged = .True. + exit + endif + + + enddo + + ! Re-contract U + ! ------------- + + call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0, & + W_csf, size(W_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + do k=1,N_st_diag + do i=1,sze_csf + W_csf(i,k) = u_in(i,k) + enddo + enddo + call convertWFfromCSFtoDET(N_st_diag,W_csf,W) + + call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0, & + U_csf, size(U_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + do k=1,N_st_diag + do i=1,sze_csf + U_csf(i,k) = u_in(i,k) + enddo + enddo + call convertWFfromCSFtoDET(N_st_diag,U_csf,U) + + enddo + + + call nullify_small_elements(sze,N_st_diag,U,size(U,1),threshold_davidson_pt2) + do k=1,N_st_diag + do i=1,sze + u_in(i,k) = U(i,k) + enddo + enddo + + do k=1,N_st_diag + energies(k) = lambda(k) + enddo + write_buffer = '======' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') trim(write_buffer) + write(6,'(A)') '' + call write_time(6) + + if (disk_based)then + ! Remove temp files + integer, external :: getUnitAndOpen + call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 8, fd_w, ptr_w ) + fd_w = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_w','r') + close(fd_w,status='delete') + else + deallocate(W, W_csf) + endif + + deallocate ( & + residual_norm, & + U, U_csf, overlap, & + h, y, s_tmp, & + lambda & + ) + FREE nthreads_davidson +end + + + + + + + diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index 89e18d65..3020ecd8 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -89,7 +89,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N double precision, intent(out) :: energies(N_st_diag_in) integer :: iter, N_st_diag - integer :: i,j,k,l,m + integer :: i,j,k,l,m,kk logical, intent(inout) :: converged double precision, external :: u_dot_v, u_dot_u diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 68f3420d..0fd9f091 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -154,7 +154,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ character*(16384) :: write_buffer double precision :: to_print(3,N_st) double precision :: cpu, wall - integer :: shift, shift2, itermax, istate + integer :: shift, shift2, itermax, istate, ii double precision :: r1, r2, alpha logical :: state_ok(N_st_diag_in*davidson_sze_max) integer :: nproc_target @@ -361,7 +361,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ if ((sze > 100000).and.distributed_davidson) then call H_S2_u_0_nstates_zmq (W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) else + double precision :: irp_rdtsc + double precision :: ticks_0, ticks_1 + integer*8 :: irp_imax + irp_imax = 1 + !ticks_0 = irp_rdtsc() call H_S2_u_0_nstates_openmp(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) + !ticks_1 = irp_rdtsc() + !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" endif S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag)) else diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f index 42f4fcdb..a34637a0 100644 --- a/src/davidson/diagonalize_ci.irp.f +++ b/src/davidson/diagonalize_ci.irp.f @@ -1,9 +1,20 @@ +BEGIN_PROVIDER [ character*(3), sigma_vector_algorithm ] + implicit none + BEGIN_DOC + ! If 'det', use in Davidson + ! + ! If 'cfg', use in Davidson + END_DOC + sigma_vector_algorithm = 'det' + !sigma_vector_algorithm = 'cfg' +END_PROVIDER BEGIN_PROVIDER [ double precision, CI_energy, (N_states_diag) ] implicit none BEGIN_DOC ! :c:data:`n_states` lowest eigenvalues of the |CI| matrix END_DOC + PROVIDE distributed_davidson integer :: j character*(8) :: st @@ -61,7 +72,7 @@ END_PROVIDER if (diag_algorithm == 'Davidson') then if (do_csf) then -! if (sigma_vector_algorithm == 'det') then + if (sigma_vector_algorithm == 'det') then call davidson_diag_H_csf (psi_det, & CI_eigenvectors, & size(CI_eigenvectors,1), & @@ -73,14 +84,14 @@ END_PROVIDER N_int, & 0, & converged) -! else if (sigma_vector_algorithm == 'cfg') then -! call davidson_diag_H_csf(psi_det,CI_eigenvectors, & -! size(CI_eigenvectors,1),CI_electronic_energy, & -! N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged) -! else -! print *, irp_here -! stop 'bug' -! endif + else if (sigma_vector_algorithm == 'cfg') then + call davidson_diag_H_cfg(psi_det,CI_eigenvectors, & + size(CI_eigenvectors,1),CI_electronic_energy, & + N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged) + else + print *, irp_here + stop 'bug' + endif else call davidson_diag_HS2(psi_det, & CI_eigenvectors, & diff --git a/src/mo_localization/EZFIO.cfg b/src/mo_localization/EZFIO.cfg new file mode 100644 index 00000000..129fb2ea --- /dev/null +++ b/src/mo_localization/EZFIO.cfg @@ -0,0 +1,54 @@ +[localization_method] +type: character*(32) +doc: Method for the orbital localization. boys : Foster-Boys, pipek : Pipek-Mezey. +interface: ezfio,provider,ocaml +default: boys + +[localization_max_nb_iter] +type: integer +doc: Maximal number of iterations for the orbital localization. +interface: ezfio,provider,ocaml +default: 1000 + +[localization_use_hessian] +type: logical +doc: If true, it uses the trust region algorithm with the gradient and the diagonal of the hessian. Else it computes the rotation between each pair of MOs that should be applied to maximize/minimize the localization criterion. The last option requieres a way smaller amount of memory but is not easy to converge. +interface: ezfio,provider,ocaml +default: true + +[security_mo_class] +type: logical +doc: If true, call abort if the number of active orbital or the number of core + active orbitals is equal to the number of molecular orbitals, else uses the actual mo_class. It is a security if you forget to set the mo_class before the localization. +interface: ezfio,provider,ocaml +default: true + +[thresh_loc_max_elem_grad] +type: double precision +doc: Threshold for the convergence, the localization exits when the largest element in the gradient is smaller than thresh_localization_max_elem_grad. +interface: ezfio,provider,ocaml +default: 1.e-6 + +[kick_in_mos] +type: logical +doc: If True, it applies a rotation of an angle angle_pre_rot between the MOs of a same mo_class before the localization. +interface: ezfio,provider,ocaml +default: true + +[angle_pre_rot] +type: double precision +doc: To define the angle for the rotation of the MOs before the localization (in rad). +interface: ezfio,provider,ocaml +default: 0.1 + +[sort_mos_by_e] +type: logical +doc: If True, the MOs are sorted using the diagonal elements of the Fock matrix. +interface: ezfio,provider,ocaml +default: false + +[debug_hf] +type: logical +doc: If True, prints the HF energy before/after the different steps of the localization. Only for debugging. +interface: ezfio,provider,ocaml +default: false + diff --git a/src/mo_localization/NEED b/src/mo_localization/NEED new file mode 100644 index 00000000..e9d51654 --- /dev/null +++ b/src/mo_localization/NEED @@ -0,0 +1,2 @@ +hartree_fock +utils_trust_region diff --git a/src/mo_localization/README.md b/src/mo_localization/README.md new file mode 100644 index 00000000..a142ec20 --- /dev/null +++ b/src/mo_localization/README.md @@ -0,0 +1,108 @@ +# mo_localization + +Some parameters can be changed with qp edit in the mo_localization section +(cf below). Similarly for the trust region parameters in the +utils_trust_region section. The localization without the trust region +is not available for the moment. + +The irf.f files can be generated from the org ones using emacs. +If you modify the .org files, don't forget to do (you need emacs): +``` +./TANGLE_org_mode.sh +ninja +``` + +# Orbital localisation +To localize the MOs: +``` +qp run localization +``` +After that the ezfio directory contains the localized MOs + +But to do so the mo_class must be defined before, run +``` +qp set_mo_class -q +``` +for more information or +``` +qp set_mo_class -c [] -a [] -v [] -i [] -d [] +``` +to set the mo classes. We don't care about the name of the +mo classes. The algorithm just localizes all the MOs of +a given class between them, for all the classes, except the deleted MOs. + +If you just on kind of mo class to localize all the MOs between them +you have to put: +``` +qp set mo_localization security_mo_class false +``` + +Before the localization, a kick is done for each mo class +(except the deleted ones) to break the MOs. This is done by +doing a rotation between the MOs. +This feature can be removed by setting: +``` +qp set mo_localization kick_in_mos false +``` +and the default angle for the rotation can be changed with: +``` +qp set mo_localization angle_pre_rot 1e-3 # or something else +``` + +After the localization, the MOs of each class (except the deleted ones) +can be sorted between them using the diagonal elements of +the fock matrix with: +``` +qp set mo_localization sort_mos_by_e true +``` + +You can check the Hartree-Fock energy before/during/after the localization +by putting (only for debugging): +``` +qp set mo_localization debug_hf true +``` + +## Foster-Boys & Pipek-Mezey +Foster-Boys: +``` +qp set mo_localization localization_method boys +``` + +Pipek-Mezey: +``` +qp set mo_localization localization_method pipek +``` + +# Break the spatial symmetry of the MOs +To break the spatial symmetry of the MOs: +``` +qp run break_spatial_sym +``` +The default angle for the rotations is too big for this kind of +application, a value between 1e-3 and 1e-6 should break the spatial +symmetry with just a small change in the energy: +``` +qp set mo_localization angle_pre_rot 1e-3 +``` + +# With or without hessian + trust region +With hessian + trust region +``` +qp set mo_localization localisation_use_hessian true +``` +It uses the trust region algorithm with the diagonal of the hessian of the +localization criterion with respect to the MO rotations. + +Without the hessian and the trust region +``` +qp set mo_localization localisation_use_hessian false +``` +By doing so it does not require to store the hessian but the +convergence is not easy, in particular for virtual MOs. +It seems that it not possible to converge with Pipek-Mezey +localization with this approach. + +# Further improvements: +- Cleaner repo +- Correction of the errors in the documentations +- option with/without trust region diff --git a/src/mo_localization/TANGLE_org_mode.sh b/src/mo_localization/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/mo_localization/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/mo_localization/break_spatial_sym.irp.f b/src/mo_localization/break_spatial_sym.irp.f new file mode 100644 index 00000000..6a1003df --- /dev/null +++ b/src/mo_localization/break_spatial_sym.irp.f @@ -0,0 +1,42 @@ +! ! A small program to break the spatial symmetry of the MOs. + +! ! You have to defined your MO classes or set security_mo_class to false +! ! with: +! ! qp set orbital_optimization security_mo_class false + +! ! The default angle for the rotations is too big for this kind of +! ! application, a value between 1e-3 and 1e-6 should break the spatial +! ! symmetry with just a small change in the energy. + + +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (dim_list_act_orb == mo_num .or. & + dim_list_core_orb + dim_list_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to kick all the orbitals:' + print*, 'qp set orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + + call apply_pre_rotation + +end diff --git a/src/mo_localization/break_spatial_sym.org b/src/mo_localization/break_spatial_sym.org new file mode 100644 index 00000000..5995b138 --- /dev/null +++ b/src/mo_localization/break_spatial_sym.org @@ -0,0 +1,43 @@ +! A small program to break the spatial symmetry of the MOs. + +! You have to defined your MO classes or set security_mo_class to false +! with: +! qp set orbital_optimization security_mo_class false + +! The default angle for the rotations is too big for this kind of +! application, a value between 1e-3 and 1e-6 should break the spatial +! symmetry with just a small change in the energy. + +#+BEGIN_SRC f90 :comments org :tangle break_spatial_sym.irp.f +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (dim_list_act_orb == mo_num .or. & + dim_list_core_orb + dim_list_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to kick all the orbitals:' + print*, 'qp set orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + + call apply_pre_rotation + +end +#+END_SRC diff --git a/src/mo_localization/debug_gradient_loc.irp.f b/src/mo_localization/debug_gradient_loc.irp.f new file mode 100644 index 00000000..a0e5432d --- /dev/null +++ b/src/mo_localization/debug_gradient_loc.irp.f @@ -0,0 +1,62 @@ +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list = list_act + list_size = dim_list_act_orb + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end diff --git a/src/mo_localization/debug_gradient_loc.org b/src/mo_localization/debug_gradient_loc.org new file mode 100644 index 00000000..1fcb7fca --- /dev/null +++ b/src/mo_localization/debug_gradient_loc.org @@ -0,0 +1,64 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_gradient_loc.irp.f +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list = list_act + list_size = dim_list_act_orb + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end +#+END_SRC diff --git a/src/mo_localization/debug_hessian_loc.irp.f b/src/mo_localization/debug_hessian_loc.irp.f new file mode 100644 index 00000000..bfb1cbbc --- /dev/null +++ b/src/mo_localization/debug_hessian_loc.irp.f @@ -0,0 +1,62 @@ +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:,:), H2(:,:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list = list_act + list_size = dim_list_act_orb + + n = list_size*(list_size-1)/2 + + allocate(H(n,n),H2(n,n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i,i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i,i)) > threshold) then + print*,H(i,i) + nb_error = nb_error + 1 + if (dabs(H(i,i)) > max_elem) then + max_elem = H(i,i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end diff --git a/src/mo_localization/debug_hessian_loc.org b/src/mo_localization/debug_hessian_loc.org new file mode 100644 index 00000000..bc23818c --- /dev/null +++ b/src/mo_localization/debug_hessian_loc.org @@ -0,0 +1,64 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_hessian_loc.irp.f +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:,:), H2(:,:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list = list_act + list_size = dim_list_act_orb + + n = list_size*(list_size-1)/2 + + allocate(H(n,n),H2(n,n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i,i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i,i)) > threshold) then + print*,H(i,i) + nb_error = nb_error + 1 + if (dabs(H(i,i)) > max_elem) then + max_elem = H(i,i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end +#+END_SRC diff --git a/src/mo_localization/kick_the_mos.irp.f b/src/mo_localization/kick_the_mos.irp.f new file mode 100644 index 00000000..3ca90fb5 --- /dev/null +++ b/src/mo_localization/kick_the_mos.irp.f @@ -0,0 +1,31 @@ +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (dim_list_act_orb == mo_num .or. & + dim_list_core_orb + dim_list_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to kick all the orbital:' + print*, 'qp set Orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + + call apply_pre_rotation + +end diff --git a/src/mo_localization/kick_the_mos.org b/src/mo_localization/kick_the_mos.org new file mode 100644 index 00000000..3d57da37 --- /dev/null +++ b/src/mo_localization/kick_the_mos.org @@ -0,0 +1,33 @@ +#+BEGIN_SRC f90 :comments org :tangle kick_the_mos.irp.f +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (dim_list_act_orb == mo_num .or. & + dim_list_core_orb + dim_list_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to kick all the orbital:' + print*, 'qp set Orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + + call apply_pre_rotation + +end +#+END_SRC diff --git a/src/mo_localization/localization.irp.f b/src/mo_localization/localization.irp.f new file mode 100644 index 00000000..6d5cc876 --- /dev/null +++ b/src/mo_localization/localization.irp.f @@ -0,0 +1,531 @@ +program localization + call run_localization +end + + + + +! Variables: +! | pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +! | R(mo_num,mo_num) | double precision | Rotation matrix | +! | tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +! | prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +! | spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +! | criterion | double precision | Localization criterion | +! | prev_criterion | double precision | Previous criterion | +! | criterion_model | double precision | Estimated next criterion | +! | rho | double precision | Ratio to measure the agreement between the model | +! | | | and the reality | +! | delta | double precision | Radisu of the trust region | +! | norm_grad | double precision | Norm of the gradient | +! | info | integer | for dsyev from Lapack | +! | max_elem | double precision | maximal element in the gradient | +! | v_grad(:) | double precision | Gradient | +! | H(:,:) | double precision | Hessian (diagonal) | +! | e_val(:) | double precision | Eigenvalues of the hessian | +! | W(:,:) | double precision | Eigenvectors of the hessian | +! | tmp_x(:) | double precision | Step in 1D (in a subaspace) | +! | tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +! | tmp_list(:) | double precision | List of MOs in a mo_class | +! | i,j,k | integer | Indexes in the full MO space | +! | tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +! | l | integer | Index for the mo_class | +! | key(:) | integer | Key to sort the eigenvalues of the hessian | +! | nb_iter | integer | Number of iterations | +! | must_exit | logical | To exit the trust region loop | +! | cancel_step | logical | To cancel a step | +! | not_*converged | logical | To localize the different mo classes | +! | t* | double precision | To measure the time | +! | n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +! | tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +! | | | Number of dimension in the subspace | + +! Variables in qp_edit for the localization: +! | localization_method | +! | localization_max_nb_iter | +! | default_mo_class | +! | thresh_loc_max_elem_grad | +! | kick_in_mos | +! | angle_pre_rot | + +! + all the variables for the trust region + +! Cf. qp_edit orbital optimization + + +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:,:), tmp_m_x(:,:), tmp_x(:),W(:,:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method (with qp_edit) + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (n_act_orb == mo_num .or. & + n_core_orb + n_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to localize all the orbitals:' + print*, 'qp set Orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + +! Loc + + ! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n,tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + j = key(i) + W(j,i) = 1d0 + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end diff --git a/src/mo_localization/localization.org b/src/mo_localization/localization.org new file mode 100644 index 00000000..293ea06b --- /dev/null +++ b/src/mo_localization/localization.org @@ -0,0 +1,2859 @@ +* Orbital localization + +Molecular orbitals localization + +** Doc + +The program localizes the orbitals in function of their mo_class: +- core MOs +- inactive MOs +- active MOs +- virtual MOs +- deleted MOs -> no orbital localization + +Core MOs are localized with core MOs, inactives MOs are localized with +inactives MOs and so on. But deleted orbitals are not localized. + +WARNING: +- The user MUST SPECIFY THE MO CLASSES, otherwise if default mo class + is false the localization will be done for all the orbitals between + them, so the occupied and virtual MOs will be combined together + which is clearly not what we want to do. If default lpmo class is true + the localization will be done for the core, occupied and virtual + orbitals, but pay attention the mo_class are not deleted after... +- The mo class is not important (except "deleted") because it is not + link to the kind of MOs for CASSCF or CIPSI. It is just a way to + separate the MOs in order to localize them separetely, for example + to separate the core MOs, the occupied MOs and the virtuals MOs. +- The user MUST CHANGE THE MO CLASSES AFTER THE LOCALIZATION in order + to have the right mo class for his next calculation... + +For more information on the mo_class: +qp set_mo_class -h + +*** Foster-Boys localization +Boys, S. F., 1960, Rev. Mod. Phys. 32, 296. +DOI:https://doi.org/10.1103/RevModPhys.32.300 +Boys, S. F., 1966, in Quantum Theory of Atoms, Molecules, +and the Solid State, edited by P.-O. Löwdin (Academic +Press, New York), p. 253. +Daniel A. Kleier, Thomas A. Halgren, John H. Hall Jr., and William +N. Lipscomb, J. Chem. Phys. 61, 3905 (1974) +doi: 10.1063/1.1681683 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Comput. Chem. 2013, 34, +1456– 1462. DOI: 10.1002/jcc.23281 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Theory +Comput. 2012, 8, 9, 3137–3146 +DOI: https://doi.org/10.1021/ct300473g +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Phys. 137, 224114 +(2012) +DOI: https://doi.org/10.1063/1.4769866 +Nicola Marzari, Arash A. Mostofi, Jonathan R. Yates, Ivo Souza, and David Vanderbilt +Rev. Mod. Phys. 84, 1419 +https://doi.org/10.1103/RevModPhys.84.1419 + +The Foster-Boys localization is a method to generate localized MOs +(LMOs) by minimizing the Foster-Boys criterion: +$$ C_{FB} = \sum_{i=1}^N \left[ < \phi_i | r^2 | \phi_i > - < \phi_i | r | +\phi_i >^2 \right] $$. +In fact it is equivalent to maximise +$$ C_2 = \sum_{i>j, \ i=1}^N \left[ < \phi_i | r | \phi_i > - < +\phi_j | r | \phi_j > \left]^2$$ +or +$$ C_3 = \sum_{i=1}^N \left[ < \phi_i | r | \phi_i > \right]^2.$$ + +Locality of the orbitals: +\begin{align*} +\sigma_i &= \sqrt{ - ^2} \\ +&= \sqrt{ - ^2 + - ^2 + - ^2} +\end{align*} + + +*** Pipek-Mezey localization +J. Pipek, P. G. Mezey, J. Chem. Phys. 90, 4916 (1989) +DOI: 10.1063/1.456588 + +Foster-Boys localization does not preserve the $\sigma - \pi$ separation of the +MOs, it leads to "banana" orbitals. The Pipek-Mezey localization +normally preserves this separation. + +** Localization procedure + +Localization procedure: + +To do the localization we compute the gradient and the +diagonal hessian of the Foster-Boys criterion with respect to the MO +rotations and we minimize it with the Newton method. + +In order to avoid the problem of starting on a saddle point, the +localization procedure starts by giving a little kick in the MOs, by +putting "kick in mos" true, in order to break the symmetry and escape +from a possible saddle point. + +In order to speed up the iteration we compute the gradient, the +diagonal hessian and the step in temporary matrices of the size +(number MOs in mo class by number MOs in mo class) + +** Remarks + +Variables: + +The indexes i and j refere to the positions of the elements in +the "full space", i.e., the arrays containing elements for all the MOs, +but the indexes tmp_i and tmp_j to the positions of the elements in +the "reduced space/subspace", i.e., the arrays containing elements for +a restricted number of MOs. +Example: +The gradient for the localization of the core MOs can be expressed +as a vector of length mo_num*(mo_num-1)/2 with only +n_core_orb*(n_core_orb-1)/2 non zero elements, so it is more relevant +to use a vector of size n_act_orb*(n_core_orb-1)/2. +So here the gradient is a vector of size +tmp_list_size*(tmp_list_size)/2 where tmp_list_size is the number of +MOs is the corresponding mo class. +The same thing happened for the hessian, the matrix containing the +step and the rotation matrix, which are tmp_list_size by tmp_list_size +matrices. + +Ex gradient for 4 core orbitales: +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +0 \\ +\vdots \\ +0 \\ +\end{pmatrix} +\end{align*} + +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The same thing can be done if indexes of the orbitales are not +consecutives since it's done with lists of MOs: + +\begin{align*} +\begin{pmatrix} +0 & -a & 0 & -b & -d & \hdots & 0 \\ +a & 0 & 0 & -c & -e & \hdots & 0 \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +b & c & 0 & 0 & -f & \hdots & 0 \\ +d & e & 0 & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The dipoles are updated using the "ao to mo" subroutine without the +"restore symmetry" which is actually in N^4 but can be rewrite in N^2 +log(N^2). +The bottleneck of the program is normally N^3 with the matrix +multiplications/diagonalizations. The use of the full hessian can be +an improvement but it will scale in N^4... + +** Program + +#+BEGIN_SRC f90 org :tangle localization.irp.f +program localization + call run_localization +end +#+END_SRC + + +Variables: +| pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +| R(mo_num,mo_num) | double precision | Rotation matrix | +| tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +| prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +| spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +| criterion | double precision | Localization criterion | +| prev_criterion | double precision | Previous criterion | +| criterion_model | double precision | Estimated next criterion | +| rho | double precision | Ratio to measure the agreement between the model | +| | | and the reality | +| delta | double precision | Radisu of the trust region | +| norm_grad | double precision | Norm of the gradient | +| info | integer | for dsyev from Lapack | +| max_elem | double precision | maximal element in the gradient | +| v_grad(:) | double precision | Gradient | +| H(:,:) | double precision | Hessian (diagonal) | +| e_val(:) | double precision | Eigenvalues of the hessian | +| W(:,:) | double precision | Eigenvectors of the hessian | +| tmp_x(:) | double precision | Step in 1D (in a subaspace) | +| tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +| tmp_list(:) | double precision | List of MOs in a mo_class | +| i,j,k | integer | Indexes in the full MO space | +| tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +| l | integer | Index for the mo_class | +| key(:) | integer | Key to sort the eigenvalues of the hessian | +| nb_iter | integer | Number of iterations | +| must_exit | logical | To exit the trust region loop | +| cancel_step | logical | To cancel a step | +| not_*converged | logical | To localize the different mo classes | +| t* | double precision | To measure the time | +| n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +| tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +| | | Number of dimension in the subspace | + +Variables in qp_edit for the localization: +| localization_method | +| localization_max_nb_iter | +| default_mo_class | +| thresh_loc_max_elem_grad | +| kick_in_mos | +| angle_pre_rot | + ++ all the variables for the trust region + +Cf. qp_edit orbital optimization + +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:,:), tmp_m_x(:,:), tmp_x(:),W(:,:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method (with qp_edit) + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (n_act_orb == mo_num .or. & + n_core_orb + n_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to localize all the orbitals:' + print*, 'qp set Orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif +#+END_SRC + +** Loc +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f + ! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n,tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + j = key(i) + W(j,i) = 1d0 + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end +#+END_SRC + +** Gathering +Gradient/hessian/criterion for the localization: +They are chosen in function of the localization method + +Gradient: + +qp_edit : +| localization_method | method for the localization | + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end +#+END_SRC + +Hessian: + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Criterion: + +Output: +| criterion | double precision | Criterion for the orbital localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Subroutine to update the datas needed for the localization +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + print*,'Unkown method: '//localization_method + call abort + endif +end +#+END_SRC + +Angles: + +Output: +| tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +| max_elem | double precision | Maximal angle | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +** Foster-Boys +*** Gradient +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'' + print*,'---End gradient_FB---' + print*,'' + +end subroutine +#+END_SRC + +*** Gradient (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'' + print*,'---End gradient_FB_omp---' + print*,'' + +end subroutine +#+END_SRC + +*** Hessian + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +Internal: +| beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + print*,'' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Min elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + ! Max elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + ! Near 0 + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'' + print*,'---End hessian_FB---' + print*,'' + +end subroutine +#+END_SRC + +*** Hessian (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do j = 1, tmp_n + do i = 1, tmp_n + H(i,j) = 0d0 + enddo + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Min elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + ! Max elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + ! Near 0 + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'' + print*,'---End hessian_FB_omp---' + print*,'' + +end subroutine +#+END_SRC + +** Pipek-Mezey +*** Gradient v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek +#+END_SRC + +*** Gradient + +The gradient is + +\begin{align*} +\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +\end{align*} +with +\begin{align*} +\gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +\end{align*} + +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +| tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +| tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +| | | product and compute tmp_int | +| CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +| tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the mo_class | +| tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the nuclei | +| tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +| | | values depending of the nuclei | +| a | | index to loop over the nuclei | +| b | | index to loop over the AOs which belongs to the nuclei a | +| mu | | index to refer to an AO which belongs to the nuclei a | +| rho | | index to loop over all the AOs | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'' + print*,'---End gradient_PM---' + print*,'' + +end +#+END_SRC + +*** Hessian v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + +! max_elem = 0d0 +! do tmp_k = 1, tmp_n +! if (H(tmp_k,tmp_k) < max_elem) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Min elem H:', max_elem +! +! max_elem = 0d0 +! do tmp_k = 1, tmp_n +! if (H(tmp_k,tmp_k) > max_elem) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Max elem H:', max_elem +! +! max_elem = 1d10 +! do tmp_k = 1, tmp_n +! if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta,tmp_int) + +end +#+END_SRC + +*** Hessian + +The hessian is +\begin{align*} +\left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +\end{align*} +\begin{align*} +\beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +\end{align*} + +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'' + print*,'---End hessian_PM---' + print*,'' + +end + +#+END_SRC + +** Criterion +*** Criterion PM (old) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end +#+END_SRC + +*** Criterion PM + +The criterion is computed as +\begin{align*} +\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +\end{align*} +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + print*,'' + +end +#+END_SRC + +*** Criterion PM v3 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + print*,'' + +end +#+END_SRC + +*** Criterion FB (old) + +The criterion is just computed as + +\begin{align*} +C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +\end{align*} + +The minus sign is here in order to minimize this criterion + +Output: +| criterion | double precision | criterion for the Foster-Boys localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +*** Criterion FB +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +** Theta + +In: +| n | integer | number of MOs in the considered MO class | +| l | integer | list of MOs of the considered class | + +Out: +| m_x(n,n) | double precision | Matrix containing the rotation angle between all the different | +| | | pairs of MOs to apply the rotations (need a minus sign) | +| max_elem | double precision | Maximal angle in absolute value | + +$$\cos(4 \theta) = \frac{-A{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\sin(4 \theta) = \frac{B{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\tan(4 \theta) = \frac{\sin(4 \theta)}{\cos(4 \theta)}$$ +where $\theta$ is in fact $\theta_{ij}$ + +For Foster-Boys localization: +$$A_{ij} = ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = ( - )$$ + + +For Pipek-Mezey localization: +$$A_{ij} = \sum_A ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = \sum_A ( - )$$ +with +$$ = \frac{1}{2} \sum_\rho \sum_{\mu \in A} ( c_\rho^{i*} S_{\rho +\mu} c_\mu^j + c_\mu^{i*} S_{\mu \rho} c_\rho^j)$$ +$i,j$ MOs +$\mu, \rho$ AOs +$A$ nucleus +$S$ overlap matrix +$c$ MO coefficient +$r$ position operator + +#+begin_src f90 :tangle localization_sub.irp.f +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end +#+end_src + +#+begin_src f90 :comments org :tangle localization_sub.irp.f +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end +#+end_src + +** Spatial extent + +The spatial extent of an orbital $i$ is computed as +\begin{align*} +\sum_{\lambda=x,y,z}\sqrt{ - ^2} +\end{align*} + +From that we can also compute the average and the standard deviation + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end +#+END_SRC + +** Utils + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:), tmp_list(:) + +! allocate(iorder(mo_num), fock_energies_tmp(mo_num), new_mo_coef(ao_num, mo_num)) +! +! do i = 1, mo_num +! fock_energies_tmp(i) = Fock_matrix_diag_mo(i) +! print*,'fock_energies_tmp(i) = ',fock_energies_tmp(i) +! iorder(i) = i +! enddo +! +! print*,'' +! print*,'Sorting by Fock energies' +! print*,'' +! +! call dsort(fock_energies_tmp, iorder, mo_num) +! +! do i = 1, mo_num +! k = iorder(i) +! print*,'fock_energies_new(i) = ',fock_energies_tmp(i) +! do j = 1, ao_num +! new_mo_coef(j,i) = mo_coef(j,k) +! enddo +! enddo + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end + +#+END_SRC + diff --git a/src/mo_localization/localization_sub.irp.f b/src/mo_localization/localization_sub.irp.f new file mode 100644 index 00000000..5ca89790 --- /dev/null +++ b/src/mo_localization/localization_sub.irp.f @@ -0,0 +1,2054 @@ +! Gathering +! Gradient/hessian/criterion for the localization: +! They are chosen in function of the localization method + +! Gradient: + +! qp_edit : +! | localization_method | method for the localization | + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + + +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end + + + +! Hessian: + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Criterion: + +! Output: +! | criterion | double precision | Criterion for the orbital localization | + + +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Subroutine to update the datas needed for the localization + +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + print*,'Unkown method: '//localization_method + call abort + endif +end + + + +! Angles: + +! Output: +! | tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +! | max_elem | double precision | Maximal angle | + + + +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + +! Gradient +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'' + print*,'---End gradient_FB---' + print*,'' + +end subroutine + +! Gradient (OMP) + +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'' + print*,'---End gradient_FB_omp---' + print*,'' + +end subroutine + +! Hessian + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! Internal: +! | beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + print*,'' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Min elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + ! Max elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + ! Near 0 + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'' + print*,'---End hessian_FB---' + print*,'' + +end subroutine + +! Hessian (OMP) + +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do j = 1, tmp_n + do i = 1, tmp_n + H(i,j) = 0d0 + enddo + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Min elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + ! Max elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + ! Near 0 + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'' + print*,'---End hessian_FB_omp---' + print*,'' + +end subroutine + +! Gradient v1 + +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek + +! Gradient + +! The gradient is + +! \begin{align*} +! \left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +! \end{align*} +! with +! \begin{align*} +! \gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +! \end{align*} + +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +! | tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +! | tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +! | | | product and compute tmp_int | +! | CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +! | tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the mo_class | +! | tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the nuclei | +! | tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +! | | | values depending of the nuclei | +! | a | | index to loop over the nuclei | +! | b | | index to loop over the AOs which belongs to the nuclei a | +! | mu | | index to refer to an AO which belongs to the nuclei a | +! | rho | | index to loop over all the AOs | + + +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'' + print*,'---End gradient_PM---' + print*,'' + +end + +! Hessian v1 + +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + +! max_elem = 0d0 +! do tmp_k = 1, tmp_n +! if (H(tmp_k,tmp_k) < max_elem) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Min elem H:', max_elem +! +! max_elem = 0d0 +! do tmp_k = 1, tmp_n +! if (H(tmp_k,tmp_k) > max_elem) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Max elem H:', max_elem +! +! max_elem = 1d10 +! do tmp_k = 1, tmp_n +! if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta,tmp_int) + +end + +! Hessian + +! The hessian is +! \begin{align*} +! \left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +! \end{align*} +! \begin{align*} +! \beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +! \end{align*} + +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + + +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'' + print*,'---End hessian_PM---' + print*,'' + +end + +! Criterion PM (old) + +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end + +! Criterion PM + +! The criterion is computed as +! \begin{align*} +! \mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +! \end{align*} +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} + + +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + print*,'' + +end + +! Criterion PM v3 + +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + print*,'' + +end + +! Criterion FB (old) + +! The criterion is just computed as + +! \begin{align*} +! C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +! \end{align*} + +! The minus sign is here in order to minimize this criterion + +! Output: +! | criterion | double precision | criterion for the Foster-Boys localization | + + +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +! Criterion FB + +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end + +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end + +! Spatial extent + +! The spatial extent of an orbital $i$ is computed as +! \begin{align*} +! \sum_{\lambda=x,y,z}\sqrt{ - ^2} +! \end{align*} + +! From that we can also compute the average and the standard deviation + + +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end + +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end + +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end + +! Utils + + +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end + +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine + +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end + +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:), tmp_list(:) + +! allocate(iorder(mo_num), fock_energies_tmp(mo_num), new_mo_coef(ao_num, mo_num)) +! +! do i = 1, mo_num +! fock_energies_tmp(i) = Fock_matrix_diag_mo(i) +! print*,'fock_energies_tmp(i) = ',fock_energies_tmp(i) +! iorder(i) = i +! enddo +! +! print*,'' +! print*,'Sorting by Fock energies' +! print*,'' +! +! call dsort(fock_energies_tmp, iorder, mo_num) +! +! do i = 1, mo_num +! k = iorder(i) +! print*,'fock_energies_new(i) = ',fock_energies_tmp(i) +! do j = 1, ao_num +! new_mo_coef(j,i) = mo_coef(j,k) +! enddo +! enddo + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end diff --git a/src/mo_two_e_ints/core_quantities.irp.f b/src/mo_two_e_ints/core_quantities.irp.f index 3642365e..2afdcf89 100644 --- a/src/mo_two_e_ints/core_quantities.irp.f +++ b/src/mo_two_e_ints/core_quantities.irp.f @@ -59,3 +59,45 @@ BEGIN_PROVIDER [ double precision, h_core_ri, (mo_num, mo_num) ] enddo END_PROVIDER + +BEGIN_PROVIDER [ double precision, h_act_ri, (mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Active Hamiltonian with 3-index exchange integrals: + ! + ! $\tilde{h}{pq} = h_{pq} - \frac{1}{2}\sum_{k} g(pk,kq)$ + END_DOC + + integer :: i,j, k + integer :: p,q, r + ! core-core contribution + h_act_ri = core_fock_operator + !print *,' Bef----hact(1,14)=',h_act_ri(4,14) + ! act-act contribution + do p=1,n_act_orb + j=list_act(p) + do q=1,n_act_orb + i=list_act(q) + h_act_ri(i,j) = mo_one_e_integrals(i,j) + enddo + do r=1,n_act_orb + k=list_act(r) + do q=1,n_act_orb + i=list_act(q) + h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j) + enddo + enddo + enddo + ! core-act contribution + !do p=1,n_act_orb + ! j=list_core(p) + ! do k=1,n_core_orb + ! do q=1,n_act_orb + ! i=list_act(q) + ! h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j) + ! enddo + ! enddo + !enddo + !print *,' Aft----hact(1,14)=',h_act_ri(4,14), mo_one_e_integrals(4,14) +END_PROVIDER + diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 809f594b..2f84e753 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1136,6 +1136,104 @@ subroutine ortho_svd(A,LDA,m,n) end +! QR to orthonormalize CSFs does not work :-( +!subroutine ortho_qr_withB(A,LDA,B,m,n) +! implicit none +! BEGIN_DOC +! ! Orthogonalization using Q.R factorization +! ! +! ! A : Overlap Matrix +! ! +! ! LDA : leftmost dimension of A +! ! +! ! m : Number of rows of A +! ! +! ! n : Number of columns of A +! ! +! ! B : Output orthogonal basis +! ! +! END_DOC +! integer, intent(in) :: m,n, LDA +! double precision, intent(inout) :: A(LDA,n) +! double precision, intent(inout) :: B(LDA,n) +! +! integer :: LWORK, INFO +! integer, allocatable :: jpvt(:) +! double precision, allocatable :: TAU(:), WORK(:) +! double precision, allocatable :: C(:,:) +! double precision :: norm +! integer :: i,j +! +! allocate (TAU(min(m,n)), WORK(1)) +! allocate (jpvt(n)) +! !print *," In function ortho" +! B = A +! +! jpvt(1:n)=1 +! +! LWORK=-1 +! call dgeqp3( m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO ) +! +! ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 +! LWORK=max(n,int(WORK(1))) +! +! deallocate(WORK) +! allocate(WORK(LWORK)) +! call dgeqp3(m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO ) +! print *,A +! print *,jpvt +! deallocate(WORK,TAU) +! !stop +! +! !LWORK=-1 +! !call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO ) +! !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 +! !LWORK=max(n,int(WORK(1))) +! +! !deallocate(WORK) +! !allocate(WORK(LWORK)) +! !call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO ) +! +! !LWORK=-1 +! !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO) +! !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 +! !LWORK=max(n,int(WORK(1))) +! +! !deallocate(WORK) +! !allocate(WORK(LWORK)) +! !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO) +! ! +! !allocate(C(LDA,n)) +! !call dgemm('N','N',m,n,n,1.0d0,B,LDA,A,LDA,0.0d0,C,LDA) +! !norm = 0.0d0 +! !B = 0.0d0 +! !!print *,C +! !do i=1,m +! ! norm = 0.0d0 +! ! do j=1,n +! ! norm = norm + C(j,i)*C(j,i) +! ! end do +! ! norm = 1.0d0/dsqrt(norm) +! ! do j=1,n +! ! B(j,i) = C(j,i) +! ! end do +! !end do +! !print *,B +! +! +! !deallocate(WORK,TAU) +!end + +!subroutine ortho_qr_csf(A, LDA, B, m, n) bind(C, name="ortho_qr_csf") +! use iso_c_binding +! integer(c_int32_t), value :: LDA +! integer(c_int32_t), value :: m +! integer(c_int32_t), value :: n +! integer(c_int16_t) :: A(LDA,n) +! integer(c_int16_t) :: B(LDA,n) +! call ortho_qr_withB(A,LDA,B,m,n) +!end subroutine ortho_qr_csf + subroutine ortho_qr(A,LDA,m,n) implicit none BEGIN_DOC diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index 184d8052..c9e35ee1 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -430,3 +430,28 @@ subroutine lowercase(txt,n) enddo end +subroutine v2_over_x(v,x,res) + + !BEGIN_DOC + ! Two by two diagonalization to avoid the divergence in v^2/x when x goes to 0 + !END_DOC + + implicit none + + double precision, intent(in) :: v, x + double precision, intent(out) :: res + + double precision :: delta_E, tmp, val + + res = 0d0 + delta_E = x + if (v == 0.d0) return + + val = 2d0 * v + tmp = dsqrt(delta_E * delta_E + val * val) + if (delta_E < 0.d0) then + tmp = -tmp + endif + res = 0.5d0 * (tmp - delta_E) + +end diff --git a/src/utils_trust_region/EZFIO.cfg b/src/utils_trust_region/EZFIO.cfg new file mode 100644 index 00000000..9c9f6248 --- /dev/null +++ b/src/utils_trust_region/EZFIO.cfg @@ -0,0 +1,89 @@ +[thresh_delta] +type: double precision +doc: Threshold to stop the optimization if the radius of the trust region delta < thresh_delta +interface: ezfio,provider,ocaml +default: 1.e-10 + +[thresh_rho] +type: double precision +doc: Threshold for the step acceptance in the trust region algorithm, if (rho .geq. thresh_rho) the step is accepted, else the step is cancelled and a smaller step is tried until (rho .geq. thresh_rho) +interface: ezfio,provider,ocaml +default: 0.1 + +[thresh_eig] +type: double precision +doc: Threshold to consider when an eigenvalue is 0 in the trust region algorithm +interface: ezfio,provider,ocaml +default: 1.e-12 + +[thresh_model] +type: double precision +doc: If if ABS(criterion - criterion_model) < thresh_model, the program exit the trust region algorithm +interface: ezfio,provider,ocaml +default: 1.e-12 + +[absolute_eig] +type: logical +doc: If True, the algorithm replace the eigenvalues of the hessian by their absolute value to compute the step (in the trust region) +interface: ezfio,provider,ocaml +default: false + +[thresh_wtg] +type: double precision +doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is equal to 0. Must be smaller than thresh_eig by several order of magnitude to avoid numerical problem. If the research of the optimal lambda cannot reach the condition (||x|| .eq. delta) because (||x|| .lt. delta), the reason might be that thresh_wtg is too big or/and thresh_eig is too small +interface: ezfio,provider,ocaml +default: 1.e-6 + +[thresh_wtg2] +type: double precision +doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is 0 in the case of avoid_saddle .eq. true. There is no particular reason to put a different value that thresh_wtg, but it can be useful one day +interface: ezfio,provider,ocaml +default: 1.e-6 + +[avoid_saddle] +type: logical +doc: Test to avoid saddle point, active if true +interface: ezfio,provider,ocaml +default: false + +[version_avoid_saddle] +type: integer +doc: cf. trust region, not stable +interface: ezfio,provider,ocaml +default: 3 + +[thresh_rho_2] +type: double precision +doc: Threshold for the step acceptance for the research of lambda in the trust region algorithm, if (rho_2 .geq. thresh_rho_2) the step is accepted, else the step is rejected +interface: ezfio,provider,ocaml +default: 0.1 + +[thresh_cc] +type: double precision +doc: Threshold to stop the research of the optimal lambda in the trust region algorithm when (dabs(1d0-||x||^2/delta^2) < thresh_cc) +interface: ezfio,provider,ocaml +default: 1.e-6 + +[thresh_model_2] +type: double precision +doc: if (ABS(criterion - criterion_model) < thresh_model_2), i.e., the difference between the actual criterion and the predicted next criterion, during the research of the optimal lambda in the trust region algorithm it prints a warning +interface: ezfio,provider,ocaml +default: 1.e-12 + +[version_lambda_search] +type: integer +doc: Research of the optimal lambda in the trust region algorithm to constrain the norm of the step by solving: 1 -> ||x||^2 - delta^2 .eq. 0, 2 -> 1/||x||^2 - 1/delta^2 .eq. 0 +interface: ezfio,provider,ocaml +default: 2 + +[nb_it_max_lambda] +type: integer +doc: Maximal number of iterations for the research of the optimal lambda in the trust region algorithm +interface: ezfio,provider,ocaml +default: 100 + +[nb_it_max_pre_search] +type: integer +doc: Maximal number of iterations for the pre-research of the optimal lambda in the trust region algorithm +interface: ezfio,provider,ocaml +default: 40 diff --git a/src/utils_trust_region/NEED b/src/utils_trust_region/NEED new file mode 100644 index 00000000..1a65ce38 --- /dev/null +++ b/src/utils_trust_region/NEED @@ -0,0 +1 @@ +hartree_fock diff --git a/src/utils_trust_region/README.rst b/src/utils_trust_region/README.rst new file mode 100644 index 00000000..6a0689b6 --- /dev/null +++ b/src/utils_trust_region/README.rst @@ -0,0 +1,5 @@ +============ +trust_region +============ + +The documentation can be found in the org files. diff --git a/src/utils_trust_region/TANGLE_org_mode.sh b/src/utils_trust_region/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/utils_trust_region/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f new file mode 100644 index 00000000..eac17275 --- /dev/null +++ b/src/utils_trust_region/algo_trust.irp.f @@ -0,0 +1,248 @@ +! Algorithm for the trust region + +! step_in_trust_region: +! Computes the step in the trust region (delta) +! (automatically sets at the iteration 0 and which evolves during the +! process in function of the evolution of rho). The step is computing by +! constraining its norm with a lagrange multiplier. +! Since the calculation of the step is based on the Newton method, an +! estimation of the gain in energy is given using the Taylors series +! truncated at the second order (criterion_model). +! If (DABS(criterion-criterion_model) < 1d-12) then +! must_exit = .True. +! else +! must_exit = .False. + +! This estimation of the gain in energy is used by +! is_step_cancel_trust_region to say if the step is accepted or cancelled. + +! If the step must be cancelled, the calculation restart from the same +! hessian and gradient and recomputes the step but in a smaller trust +! region and so on until the step is accepted. If the step is accepted +! the hessian and the gradient are recomputed to produce a new step. + +! Example: + + +! !### Initialization ### +! delta = 0d0 +! nb_iter = 0 ! Must start at 0 !!! +! rho = 0.5d0 +! not_converged = .True. +! +! ! ### TODO ### +! ! Compute the criterion before the loop +! call #your_criterion(prev_criterion) +! +! do while (not_converged) +! ! ### TODO ## +! ! Call your gradient +! ! Call you hessian +! call #your_gradient(v_grad) (1D array) +! call #your_hessian(H) (2D array) +! +! ! ### TODO ### +! ! Diagonalization of the hessian +! call diagonalization_hessian(n,H,e_val,w) +! +! cancel_step = .True. ! To enter in the loop just after +! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho +! do while (cancel_step) +! +! ! Hessian,gradient,Criterion -> x +! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) +! +! if (must_exit) then +! ! ### Message ### +! ! if step_in_trust_region sets must_exit on true for numerical reasons +! print*,'algo_trust1 sends the message : Exit' +! !### exit ### +! endif +! +! !### TODO ### +! ! Compute x -> m_x +! ! Compute m_x -> R +! ! Apply R and keep the previous MOs... +! ! Update/touch +! ! Compute the new criterion/energy -> criterion +! +! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) +! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) +! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) +! +! TOUCH #your_variables +! +! call #your_criterion(criterion) +! +! ! Criterion -> step accepted or rejected +! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) +! +! ! ### TODO ### +! !if (cancel_step) then +! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) +! !endif +! #if (cancel_step) then +! #mo_coef = prev_mos +! #endif +! +! enddo +! +! !call save_mos() !### depend of the time for 1 iteration +! +! ! To exit the external loop if must_exit = .True. +! if (must_exit) then +! !### exit ### +! endif +! +! ! Step accepted, nb iteration + 1 +! nb_iter = nb_iter + 1 +! +! ! ### TODO ### +! !if (###Conditions###) then +! ! no_converged = .False. +! !endif +! #if (#your_conditions) then +! # not_converged = .False. +! #endif +! +! enddo + + + +! Variables: + +! Input: +! | n | integer | m*(m-1)/2 | +! | m | integer | number of mo in the mo_class | +! | H(n,n) | double precision | Hessian | +! | v_grad(n) | double precision | Gradient | +! | W(n,n) | double precision | Eigenvectors of the hessian | +! | e_val(n) | double precision | Eigenvalues of the hessian | +! | criterion | double precision | Actual criterion | +! | prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | +! | rho | double precision | Given by is_step_cancel_trus_region | +! | | | Agreement between the real function and the Taylor series (2nd order) | +! | nb_iter | integer | Actual number of iterations | + +! Input/output: +! | delta | double precision | Radius of the trust region | + +! Output: +! | criterion_model | double precision | Predicted criterion after the rotation | +! | x(n) | double precision | Step | +! | must_exit | logical | If the program must exit the loop | + + +subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) + + include 'pi.h' + + BEGIN_DOC + ! Compute the step and the expected criterion/energy after the step + END_DOC + + implicit none + + ! in + integer, intent(in) :: n, nb_iter + double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) + double precision, intent(in) :: rho, prev_criterion + + ! inout + double precision, intent(inout) :: delta, e_val(n) + + ! out + double precision, intent(out) :: criterion_model, x(n) + logical, intent(out) :: must_exit + + ! internal + integer :: info + + must_exit = .False. + + call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) + + call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) + + ! exit if DABS(prev_criterion - criterion_model) < 1d-12 + if (DABS(prev_criterion - criterion_model) < thresh_model) then + print*,'' + print*,'###############################################################################' + print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' + print*,'###############################################################################' + print*,'' + must_exit = .True. + endif + + if (delta < thresh_delta) then + print*,'' + print*,'##############################################' + print*,'Delta <', thresh_delta, 'stop the trust region' + print*,'##############################################' + print*,'' + must_exit = .True. + endif + + ! Add after the call to this subroutine, a statement: + ! "if (must_exit) then + ! exit + ! endif" + ! in order to exit the optimization loop + +end subroutine + + + +! Variables: + +! Input: +! | nb_iter | integer | actual number of iterations | +! | prev_criterion | double precision | criterion before the application of the step x | +! | criterion | double precision | criterion after the application of the step x | +! | criterion_model | double precision | predicted criterion after the application of x | + +! Output: +! | rho | double precision | Agreement between the predicted criterion and the real new criterion | +! | cancel_step | logical | If the step must be cancelled | + + +subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) + + include 'pi.h' + + BEGIN_DOC + ! Compute if the step should be cancelled + END_DOC + + implicit none + + ! in + double precision, intent(in) :: prev_criterion, criterion, criterion_model + + ! inout + integer, intent(inout) :: nb_iter + + ! out + logical, intent(out) :: cancel_step + double precision, intent(out) :: rho + + ! Computes rho + call trust_region_rho(prev_criterion,criterion,criterion_model,rho) + + if (nb_iter == 0) then + nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled + endif + + ! If rho < thresh_rho -> give something in output to cancel the step + if (rho >= thresh_rho) then !0.1d0) then + ! The step is accepted + cancel_step = .False. + else + ! The step is rejected + cancel_step = .True. + print*, '***********************' + print*, 'Step cancel : rho <', thresh_rho + print*, '***********************' + endif + +end subroutine diff --git a/src/utils_trust_region/algo_trust.org b/src/utils_trust_region/algo_trust.org new file mode 100644 index 00000000..aa836f98 --- /dev/null +++ b/src/utils_trust_region/algo_trust.org @@ -0,0 +1,593 @@ +* Algorithm for the trust region + +step_in_trust_region: +Computes the step in the trust region (delta) +(automatically sets at the iteration 0 and which evolves during the +process in function of the evolution of rho). The step is computing by +constraining its norm with a lagrange multiplier. +Since the calculation of the step is based on the Newton method, an +estimation of the gain in energy is given using the Taylors series +truncated at the second order (criterion_model). +If (DABS(criterion-criterion_model) < 1d-12) then + must_exit = .True. +else + must_exit = .False. + +This estimation of the gain in energy is used by +is_step_cancel_trust_region to say if the step is accepted or cancelled. + +If the step must be cancelled, the calculation restart from the same +hessian and gradient and recomputes the step but in a smaller trust +region and so on until the step is accepted. If the step is accepted +the hessian and the gradient are recomputed to produce a new step. + +Example: + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +! !### Initialization ### +! delta = 0d0 +! nb_iter = 0 ! Must start at 0 !!! +! rho = 0.5d0 +! not_converged = .True. +! +! ! ### TODO ### +! ! Compute the criterion before the loop +! call #your_criterion(prev_criterion) +! +! do while (not_converged) +! ! ### TODO ## +! ! Call your gradient +! ! Call you hessian +! call #your_gradient(v_grad) (1D array) +! call #your_hessian(H) (2D array) +! +! ! ### TODO ### +! ! Diagonalization of the hessian +! call diagonalization_hessian(n,H,e_val,w) +! +! cancel_step = .True. ! To enter in the loop just after +! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho +! do while (cancel_step) +! +! ! Hessian,gradient,Criterion -> x +! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) +! +! if (must_exit) then +! ! ### Message ### +! ! if step_in_trust_region sets must_exit on true for numerical reasons +! print*,'algo_trust1 sends the message : Exit' +! !### exit ### +! endif +! +! !### TODO ### +! ! Compute x -> m_x +! ! Compute m_x -> R +! ! Apply R and keep the previous MOs... +! ! Update/touch +! ! Compute the new criterion/energy -> criterion +! +! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) +! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) +! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) +! +! TOUCH #your_variables +! +! call #your_criterion(criterion) +! +! ! Criterion -> step accepted or rejected +! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) +! +! ! ### TODO ### +! !if (cancel_step) then +! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) +! !endif +! #if (cancel_step) then +! #mo_coef = prev_mos +! #endif +! +! enddo +! +! !call save_mos() !### depend of the time for 1 iteration +! +! ! To exit the external loop if must_exit = .True. +! if (must_exit) then +! !### exit ### +! endif +! +! ! Step accepted, nb iteration + 1 +! nb_iter = nb_iter + 1 +! +! ! ### TODO ### +! !if (###Conditions###) then +! ! no_converged = .False. +! !endif +! #if (#your_conditions) then +! # not_converged = .False. +! #endif +! +! enddo +#+END_SRC + +Variables: + +Input: +| n | integer | m*(m-1)/2 | +| m | integer | number of mo in the mo_class | +| H(n,n) | double precision | Hessian | +| v_grad(n) | double precision | Gradient | +| W(n,n) | double precision | Eigenvectors of the hessian | +| e_val(n) | double precision | Eigenvalues of the hessian | +| criterion | double precision | Actual criterion | +| prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | +| rho | double precision | Given by is_step_cancel_trus_region | +| | | Agreement between the real function and the Taylor series (2nd order) | +| nb_iter | integer | Actual number of iterations | + +Input/output: +| delta | double precision | Radius of the trust region | + +Output: +| criterion_model | double precision | Predicted criterion after the rotation | +| x(n) | double precision | Step | +| must_exit | logical | If the program must exit the loop | + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) + + include 'pi.h' + + BEGIN_DOC + ! Compute the step and the expected criterion/energy after the step + END_DOC + + implicit none + + ! in + integer, intent(in) :: n, nb_iter + double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) + double precision, intent(in) :: rho, prev_criterion + + ! inout + double precision, intent(inout) :: delta, e_val(n) + + ! out + double precision, intent(out) :: criterion_model, x(n) + logical, intent(out) :: must_exit + + ! internal + integer :: info + + must_exit = .False. + + call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) + + call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) + + ! exit if DABS(prev_criterion - criterion_model) < 1d-12 + if (DABS(prev_criterion - criterion_model) < thresh_model) then + print*,'' + print*,'###############################################################################' + print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' + print*,'###############################################################################' + print*,'' + must_exit = .True. + endif + + if (delta < thresh_delta) then + print*,'' + print*,'##############################################' + print*,'Delta <', thresh_delta, 'stop the trust region' + print*,'##############################################' + print*,'' + must_exit = .True. + endif + + ! Add after the call to this subroutine, a statement: + ! "if (must_exit) then + ! exit + ! endif" + ! in order to exit the optimization loop + +end subroutine +#+END_SRC + +Variables: + +Input: +| nb_iter | integer | actual number of iterations | +| prev_criterion | double precision | criterion before the application of the step x | +| criterion | double precision | criterion after the application of the step x | +| criterion_model | double precision | predicted criterion after the application of x | + +Output: +| rho | double precision | Agreement between the predicted criterion and the real new criterion | +| cancel_step | logical | If the step must be cancelled | + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) + + include 'pi.h' + + BEGIN_DOC + ! Compute if the step should be cancelled + END_DOC + + implicit none + + ! in + double precision, intent(in) :: prev_criterion, criterion, criterion_model + + ! inout + integer, intent(inout) :: nb_iter + + ! out + logical, intent(out) :: cancel_step + double precision, intent(out) :: rho + + ! Computes rho + call trust_region_rho(prev_criterion,criterion,criterion_model,rho) + + if (nb_iter == 0) then + nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled + endif + + ! If rho < thresh_rho -> give something in output to cancel the step + if (rho >= thresh_rho) then !0.1d0) then + ! The step is accepted + cancel_step = .False. + else + ! The step is rejected + cancel_step = .True. + print*, '***********************' + print*, 'Step cancel : rho <', thresh_rho + print*, '***********************' + endif + +end subroutine +#+END_SRC + +** Template for MOs +#+BEGIN_SRC f90 :comments org :tangle trust_region_template_mos.txt +subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list) + + implicit none + + ! Variables + + ! In + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + + ! Out + ! Rien ou un truc pour savoir si ça c'est bien passé + + ! Internal + double precision, allocatable :: e_val(:), W(:,:), tmp_R(:,:), R(:,:), tmp_x(:), tmp_m_x(:,:) + double precision, allocatable :: prev_mos(:,:) + double precision :: criterion, prev_criterion, criterion_model + double precision :: delta, rho + logical :: not_converged, cancel_step, must_exit, enforce_step_cancellation + integer :: nb_iter, info, nb_sub_iter + integer :: i,j,tmp_i,tmp_j + + allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n),tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Provide the criterion, but unnecessary because it's done + ! automatically + PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! Initialization + delta = 0d0 + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must start at 0.5 + not_converged = .True. ! Must be true + + ! Compute the criterion before the loop + prev_criterion = C_PROVIDER + + do while (not_converged) + + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! The new hessian and gradient are computed at the end of the previous iteration + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + cancel_step = .True. ! To enter in the loop just after + nb_sub_iter = 0 + + ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + + print*,'-----------------------------' + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + if (must_exit) then + ! if step_in_trust_region sets must_exit on true for numerical reasons + print*,'trust_region_step_w_expected_e sent the message : Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Forces the step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! touch mo_coef + call clear_mo_map ! Only if you are using the bi-electronic integrals + ! mo_coef becomes valid + ! And avoid the recomputation of the providers which depend of mo_coef + TOUCH mo_coef C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! To update the other parameters if needed + call #update_parameters() + + ! To enforce the program to provide new criterion after the update + ! of the parameters + FREE C_PROVIDER + PROVIDE C_PROVIDER + criterion = C_PROVIDER + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step ? + if (cancel_step) then + ! Replacement by the previous MOs + mo_coef = prev_mos + ! call save_mos() ! depends of the time for 1 iteration + + ! No need to clear_mo_map since we don't recompute the gradient and the hessian + ! mo_coef becomes valid + ! Avoid the recomputation of the providers which depend of mo_coef + TOUCH mo_coef H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER + else + ! The step is accepted: + ! criterion -> prev criterion + + ! The replacement "criterion -> prev criterion" is already done + ! in trust_region_rho, so if the criterion does not have a reason + ! to change, it will change nothing for the criterion and will + ! force the program to provide the new hessian, gradient and + ! convergence criterion for the next iteration. + ! But in the case of orbital optimization we diagonalize the CI + ! matrix after the "FREE" statement, so the criterion will change + + FREE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + prev_criterion = C_PROVIDER + + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + + ! call save_mos() ! depends of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! Provide the convergence criterion + ! Provide the gradient and the hessian for the next iteration + PROVIDE cc_PROVIDER + + ! To exit + if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then + not_converged = .False. + endif + + if (nb_iter > optimization_max_nb_iter) then + not_converged = .False. + endif + + if (delta < thresh_delta) then + not_converged = .False. + endif + + enddo + + ! Save the final MOs + call save_mos() + + ! Diagonalization of the hessian + ! (To see the eigenvalues at the end of the optimization) + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + deallocate(e_val, W, tmp_R, R, tmp_x, prev_mos) + +end +#+END_SRC + +** Cartesian version +#+BEGIN_SRC f90 :comments org :tangle trust_region_template_xyz.txt +subroutine algo_trust_cartesian_template(tmp_n) + + implicit none + + ! Variables + + ! In + integer, intent(in) :: tmp_n + + ! Out + ! Rien ou un truc pour savoir si ça c'est bien passé + + ! Internal + double precision, allocatable :: e_val(:), W(:,:), tmp_x(:) + double precision :: criterion, prev_criterion, criterion_model + double precision :: delta, rho + logical :: not_converged, cancel_step, must_exit + integer :: nb_iter, nb_sub_iter + integer :: i,j + + allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n)) + + PROVIDE C_PROVIDER X_PROVIDER H_PROVIDER g_PROVIDER + + ! Initialization + delta = 0d0 + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must start at 0.5 + not_converged = .True. ! Must be true + + ! Compute the criterion before the loop + prev_criterion = C_PROVIDER + + do while (not_converged) + + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + if (nb_iter > 0) then + PROVIDE H_PROVIDER g_PROVIDER + endif + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + cancel_step = .True. ! To enter in the loop just after + nb_sub_iter = 0 + + ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + + print*,'-----------------------------' + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + if (must_exit) then + ! if step_in_trust_region sets must_exit on true for numerical reasons + print*,'trust_region_step_w_expected_e sent the message : Exit' + exit + endif + + ! New coordinates, check the sign + X_PROVIDER = X_PROVIDER - tmp_x + + ! touch X_PROVIDER + TOUCH X_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! To update the other parameters if needed + call #update_parameters() + + ! New criterion + PROVIDE C_PROVIDER ! Unnecessary + criterion = C_PROVIDER + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancel the previous step + if (cancel_step) then + ! Replacement by the previous coordinates, check the sign + X_PROVIDER = X_PROVIDER + tmp_x + + ! Avoid the recomputation of the hessian and the gradient + TOUCH X_PROVIDER H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + PROVIDE cc_PROVIDER + + ! To exit + if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then + not_converged = .False. + endif + + if (nb_iter > optimization_max_nb_iter) then + not_converged = .False. + endif + + if (delta < thresh_delta) then + not_converged = .False. + endif + + enddo + + deallocate(e_val, W, tmp_x) + +end +#+END_SRC + +** Script template +#+BEGIN_SRC bash :tangle script_template_mos.sh +#!/bin/bash + +your_file= + +your_C_PROVIDER= +your_H_PROVIDER= +your_g_PROVIDER= +your_cc_PROVIDER= + +sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_mos.txt > $your_file +sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file +sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file +sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file +#+END_SRC + +#+BEGIN_SRC bash :tangle script_template_xyz.sh +#!/bin/bash + +your_file= + +your_C_PROVIDER= +your_X_PROVIDER= +your_H_PROVIDER= +your_g_PROVIDER= +your_cc_PROVIDER= + +sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_xyz.txt > $your_file +sed -i "s/X_PROVIDER/$your_X_PROVIDER/g" $your_file +sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file +sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file +sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file +#+END_SRC + diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f new file mode 100644 index 00000000..e274ec11 --- /dev/null +++ b/src/utils_trust_region/apply_mo_rotation.irp.f @@ -0,0 +1,85 @@ +! Apply MO rotation +! Subroutine to apply the rotation matrix to the coefficients of the +! MOs. + +! New MOs = Old MOs . Rotation matrix + +! *Compute the new MOs with the previous MOs and a rotation matrix* + +! Provided: +! | mo_num | integer | number of MOs | +! | ao_num | integer | number of AOs | +! | mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | + +! Intent in: +! | R(mo_num,mo_num) | double precision | rotation matrix | + +! Intent out: +! | prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | + +! Internal: +! | new_mos(ao_num,mo_num) | double precision | MOs after the rotation | +! | i,j | integer | indexes | + +subroutine apply_mo_rotation(R,prev_mos) + + include 'pi.h' + + BEGIN_DOC + ! Compute the new MOs knowing the rotation matrix + END_DOC + + implicit none + + ! Variables + + ! in + double precision, intent(in) :: R(mo_num,mo_num) + + ! out + double precision, intent(out) :: prev_mos(ao_num,mo_num) + + ! internal + double precision, allocatable :: new_mos(:,:) + integer :: i,j + double precision :: t1,t2,t3 + + print*,'' + print*,'---apply_mo_rotation---' + + call wall_time(t1) + + ! Allocation + allocate(new_mos(ao_num,mo_num)) + + ! Calculation + + ! Product of old MOs (mo_coef) by Rotation matrix (R) + call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) + + prev_mos = mo_coef + mo_coef = new_mos + + !if (debug) then + ! print*,'New mo_coef : ' + ! do i = 1, mo_num + ! write(*,'(100(F10.5))') mo_coef(i,:) + ! enddo + !endif + + ! Save the new MOs and change the label + mo_label = 'MCSCF' + !call save_mos + call ezfio_set_determinants_mo_label(mo_label) + + !print*,'Done, MOs saved' + + ! Deallocation, end + deallocate(new_mos) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in apply mo rotation:', t3 + print*,'---End apply_mo_rotation---' + +end subroutine diff --git a/src/utils_trust_region/apply_mo_rotation.org b/src/utils_trust_region/apply_mo_rotation.org new file mode 100644 index 00000000..918581b7 --- /dev/null +++ b/src/utils_trust_region/apply_mo_rotation.org @@ -0,0 +1,86 @@ +* Apply MO rotation +Subroutine to apply the rotation matrix to the coefficients of the +MOs. + +New MOs = Old MOs . Rotation matrix + +*Compute the new MOs with the previous MOs and a rotation matrix* + +Provided: +| mo_num | integer | number of MOs | +| ao_num | integer | number of AOs | +| mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | + +Intent in: +| R(mo_num,mo_num) | double precision | rotation matrix | + +Intent out: +| prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | + +Internal: +| new_mos(ao_num,mo_num) | double precision | MOs after the rotation | +| i,j | integer | indexes | +#+BEGIN_SRC f90 :comments org :tangle apply_mo_rotation.irp.f +subroutine apply_mo_rotation(R,prev_mos) + + include 'pi.h' + + BEGIN_DOC + ! Compute the new MOs knowing the rotation matrix + END_DOC + + implicit none + + ! Variables + + ! in + double precision, intent(in) :: R(mo_num,mo_num) + + ! out + double precision, intent(out) :: prev_mos(ao_num,mo_num) + + ! internal + double precision, allocatable :: new_mos(:,:) + integer :: i,j + double precision :: t1,t2,t3 + + print*,'' + print*,'---apply_mo_rotation---' + + call wall_time(t1) + + ! Allocation + allocate(new_mos(ao_num,mo_num)) + + ! Calculation + + ! Product of old MOs (mo_coef) by Rotation matrix (R) + call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) + + prev_mos = mo_coef + mo_coef = new_mos + + !if (debug) then + ! print*,'New mo_coef : ' + ! do i = 1, mo_num + ! write(*,'(100(F10.5))') mo_coef(i,:) + ! enddo + !endif + + ! Save the new MOs and change the label + mo_label = 'MCSCF' + !call save_mos + call ezfio_set_determinants_mo_label(mo_label) + + !print*,'Done, MOs saved' + + ! Deallocation, end + deallocate(new_mos) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in apply mo rotation:', t3 + print*,'---End apply_mo_rotation---' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/mat_to_vec_index.irp.f b/src/utils_trust_region/mat_to_vec_index.irp.f new file mode 100644 index 00000000..35e12232 --- /dev/null +++ b/src/utils_trust_region/mat_to_vec_index.irp.f @@ -0,0 +1,61 @@ +! Matrix to vector index + +! *Compute the index i of a vector element from the indexes p,q of a +! matrix element* + +! Lower diagonal matrix (p,q), p > q -> vector (i) + +! If a matrix is antisymmetric it can be reshaped as a vector. And the +! vector can be reshaped as an antisymmetric matrix + +! \begin{align*} +! \begin{pmatrix} +! 0 & -1 & -2 & -4 \\ +! 1 & 0 & -3 & -5 \\ +! 2 & 3 & 0 & -6 \\ +! 4 & 5 & 6 & 0 +! \end{pmatrix} +! \Leftrightarrow +! \begin{pmatrix} +! 1 & 2 & 3 & 4 & 5 & 6 +! \end{pmatrix} +! \end{align*} + +! !!! Here the algorithm only work for the lower diagonal !!! + +! Input: +! | p,q | integer | indexes of a matrix element in the lower diagonal | +! | | | p > q, q -> column | +! | | | p -> row, | +! | | | q -> column | + +! Input: +! | i | integer | corresponding index in the vector | + + +subroutine mat_to_vec_index(p,q,i) + + include 'pi.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: p,q + + ! out + integer, intent(out) :: i + + ! internal + integer :: a,b + double precision :: da + + ! Calculation + + a = p-1 + b = a*(a-1)/2 + + i = q+b + +end subroutine diff --git a/src/utils_trust_region/mat_to_vec_index.org b/src/utils_trust_region/mat_to_vec_index.org new file mode 100644 index 00000000..50840584 --- /dev/null +++ b/src/utils_trust_region/mat_to_vec_index.org @@ -0,0 +1,63 @@ +* Matrix to vector index + +*Compute the index i of a vector element from the indexes p,q of a +matrix element* + +Lower diagonal matrix (p,q), p > q -> vector (i) + +If a matrix is antisymmetric it can be reshaped as a vector. And the +vector can be reshaped as an antisymmetric matrix + +\begin{align*} +\begin{pmatrix} +0 & -1 & -2 & -4 \\ +1 & 0 & -3 & -5 \\ +2 & 3 & 0 & -6 \\ +4 & 5 & 6 & 0 +\end{pmatrix} +\Leftrightarrow +\begin{pmatrix} +1 & 2 & 3 & 4 & 5 & 6 +\end{pmatrix} +\end{align*} + +!!! Here the algorithm only work for the lower diagonal !!! + +Input: +| p,q | integer | indexes of a matrix element in the lower diagonal | +| | | p > q, q -> column | +| | | p -> row, | +| | | q -> column | + +Input: +| i | integer | corresponding index in the vector | + +#+BEGIN_SRC f90 :comments org :tangle mat_to_vec_index.irp.f +subroutine mat_to_vec_index(p,q,i) + + include 'pi.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: p,q + + ! out + integer, intent(out) :: i + + ! internal + integer :: a,b + double precision :: da + + ! Calculation + + a = p-1 + b = a*(a-1)/2 + + i = q+b + +end subroutine +#+END_SRC + diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h new file mode 100644 index 00000000..bbfabfec --- /dev/null +++ b/src/utils_trust_region/pi.h @@ -0,0 +1,2 @@ + !logical, parameter :: debug=.False. + double precision, parameter :: pi = 3.1415926535897932d0 diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f new file mode 100644 index 00000000..4738fd67 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix.irp.f @@ -0,0 +1,443 @@ +! Rotation matrix + +! *Build a rotation matrix from an antisymmetric matrix* + +! Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : +! $$ +! \textbf{R}=\exp(\textbf{A}) +! $$ + +! So : +! \begin{align*} +! \textbf{R}=& \exp(\textbf{A}) \\ +! =& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ +! =& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} +! \end{align*} + +! With : +! $\textbf{W}$ : eigenvectors of $\textbf{A}^2$ +! $\tau$ : $\sqrt{-x}$ +! $x$ : eigenvalues of $\textbf{A}^2$ + +! Input: +! | A(n,n) | double precision | antisymmetric matrix | +! | n | integer | number of columns of the A matrix | +! | LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | +! | LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | + +! Output: +! | R(n,n) | double precision | Rotation matrix | +! | info | integer | if info = 0, the execution is successful | +! | | | if info = k, the k-th parameter has an illegal value | +! | | | if info = -k, the algorithm failed | + +! Internal: +! | B(n,n) | double precision | B = A.A | +! | work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | +! | lwork | integer | dimension of the syev work array >= max(1, 3n-1) | +! | W(n,n) | double precision | eigenvectors of B | +! | e_val(n) | double precision | eigenvalues of B | +! | m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | +! | cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | +! | sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | +! | tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | +! | part_1(n,n) | double precision | matrix W.cos_tau.W^t | +! | part_1a(n,n) | double precision | matrix cos_tau.W^t | +! | part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | +! | part_2a(n,n) | double precision | matrix W^t.A | +! | part_2b(n,n) | double precision | matrix sin_tau.W^t.A | +! | part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | +! | RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | +! | norm | integer | norm of R.R^t-1, must be equal to 0 | +! | i,j | integer | indexes | + +! Functions: +! | dnrm2 | double precision | Lapack function, compute the norm of a matrix | +! | disnan | logical | Lapack function, check if an element is NaN | + + + +subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) + + implicit none + + BEGIN_DOC + ! Rotation matrix to rotate the molecular orbitals. + ! If the rotation is too large the transformation is not unitary and must be cancelled. + END_DOC + + include 'pi.h' + + ! Variables + + ! in + integer, intent(in) :: n,LDA,LDR + double precision, intent(inout) :: A(LDA,n) + + ! out + double precision, intent(out) :: R(LDR,n) + integer, intent(out) :: info + logical, intent(out) :: enforce_step_cancellation + + ! internal + double precision, allocatable :: B(:,:) + double precision, allocatable :: work(:,:) + double precision, allocatable :: W(:,:), e_val(:) + double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) + double precision, allocatable :: part_1(:,:),part_1a(:,:) + double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) + double precision, allocatable :: RR_t(:,:) + integer :: i,j + integer :: info2, lwork ! for dsyev + double precision :: norm, max_elem, max_elem_A, t1,t2,t3 + + ! function + double precision :: dnrm2 + logical :: disnan + + print*,'' + print*,'---rotation_matrix---' + + call wall_time(t1) + + ! Allocation + allocate(B(n,n)) + allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) + allocate(W(n,n),e_val(n)) + allocate(part_1(n,n),part_1a(n,n)) + allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) + allocate(RR_t(n,n)) + +! Pre-conditions + +! Initialization +info=0 +enforce_step_cancellation = .False. + +! Size of matrix A must be at least 1 by 1 +if (n<1) then + info = 3 + print*, 'WARNING: invalid parameter 5' + print*, 'n<1' + return +endif + +! Leading dimension of A must be >= n +if (LDA < n) then + info = 25 + print*, 'WARNING: invalid parameter 2 or 5' + print*, 'LDA < n' + return +endif + +! Leading dimension of A must be >= n +if (LDR < n) then + info = 4 + print*, 'WARNING: invalid parameter 4' + print*, 'LDR < n' + return +endif + +! Matrix elements of A must by non-NaN +do j = 1, n + do i = 1, n + if (disnan(A(i,j))) then + info=1 + print*, 'WARNING: invalid parameter 1' + print*, 'NaN element in A matrix' + return + endif + enddo +enddo + +do i = 1, n + if (A(i,i) /= 0d0) then + print*, 'WARNING: matrix A is not antisymmetric' + print*, 'Non 0 element on the diagonal', i, A(i,i) + call ABORT + endif +enddo + +do j = 1, n + do i = 1, n + if (A(i,j)+A(j,i)>1d-16) then + print*, 'WANRING: matrix A is not antisymmetric' + print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) + print*, 'diff:', A(i,j)+A(j,i) + call ABORT + endif + enddo +enddo + +! Fix for too big elements ! bad idea better to cancel if the error is too big +!do j = 1, n +! do i = 1, n +! A(i,j) = mod(A(i,j),2d0*pi) +! if (dabs(A(i,j)) > pi) then +! A(i,j) = 0d0 +! endif +! enddo +!enddo + +max_elem_A = 0d0 +do j = 1, n + do i = 1, n + if (ABS(A(i,j)) > ABS(max_elem_A)) then + max_elem_A = A(i,j) + endif + enddo +enddo +print*,'max element in A', max_elem_A + +if (ABS(max_elem_A) > 2 * pi) then + print*,'' + print*,'WARNING: ABS(max_elem_A) > 2 pi ' + print*,'' +endif + +! B=A.A +! - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ +! - Diagonalization of $\textbf{B}$ +! W, the eigenvectors +! e_val, the eigenvalues + + +! Compute B=A.A + +call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) + +! Copy B in W, diagonalization will put the eigenvectors in W +W=B + +! Diagonalization of B +! Eigenvalues -> e_val +! Eigenvectors -> W +lwork = 3*n-1 +allocate(work(lwork,n)) + +print*,'Starting diagonalization ...' + +call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) + +deallocate(work) + +if (info2 == 0) then + print*, 'Diagonalization : Done' +elseif (info2 < 0) then + print*, 'WARNING: error in the diagonalization' + print*, 'Illegal value of the ', info2,'-th parameter' +else + print*, "WARNING: Diagonalization failed to converge" +endif + +! Tau^-1, cos(tau), sin(tau) +! $$\tau = \sqrt{-x}$$ +! - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ +! - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ +! - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ +! These matrices are diagonals + +! Diagonal matrix m_diag +do j = 1, n + if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems + e_val(j) = 0.d0 + else + e_val(j) = - e_val(j) + endif +enddo + +m_diag = 0.d0 +do i = 1, n + m_diag(i,i) = e_val(i) +enddo + +! cos_tau +do j = 1, n + do i = 1, n + if (i==j) then + cos_tau(i,j) = dcos(dsqrt(e_val(i))) + else + cos_tau(i,j) = 0d0 + endif + enddo +enddo + +! sin_tau +do j = 1, n + do i = 1, n + if (i==j) then + sin_tau(i,j) = dsin(dsqrt(e_val(i))) + else + sin_tau(i,j) = 0d0 + endif + enddo +enddo + +! Debug, display the cos_tau and sin_tau matrix +!if (debug) then +! print*, 'cos_tau' +! do i = 1, n +! print*, cos_tau(i,:) +! enddo +! print*, 'sin_tau' +! do i = 1, n +! print*, sin_tau(i,:) +! enddo +!endif + +! tau^-1 +do j = 1, n + do i = 1, n + if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small + tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) + else + tau_m1(i,j) = 0d0 + endif + enddo +enddo + +max_elem = 0d0 +do i = 1, n + if (ABS(tau_m1(i,i)) > ABS(max_elem)) then + max_elem = tau_m1(i,i) + endif +enddo +print*,'max elem tau^-1:', max_elem + +! Debug +!print*,'eigenvalues:' +!do i = 1, n +! print*, e_val(i) +!enddo + +!Debug, display tau^-1 +!if (debug) then +! print*, 'tau^-1' +! do i = 1, n +! print*,tau_m1(i,:) +! enddo +!endif + +! Rotation matrix +! \begin{align*} +! \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} +! \end{align*} +! \begin{align*} +! \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} +! \end{align*} +! \begin{align*} +! \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} +! \end{align*} + +! First: +! part_1 = dgemm(W, dgemm(cos_tau, W^t)) +! part_1a = dgemm(cos_tau, W^t) +! part_1 = dgemm(W, part_1a) +! And: +! part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) +! part_2a = dgemm(W^t, A) +! part_2b = dgemm(sin_tau, part_2a) +! part_2c = dgemm(tau_m1, part_2b) +! part_2 = dgemm(W, part_2c) +! Finally: +! Rotation matrix, R = part_1+part_2 + +! If $R$ is a rotation matrix: +! $R.R^T=R^T.R=\textbf{1}$ + +! part_1 +call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) +call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) + +! part_2 +call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) +call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) +call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) +call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) + +! Rotation matrix R +R = part_1 + part_2 + +! Matrix check +! R.R^t and R^t.R must be equal to identity matrix +do j = 1, n + do i=1,n + if (i==j) then + RR_t(i,j) = 1d0 + else + RR_t(i,j) = 0d0 + endif + enddo +enddo + +call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) + +norm = dnrm2(n*n,RR_t,1) +print*, 'Rotation matrix check, norm R.R^T = ', norm + +! Debug +!if (debug) then +! print*, 'RR_t' +! do i = 1, n +! print*, RR_t(i,:) +! enddo +!endif + +! Post conditions + +! Check if R.R^T=1 +max_elem = 0d0 +do j = 1, n + do i = 1, n + if (ABS(RR_t(i,j)) > ABS(max_elem)) then + max_elem = RR_t(i,j) + endif + enddo +enddo + +print*, 'Max error in R.R^T:', max_elem +print*, 'e_val(1):', e_val(1) +print*, 'e_val(n):', e_val(n) +print*, 'max elem in A:', max_elem_A + +if (ABS(max_elem) > 1d-12) then + print*, 'WARNING: max error in R.R^T > 1d-12' + print*, 'Enforce the step cancellation' + enforce_step_cancellation = .True. +endif + +! Matrix elements of R must by non-NaN +do j = 1,n + do i = 1,LDR + if (disnan(R(i,j))) then + info = 666 + print*, 'NaN in rotation matrix' + call ABORT + endif + enddo +enddo + +! Display +!if (debug) then +! print*,'Rotation matrix :' +! do i = 1, n +! write(*,'(100(F10.5))') R(i,:) +! enddo +!endif + +! Deallocation, end + +deallocate(B) + deallocate(m_diag,cos_tau,sin_tau,tau_m1) + deallocate(W,e_val) + deallocate(part_1,part_1a) + deallocate(part_2,part_2a,part_2b,part_2c) + deallocate(RR_t) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in rotation matrix:', t3 + + print*,'---End rotation_matrix---' + +end subroutine diff --git a/src/utils_trust_region/rotation_matrix.org b/src/utils_trust_region/rotation_matrix.org new file mode 100644 index 00000000..73ba0298 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix.org @@ -0,0 +1,454 @@ +* Rotation matrix + +*Build a rotation matrix from an antisymmetric matrix* + +Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : +$$ +\textbf{R}=\exp(\textbf{A}) +$$ + +So : +\begin{align*} +\textbf{R}=& \exp(\textbf{A}) \\ +=& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ +=& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} +\end{align*} + +With : +$\textbf{W}$ : eigenvectors of $\textbf{A}^2$ +$\tau$ : $\sqrt{-x}$ +$x$ : eigenvalues of $\textbf{A}^2$ + +Input: +| A(n,n) | double precision | antisymmetric matrix | +| n | integer | number of columns of the A matrix | +| LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | +| LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | + +Output: +| R(n,n) | double precision | Rotation matrix | +| info | integer | if info = 0, the execution is successful | +| | | if info = k, the k-th parameter has an illegal value | +| | | if info = -k, the algorithm failed | + +Internal: +| B(n,n) | double precision | B = A.A | +| work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | +| lwork | integer | dimension of the syev work array >= max(1, 3n-1) | +| W(n,n) | double precision | eigenvectors of B | +| e_val(n) | double precision | eigenvalues of B | +| m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | +| cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | +| sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | +| tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | +| part_1(n,n) | double precision | matrix W.cos_tau.W^t | +| part_1a(n,n) | double precision | matrix cos_tau.W^t | +| part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | +| part_2a(n,n) | double precision | matrix W^t.A | +| part_2b(n,n) | double precision | matrix sin_tau.W^t.A | +| part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | +| RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | +| norm | integer | norm of R.R^t-1, must be equal to 0 | +| i,j | integer | indexes | + +Functions: +| dnrm2 | double precision | Lapack function, compute the norm of a matrix | +| disnan | logical | Lapack function, check if an element is NaN | + + +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f +subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) + + implicit none + + BEGIN_DOC + ! Rotation matrix to rotate the molecular orbitals. + ! If the rotation is too large the transformation is not unitary and must be cancelled. + END_DOC + + include 'pi.h' + + ! Variables + + ! in + integer, intent(in) :: n,LDA,LDR + double precision, intent(inout) :: A(LDA,n) + + ! out + double precision, intent(out) :: R(LDR,n) + integer, intent(out) :: info + logical, intent(out) :: enforce_step_cancellation + + ! internal + double precision, allocatable :: B(:,:) + double precision, allocatable :: work(:,:) + double precision, allocatable :: W(:,:), e_val(:) + double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) + double precision, allocatable :: part_1(:,:),part_1a(:,:) + double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) + double precision, allocatable :: RR_t(:,:) + integer :: i,j + integer :: info2, lwork ! for dsyev + double precision :: norm, max_elem, max_elem_A, t1,t2,t3 + + ! function + double precision :: dnrm2 + logical :: disnan + + print*,'' + print*,'---rotation_matrix---' + + call wall_time(t1) + + ! Allocation + allocate(B(n,n)) + allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) + allocate(W(n,n),e_val(n)) + allocate(part_1(n,n),part_1a(n,n)) + allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) + allocate(RR_t(n,n)) +#+END_SRC + +** Pre-conditions +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Initialization + info=0 + enforce_step_cancellation = .False. + + ! Size of matrix A must be at least 1 by 1 + if (n<1) then + info = 3 + print*, 'WARNING: invalid parameter 5' + print*, 'n<1' + return + endif + + ! Leading dimension of A must be >= n + if (LDA < n) then + info = 25 + print*, 'WARNING: invalid parameter 2 or 5' + print*, 'LDA < n' + return + endif + + ! Leading dimension of A must be >= n + if (LDR < n) then + info = 4 + print*, 'WARNING: invalid parameter 4' + print*, 'LDR < n' + return + endif + + ! Matrix elements of A must by non-NaN + do j = 1, n + do i = 1, n + if (disnan(A(i,j))) then + info=1 + print*, 'WARNING: invalid parameter 1' + print*, 'NaN element in A matrix' + return + endif + enddo + enddo + + do i = 1, n + if (A(i,i) /= 0d0) then + print*, 'WARNING: matrix A is not antisymmetric' + print*, 'Non 0 element on the diagonal', i, A(i,i) + call ABORT + endif + enddo + + do j = 1, n + do i = 1, n + if (A(i,j)+A(j,i)>1d-16) then + print*, 'WANRING: matrix A is not antisymmetric' + print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) + print*, 'diff:', A(i,j)+A(j,i) + call ABORT + endif + enddo + enddo + + ! Fix for too big elements ! bad idea better to cancel if the error is too big + !do j = 1, n + ! do i = 1, n + ! A(i,j) = mod(A(i,j),2d0*pi) + ! if (dabs(A(i,j)) > pi) then + ! A(i,j) = 0d0 + ! endif + ! enddo + !enddo + + max_elem_A = 0d0 + do j = 1, n + do i = 1, n + if (ABS(A(i,j)) > ABS(max_elem_A)) then + max_elem_A = A(i,j) + endif + enddo + enddo + print*,'max element in A', max_elem_A + + if (ABS(max_elem_A) > 2 * pi) then + print*,'' + print*,'WARNING: ABS(max_elem_A) > 2 pi ' + print*,'' + endif + +#+END_SRC + +** Calculations + +*** B=A.A + - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ + - Diagonalization of $\textbf{B}$ + W, the eigenvectors + e_val, the eigenvalues + + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Compute B=A.A + + call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) + + ! Copy B in W, diagonalization will put the eigenvectors in W + W=B + + ! Diagonalization of B + ! Eigenvalues -> e_val + ! Eigenvectors -> W + lwork = 3*n-1 + allocate(work(lwork,n)) + + print*,'Starting diagonalization ...' + + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) + + deallocate(work) + + if (info2 == 0) then + print*, 'Diagonalization : Done' + elseif (info2 < 0) then + print*, 'WARNING: error in the diagonalization' + print*, 'Illegal value of the ', info2,'-th parameter' + else + print*, "WARNING: Diagonalization failed to converge" + endif + #+END_SRC + +*** Tau^-1, cos(tau), sin(tau) + $$\tau = \sqrt{-x}$$ + - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ + - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ + - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ + These matrices are diagonals + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Diagonal matrix m_diag + do j = 1, n + if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems + e_val(j) = 0.d0 + else + e_val(j) = - e_val(j) + endif + enddo + + m_diag = 0.d0 + do i = 1, n + m_diag(i,i) = e_val(i) + enddo + + ! cos_tau + do j = 1, n + do i = 1, n + if (i==j) then + cos_tau(i,j) = dcos(dsqrt(e_val(i))) + else + cos_tau(i,j) = 0d0 + endif + enddo + enddo + + ! sin_tau + do j = 1, n + do i = 1, n + if (i==j) then + sin_tau(i,j) = dsin(dsqrt(e_val(i))) + else + sin_tau(i,j) = 0d0 + endif + enddo + enddo + + ! Debug, display the cos_tau and sin_tau matrix + !if (debug) then + ! print*, 'cos_tau' + ! do i = 1, n + ! print*, cos_tau(i,:) + ! enddo + ! print*, 'sin_tau' + ! do i = 1, n + ! print*, sin_tau(i,:) + ! enddo + !endif + + ! tau^-1 + do j = 1, n + do i = 1, n + if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small + tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) + else + tau_m1(i,j) = 0d0 + endif + enddo + enddo + + max_elem = 0d0 + do i = 1, n + if (ABS(tau_m1(i,i)) > ABS(max_elem)) then + max_elem = tau_m1(i,i) + endif + enddo + print*,'max elem tau^-1:', max_elem + + ! Debug + !print*,'eigenvalues:' + !do i = 1, n + ! print*, e_val(i) + !enddo + + !Debug, display tau^-1 + !if (debug) then + ! print*, 'tau^-1' + ! do i = 1, n + ! print*,tau_m1(i,:) + ! enddo + !endif + #+END_SRC + +*** Rotation matrix + \begin{align*} + \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} + \end{align*} + \begin{align*} + \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \end{align*} + \begin{align*} + \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} + \end{align*} + + First: + part_1 = dgemm(W, dgemm(cos_tau, W^t)) + part_1a = dgemm(cos_tau, W^t) + part_1 = dgemm(W, part_1a) + And: + part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) + part_2a = dgemm(W^t, A) + part_2b = dgemm(sin_tau, part_2a) + part_2c = dgemm(tau_m1, part_2b) + part_2 = dgemm(W, part_2c) + Finally: + Rotation matrix, R = part_1+part_2 + + If $R$ is a rotation matrix: + $R.R^T=R^T.R=\textbf{1}$ + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! part_1 + call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) + call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) + + ! part_2 + call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) + call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) + call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) + call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) + + ! Rotation matrix R + R = part_1 + part_2 + + ! Matrix check + ! R.R^t and R^t.R must be equal to identity matrix + do j = 1, n + do i=1,n + if (i==j) then + RR_t(i,j) = 1d0 + else + RR_t(i,j) = 0d0 + endif + enddo + enddo + + call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) + + norm = dnrm2(n*n,RR_t,1) + print*, 'Rotation matrix check, norm R.R^T = ', norm + + ! Debug + !if (debug) then + ! print*, 'RR_t' + ! do i = 1, n + ! print*, RR_t(i,:) + ! enddo + !endif + #+END_SRC + +*** Post conditions + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Check if R.R^T=1 + max_elem = 0d0 + do j = 1, n + do i = 1, n + if (ABS(RR_t(i,j)) > ABS(max_elem)) then + max_elem = RR_t(i,j) + endif + enddo + enddo + + print*, 'Max error in R.R^T:', max_elem + print*, 'e_val(1):', e_val(1) + print*, 'e_val(n):', e_val(n) + print*, 'max elem in A:', max_elem_A + + if (ABS(max_elem) > 1d-12) then + print*, 'WARNING: max error in R.R^T > 1d-12' + print*, 'Enforce the step cancellation' + enforce_step_cancellation = .True. + endif + + ! Matrix elements of R must by non-NaN + do j = 1,n + do i = 1,LDR + if (disnan(R(i,j))) then + info = 666 + print*, 'NaN in rotation matrix' + call ABORT + endif + enddo + enddo + + ! Display + !if (debug) then + ! print*,'Rotation matrix :' + ! do i = 1, n + ! write(*,'(100(F10.5))') R(i,:) + ! enddo + !endif + #+END_SRC + +** Deallocation, end + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + deallocate(B) + deallocate(m_diag,cos_tau,sin_tau,tau_m1) + deallocate(W,e_val) + deallocate(part_1,part_1a) + deallocate(part_2,part_2a,part_2b,part_2c) + deallocate(RR_t) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in rotation matrix:', t3 + + print*,'---End rotation_matrix---' + +end subroutine + #+END_SRC + diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f new file mode 100644 index 00000000..bdd1f6ba --- /dev/null +++ b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f @@ -0,0 +1,64 @@ +! Rotation matrix in a subspace to rotation matrix in the full space + +! Usually, we are using a list of MOs, for exemple the active ones. When +! we compute a rotation matrix to rotate the MOs, we just compute a +! rotation matrix for these MOs in order to reduce the size of the +! matrix which has to be computed. Since the computation of a rotation +! matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to +! reuce the number of MOs involved. +! After that we replace the rotation matrix in the full space by +! building the elements of the rotation matrix in the full space from +! the elements of the rotation matrix in the subspace and adding some 0 +! on the extradiagonal elements and some 1 on the diagonal elements, +! for the MOs that are not involved in the rotation. + +! Provided: +! | mo_num | integer | Number of MOs | + +! Input: +! | m | integer | Size of tmp_list, m <= mo_num | +! | tmp_list(m) | integer | List of MOs | +! | tmp_R(m,m) | double precision | Rotation matrix in the space of | +! | | | the MOs containing by tmp_list | + +! Output: +! | R(mo_num,mo_num | double precision | Rotation matrix in the space | +! | | | of all the MOs | + +! Internal: +! | i,j | integer | indexes in the full space | +! | tmp_i,tmp_j | integer | indexes in the subspace | + + +subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) + + BEGIN_DOC + ! Compute the full rotation matrix from a smaller one + END_DOC + + implicit none + + ! in + integer, intent(in) :: m, tmp_list(m) + double precision, intent(in) :: tmp_R(m,m) + + ! out + double precision, intent(out) :: R(mo_num,mo_num) + + ! internal + integer :: i,j,tmp_i,tmp_j + + ! tmp_R to R, subspace to full space + R = 0d0 + do i = 1, mo_num + R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital + enddo + do tmp_j = 1, m + j = tmp_list(tmp_j) + do tmp_i = 1, m + i = tmp_list(tmp_i) + R(i,j) = tmp_R(tmp_i,tmp_j) + enddo + enddo + +end diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.org b/src/utils_trust_region/sub_to_full_rotation_matrix.org new file mode 100644 index 00000000..16434dc8 --- /dev/null +++ b/src/utils_trust_region/sub_to_full_rotation_matrix.org @@ -0,0 +1,65 @@ +* Rotation matrix in a subspace to rotation matrix in the full space + +Usually, we are using a list of MOs, for exemple the active ones. When +we compute a rotation matrix to rotate the MOs, we just compute a +rotation matrix for these MOs in order to reduce the size of the +matrix which has to be computed. Since the computation of a rotation +matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to +reuce the number of MOs involved. +After that we replace the rotation matrix in the full space by +building the elements of the rotation matrix in the full space from +the elements of the rotation matrix in the subspace and adding some 0 +on the extradiagonal elements and some 1 on the diagonal elements, +for the MOs that are not involved in the rotation. + +Provided: +| mo_num | integer | Number of MOs | + +Input: +| m | integer | Size of tmp_list, m <= mo_num | +| tmp_list(m) | integer | List of MOs | +| tmp_R(m,m) | double precision | Rotation matrix in the space of | +| | | the MOs containing by tmp_list | + +Output: +| R(mo_num,mo_num | double precision | Rotation matrix in the space | +| | | of all the MOs | + +Internal: +| i,j | integer | indexes in the full space | +| tmp_i,tmp_j | integer | indexes in the subspace | + +#+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f +subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) + + BEGIN_DOC + ! Compute the full rotation matrix from a smaller one + END_DOC + + implicit none + + ! in + integer, intent(in) :: m, tmp_list(m) + double precision, intent(in) :: tmp_R(m,m) + + ! out + double precision, intent(out) :: R(mo_num,mo_num) + + ! internal + integer :: i,j,tmp_i,tmp_j + + ! tmp_R to R, subspace to full space + R = 0d0 + do i = 1, mo_num + R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital + enddo + do tmp_j = 1, m + j = tmp_list(tmp_j) + do tmp_i = 1, m + i = tmp_list(tmp_i) + R(i,j) = tmp_R(tmp_i,tmp_j) + enddo + enddo + +end +#+END_SRC diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f new file mode 100644 index 00000000..b7d849d1 --- /dev/null +++ b/src/utils_trust_region/trust_region_expected_e.irp.f @@ -0,0 +1,119 @@ +! Predicted energy : e_model + +! *Compute the energy predicted by the Taylor series* + +! The energy is predicted using a Taylor expansion truncated at te 2nd +! order : + +! \begin{align*} +! E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) +! \end{align*} + +! Input: +! | n | integer | m*(m-1)/2 | +! | v_grad(n) | double precision | gradient | +! | H(n,n) | double precision | hessian | +! | x(n) | double precision | Step in the trust region | +! | prev_energy | double precision | previous energy | + +! Output: +! | e_model | double precision | predicted energy after the rotation of the MOs | + +! Internal: +! | part_1 | double precision | v_grad^T.x | +! | part_2 | double precision | 1/2 . x^T.H.x | +! | part_2a | double precision | H.x | +! | i,j | integer | indexes | + +! Function: +! | ddot | double precision | dot product (Lapack) | + + +subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) + + include 'pi.h' + + BEGIN_DOC + ! Compute the expected criterion/energy after the application of the step x + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: v_grad(n),H(n,n),x(n) + double precision, intent(in) :: prev_energy + + ! out + double precision, intent(out) :: e_model + + ! internal + double precision :: part_1, part_2, t1,t2,t3 + double precision, allocatable :: part_2a(:) + + integer :: i,j + + !Function + double precision :: ddot + + print*,'' + print*,'---Trust_e_model---' + + call wall_time(t1) + + ! Allocation + allocate(part_2a(n)) + +! Calculations + +! part_1 corresponds to the product g.x +! part_2a corresponds to the product H.x +! part_2 corresponds to the product 0.5*(x^T.H.x) + +! TODO: remove the dot products + + +! Product v_grad.x + part_1 = ddot(n,v_grad,1,x,1) + + !if (debug) then + print*,'g.x : ', part_1 + !endif + + ! Product H.x + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + + ! Product 1/2 . x^T.H.x + part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) + + !if (debug) then + print*,'1/2*x^T.H.x : ', part_2 + !endif + + print*,'prev_energy', prev_energy + + ! Sum + e_model = prev_energy + part_1 + part_2 + + ! Writing the predicted energy + print*, 'Predicted energy after the rotation : ', e_model + print*, 'Previous energy - predicted energy:', prev_energy - e_model + + ! Can be deleted, already in another subroutine + if (DABS(prev_energy - e_model) < 1d-12 ) then + print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' + endif + + ! Deallocation + deallocate(part_2a) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust e model:', t3 + + print*,'---End trust_e_model---' + print*,'' + +end subroutine diff --git a/src/utils_trust_region/trust_region_expected_e.org b/src/utils_trust_region/trust_region_expected_e.org new file mode 100644 index 00000000..58c8f804 --- /dev/null +++ b/src/utils_trust_region/trust_region_expected_e.org @@ -0,0 +1,121 @@ +* Predicted energy : e_model + +*Compute the energy predicted by the Taylor series* + +The energy is predicted using a Taylor expansion truncated at te 2nd +order : + +\begin{align*} +E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) +\end{align*} + +Input: +| n | integer | m*(m-1)/2 | +| v_grad(n) | double precision | gradient | +| H(n,n) | double precision | hessian | +| x(n) | double precision | Step in the trust region | +| prev_energy | double precision | previous energy | + +Output: +| e_model | double precision | predicted energy after the rotation of the MOs | + +Internal: +| part_1 | double precision | v_grad^T.x | +| part_2 | double precision | 1/2 . x^T.H.x | +| part_2a | double precision | H.x | +| i,j | integer | indexes | + +Function: +| ddot | double precision | dot product (Lapack) | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f +subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) + + include 'pi.h' + + BEGIN_DOC + ! Compute the expected criterion/energy after the application of the step x + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: v_grad(n),H(n,n),x(n) + double precision, intent(in) :: prev_energy + + ! out + double precision, intent(out) :: e_model + + ! internal + double precision :: part_1, part_2, t1,t2,t3 + double precision, allocatable :: part_2a(:) + + integer :: i,j + + !Function + double precision :: ddot + + print*,'' + print*,'---Trust_e_model---' + + call wall_time(t1) + + ! Allocation + allocate(part_2a(n)) +#+END_SRC + +** Calculations + +part_1 corresponds to the product g.x +part_2a corresponds to the product H.x +part_2 corresponds to the product 0.5*(x^T.H.x) + +TODO: remove the dot products + +#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f + ! Product v_grad.x + part_1 = ddot(n,v_grad,1,x,1) + + !if (debug) then + print*,'g.x : ', part_1 + !endif + + ! Product H.x + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + + ! Product 1/2 . x^T.H.x + part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) + + !if (debug) then + print*,'1/2*x^T.H.x : ', part_2 + !endif + + print*,'prev_energy', prev_energy + + ! Sum + e_model = prev_energy + part_1 + part_2 + + ! Writing the predicted energy + print*, 'Predicted energy after the rotation : ', e_model + print*, 'Previous energy - predicted energy:', prev_energy - e_model + + ! Can be deleted, already in another subroutine + if (DABS(prev_energy - e_model) < 1d-12 ) then + print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' + endif + + ! Deallocation + deallocate(part_2a) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust e model:', t3 + + print*,'---End trust_e_model---' + print*,'' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f new file mode 100644 index 00000000..85e7bf2a --- /dev/null +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -0,0 +1,1655 @@ +! Newton's method to find the optimal lambda + +! *Compute the lambda value for the trust region* + +! This subroutine uses the Newton method in order to find the optimal +! lambda. This constant is added on the diagonal of the hessian to shift +! the eiganvalues. It has a double role: +! - ensure that the resulting hessian is positive definite for the +! Newton method +! - constrain the step in the trust region, i.e., +! $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius +! of the trust region. +! We search $\lambda$ which minimizes +! \begin{align*} +! f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 +! \end{align*} +! or +! \begin{align*} +! \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 +! \end{align*} +! and gives obviously 0 in both cases. \newline + +! There are several cases: +! - If $\textbf{H}$ is positive definite the interval containing the +! solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). +! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot +! \textbf{g} \neq 0$ then the interval containing +! the solution is $\lambda \in (-h_1, \infty)$. +! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ then the interval containing the solution is +! $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < +! 10^{-12}$ are not computed, so the term where $i = 1$ is +! automatically removed and this case becomes similar to the previous one. + +! So to avoid numerical problems (cf. trust_region) we start the +! algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, +! with $\epsilon$ a little constant. +! The research must be restricted to the interval containing the +! solution. For that reason a little trust region in 1D is used. + +! The Newton method to find the optimal $\lambda$ is : +! \begin{align*} +! \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ +! \end{align*} +! $f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to +! $\lambda$ at the l-th iteration, +! $f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to +! $\lambda$ at the l-th iteration.\newline + +! Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} +! f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as +! \begin{align*} +! y \leq \alpha +! \end{align*} +! with $\alpha$ a scalar representing the trust length (trust region in +! 1D) where the function $f$ or $\tilde{f}$ is correctly describe by the +! Taylor series truncated at the second order. Thus, if $y > \alpha$, +! the constraint is applied as +! \begin{align*} +! y^* = \alpha \frac{y}{|y|} +! \end{align*} +! with $y^*$ the solution in the trust region. + +! The size of the trust region evolves in function of $\rho$ as for the +! trust region seen previously cf. trust_region, rho_model. +! The prediction of the value of $f$ or $\tilde{f}$ is done using the +! Taylor series truncated at the second order cf. "trust_region", +! "trust_e_model". + +! The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - +! \Delta^2)^2$ with respect to $\lambda$ are: +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{align*} + +! The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - +! 1/\Delta^2)^2$ with respect to $\lambda$ are: +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} +! {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + +! Provided in qp_edit: +! | thresh_rho_2 | +! | thresh_cc | +! | nb_it_max_lambda | +! | version_lambda_search | +! | nb_it_max_pre_search | +! see qp_edit for more details + +! Input: +! | n | integer | m*(m-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | tmp_wtg(n) | double precision | w_i^T.v_grad(i) | +! | delta | double precision | delta for the trust region | + +! Output: +! | lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | +! | | | lambda > 0 | + +! Internal: +! | d1_N | double precision | value of d1_norm_trust_region | +! | d2_N | double precision | value of d2_norm_trust_region | +! | f_N | double precision | value of f_norm_trust_region | +! | prev_f_N | double precision | previous value of f_norm_trust_region | +! | f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | +! | prev_f_R | double precision | previous value of f_R | +! | model | double precision | predicted value of f_R from prev_f_R and y | +! | d_1 | double precision | value of the first derivative | +! | d_2 | double precision | value of the second derivative | +! | y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | +! | prev_lambda | double precision | previous value of lambda | +! | t1,t2,t3 | double precision | wall time | +! | i | integer | index | +! | epsilon | double precision | little constant to avoid numerical problem | +! | rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | +! | version | integer | version of the root finding method | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +! | d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +! | f_norm_trust_region | double precision | value of norm(x)^2 | + + + +subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + + include 'pi.h' + + BEGIN_DOC + ! Research the optimal lambda to constrain the step size in the trust region + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(inout) :: e_val(n) + double precision, intent(in) :: delta + double precision, intent(in) :: tmp_wtg(n) + + ! out + double precision, intent(out) :: lambda + + ! Internal + double precision :: d1_N, d2_N, f_N, prev_f_N + double precision :: prev_f_R, f_R + double precision :: model + double precision :: d_1, d_2 + double precision :: t1,t2,t3 + integer :: i + double precision :: epsilon + double precision :: y + double precision :: prev_lambda + double precision :: rho_2 + double precision :: alpha + integer :: version + + ! Functions + double precision :: d1_norm_trust_region,d1_norm_trust_region_omp + double precision :: d2_norm_trust_region, d2_norm_trust_region_omp + double precision :: f_norm_trust_region, f_norm_trust_region_omp + double precision :: d1_norm_inverse_trust_region + double precision :: d2_norm_inverse_trust_region + double precision :: d1_norm_inverse_trust_region_omp + double precision :: d2_norm_inverse_trust_region_omp + + print*,'' + print*,'---Trust_newton---' + print*,'' + + call wall_time(t1) + + ! version_lambda_search + ! 1 -> ||x||^2 - delta^2 = 0, + ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) + if (version_lambda_search == 1) then + print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + else + print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + endif + ! Version 2 is normally better + + + +! Resolution with the Newton method: + + + ! Initialization + epsilon = 1d-4 + lambda =MAX(0d0, -e_val(1)) + + ! Pre research of lambda to start near the optimal lambda + ! by adding a constant epsilon and changing the constant to + ! have ||x(lambda + epsilon)|| ~ delta, before setting + ! lambda = lambda + epsilon + print*, 'Pre research of lambda:' + print*,'Initial lambda =', lambda + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + i = 1 + + ! To increase lambda + if (f_N > delta**2) then + print*,'Increasing lambda...' + do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 2d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N < f_N) then + print*,'WARNING, error: prev_f_N < f_N, exit' + epsilon = epsilon * 0.5d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + + ! To reduce lambda + else + print*,'Reducing lambda...' + do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 0.5d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N > f_N) then + print*,'WARNING, error: prev_f_N > f_N, exit' + epsilon = epsilon * 2d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + endif + + print*,'End of the pre research of lambda' + + ! New value of lambda + lambda = lambda + epsilon + + print*, 'e_val(1):', e_val(1) + print*, 'Staring point, lambda =', lambda + + ! thresh_cc, threshold for the research of the optimal lambda + ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc + ! thresh_rho_2, threshold to cancel the step in the research + ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 + print*,'Threshold for the CC:', thresh_cc + print*,'Threshold for rho_2:', thresh_rho_2 + + print*, 'w_1^T . g =', tmp_wtg(1) + + ! Debug + !if (debug) then + ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' + !endif + + ! Initialization + i = 1 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 + model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 + prev_f_N = 0d0 ! previous value of ||x||^2 + prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 + f_R = 0d0 ! value of (||x||^2 - delta^2)^2 + rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) + y = 0d0 ! step size + prev_lambda = 0d0 ! previous lambda + + ! Derivatives + if (version_lambda_search == 1) then + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + + ! Trust length + alpha = DABS((1d0/d_2)*d_1) + + ! Newton's method + do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) + print*,'--------------------------------------' + print*,'Research of lambda, iteration:', i + print*,'--------------------------------------' + + ! Update of f_N, f_R and the derivatives + prev_f_N = f_N + if (version_lambda_search == 1) then + prev_f_R = (prev_f_N - delta**2)**2 + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + + ! Newton's step + y = -(1d0/DABS(d_2))*d_1 + + ! Constraint on y (the newton step) + if (DABS(y) > alpha) then + y = alpha * (y/DABS(y)) ! preservation of the sign of y + endif + write(*,'(a,E12.5)') ' Step length: ', y + + ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series + model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 + + ! Updates lambda + prev_lambda = lambda + lambda = prev_lambda + y + print*,'prev lambda:', prev_lambda + print*,'new lambda:', lambda + + ! Checks if lambda is in (-h_1, \infty) + if (lambda > MAX(0d0, -e_val(1))) then + ! New value of ||x(lambda)||^2 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + ! New f_R + if (version_lambda_search == 1) then + f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 + else + f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 + endif + + if (version_lambda_search == 1) then + print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + else + print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + endif + + print*,'previous - actual:', prev_f_R - f_R + print*,'previous - model:', prev_f_R - model + + ! Check the gain + if (DABS(prev_f_R - model) < thresh_model_2) then + print*,'' + print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' + print*,'' + endif + + ! Will be deleted + !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then + ! print*,'' + ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' + ! print*,'' + ! exit + !endif + + ! Computes rho_2 + rho_2 = (prev_f_R - f_R)/(prev_f_R - model) + print*,'rho_2:', rho_2 + else + rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) + print*,'lambda < -e_val(1) ===> rho_2 = 0' + endif + + ! Evolution of the trust length, alpha + if (rho_2 >= 0.75d0) then + alpha = 2d0 * alpha + elseif (rho_2 >= 0.5d0) then + alpha = alpha + elseif (rho_2 >= 0.25d0) then + alpha = 0.5d0 * alpha + else + alpha = 0.25d0 * alpha + endif + write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + + ! cancellaion of the step if rho < 0.1 + if (rho_2 < thresh_rho_2) then !0.1d0) then + lambda = prev_lambda + f_N = prev_f_N + print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + endif + + print*,'' + print*,'lambda, ||x||, delta:' + print*, lambda, dsqrt(f_N), delta + print*,'CC:', DABS(1d0 - f_N/delta**2) + print*,'' + + i = i + 1 + enddo + + ! if trust newton failed + if (i > nb_it_max_lambda) then + print*,'' + print*,'######################################################' + print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' + print*,'The research of the optimal lambda has failed' + print*,'######################################################' + print*,'' + endif + + print*,'Number of iterations :', i + print*,'Value of lambda :', lambda + print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 + print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + + ! Time + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust_newton:', t3 + + print*,'' + print*,'---End trust_newton---' + print*,'' + +end subroutine + +! OMP: First derivative of (||x||^2 - Delta^2)^2 + +! *Function to compute the first derivative of (||x||^2 - Delta^2)^2* + +! This function computes the first derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. + +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | mo_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + + +function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg,accu1,accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) + + deallocate(tmp_accu1, tmp_accu2) + +end function + +! OMP: Second derivative of (||x||^2 - Delta^2)^2 + +! *Function to compute the second derivative of (||x||^2 - Delta^2)^2* + +! This function computes the second derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. +! \begin{align*} +! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | accu3 | double precision | third sum of the formula | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_accu2 | double precision | temporary array for the third sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + + +function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region_omp + double precision :: ddot + + ! Internal + double precision :: accu1,accu2,accu3 + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + integer :: i, j + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + + ! accu1 + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + ! accu2 + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + ! accu3 + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) + + deallocate(tmp_accu1, tmp_accu2, tmp_accu3) + +end function + +! OMP: Function value of ||x||^2 + +! *Compute the value of ||x||^2* + +! This function computes the value of ||x(lambda)||^2 + +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | + +! Internal: +! | tmp_wtg(n) | double precision | temporary array for W^T.v_grad | +! | tmp_fN | double precision | temporary array for the function | +! | i,j | integer | indexes | + + +function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + use omp_lib + + include 'pi.h' + + BEGIN_DOC + ! Compute ||x(lambda)||^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! functions + double precision :: f_norm_trust_region_omp + + ! internal + double precision, allocatable :: tmp_fN(:) + integer :: i,j + + ! Allocation + allocate(tmp_fN(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + f_norm_trust_region_omp = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_fN(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + deallocate(tmp_fN) + +end function + +! First derivative of (||x||^2 - Delta^2)^2 +! Version without omp + +! *Function to compute the first derivative of ||x||^2 - Delta* + +! This function computes the first derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. + +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | ddot | double precision | blas dot product | + + +function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i, j + + ! Functions + double precision :: d1_norm_trust_region + double precision :: ddot + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) + +end function + +! Second derivative of (||x||^2 - Delta^2)^2 +! Version without OMP + +! *Function to compute the second derivative of ||x||^2 - Delta* + + +! \begin{equation} +! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{equation} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | accu3 | double precision | third sum of the formula | +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | +! | ddot | double precision | blas dot product | + + +function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region + double precision :: ddot + + ! Internal + double precision :: wtg,accu1,accu2,accu3 + integer :: i, j + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 + endif + enddo + + d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) + +end function + +! Function value of ||x||^2 +! Version without OMP + +! *Compute the value of ||x||^2* + +! This function computes the value of ||x(lambda)||^2 + +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | f_norm_trust_region | double precision | value of norm(x)^2 | +! | ddot | double precision | blas dot product | + + + +function f_norm_trust_region(n,e_val,tmp_wtg,lambda) + + include 'pi.h' + + BEGIN_DOC + ! Compute ||x(lambda)||^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! function + double precision :: f_norm_trust_region + double precision :: ddot + + ! internal + integer :: i,j + + ! Initialization + f_norm_trust_region = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + +end function + +! OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version with OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + + deallocate(tmp_accu1, tmp_accu2) + +end + +! OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version with OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2, accu3 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + + ! Functions + double precision :: d2_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + + deallocate(tmp_accu1,tmp_accu2,tmp_accu3) + +end + +! First derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version without OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i,j + + ! Functions + double precision :: d1_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + +end + +! Second derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version without OMP + +! *Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2, accu3 + integer :: i,j + + ! Functions + double precision :: d2_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 + endif + enddo + + d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + +end diff --git a/src/utils_trust_region/trust_region_optimal_lambda.org b/src/utils_trust_region/trust_region_optimal_lambda.org new file mode 100644 index 00000000..b39c9a10 --- /dev/null +++ b/src/utils_trust_region/trust_region_optimal_lambda.org @@ -0,0 +1,1665 @@ +* Newton's method to find the optimal lambda + +*Compute the lambda value for the trust region* + +This subroutine uses the Newton method in order to find the optimal +lambda. This constant is added on the diagonal of the hessian to shift +the eiganvalues. It has a double role: +- ensure that the resulting hessian is positive definite for the + Newton method +- constrain the step in the trust region, i.e., + $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius + of the trust region. +We search $\lambda$ which minimizes +\begin{align*} + f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 +\end{align*} +or +\begin{align*} + \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 +\end{align*} +and gives obviously 0 in both cases. \newline + +There are several cases: +- If $\textbf{H}$ is positive definite the interval containing the + solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). +- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot + \textbf{g} \neq 0$ then the interval containing + the solution is $\lambda \in (-h_1, \infty)$. +- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot + \textbf{g} = 0$ then the interval containing the solution is + $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < + 10^{-12}$ are not computed, so the term where $i = 1$ is + automatically removed and this case becomes similar to the previous one. + +So to avoid numerical problems (cf. trust_region) we start the +algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, +with $\epsilon$ a little constant. +The research must be restricted to the interval containing the +solution. For that reason a little trust region in 1D is used. + +The Newton method to find the optimal $\lambda$ is : +\begin{align*} + \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ +\end{align*} +$f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to +$\lambda$ at the l-th iteration, +$f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to +$\lambda$ at the l-th iteration.\newline + +Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} +f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as +\begin{align*} + y \leq \alpha +\end{align*} +with $\alpha$ a scalar representing the trust length (trust region in +1D) where the function $f$ or $\tilde{f}$ is correctly describe by the +Taylor series truncated at the second order. Thus, if $y > \alpha$, +the constraint is applied as +\begin{align*} + y^* = \alpha \frac{y}{|y|} +\end{align*} +with $y^*$ the solution in the trust region. + +The size of the trust region evolves in function of $\rho$ as for the +trust region seen previously cf. trust_region, rho_model. +The prediction of the value of $f$ or $\tilde{f}$ is done using the +Taylor series truncated at the second order cf. "trust_region", +"trust_e_model". + +The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - +\Delta^2)^2$ with respect to $\lambda$ are: +\begin{align*} + \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 + = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) + \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} +\begin{align*} +\frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{align*} + +The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - +1/\Delta^2)^2$ with respect to $\lambda$ are: +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} + {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + +Provided in qp_edit: +| thresh_rho_2 | +| thresh_cc | +| nb_it_max_lambda | +| version_lambda_search | +| nb_it_max_pre_search | +see qp_edit for more details + +Input: +| n | integer | m*(m-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| tmp_wtg(n) | double precision | w_i^T.v_grad(i) | +| delta | double precision | delta for the trust region | + +Output: +| lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | +| | | lambda > 0 | + +Internal: +| d1_N | double precision | value of d1_norm_trust_region | +| d2_N | double precision | value of d2_norm_trust_region | +| f_N | double precision | value of f_norm_trust_region | +| prev_f_N | double precision | previous value of f_norm_trust_region | +| f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | +| prev_f_R | double precision | previous value of f_R | +| model | double precision | predicted value of f_R from prev_f_R and y | +| d_1 | double precision | value of the first derivative | +| d_2 | double precision | value of the second derivative | +| y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | +| prev_lambda | double precision | previous value of lambda | +| t1,t2,t3 | double precision | wall time | +| i | integer | index | +| epsilon | double precision | little constant to avoid numerical problem | +| rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | +| version | integer | version of the root finding method | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +| d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +| f_norm_trust_region | double precision | value of norm(x)^2 | + + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + + include 'pi.h' + + BEGIN_DOC + ! Research the optimal lambda to constrain the step size in the trust region + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(inout) :: e_val(n) + double precision, intent(in) :: delta + double precision, intent(in) :: tmp_wtg(n) + + ! out + double precision, intent(out) :: lambda + + ! Internal + double precision :: d1_N, d2_N, f_N, prev_f_N + double precision :: prev_f_R, f_R + double precision :: model + double precision :: d_1, d_2 + double precision :: t1,t2,t3 + integer :: i + double precision :: epsilon + double precision :: y + double precision :: prev_lambda + double precision :: rho_2 + double precision :: alpha + integer :: version + + ! Functions + double precision :: d1_norm_trust_region,d1_norm_trust_region_omp + double precision :: d2_norm_trust_region, d2_norm_trust_region_omp + double precision :: f_norm_trust_region, f_norm_trust_region_omp + double precision :: d1_norm_inverse_trust_region + double precision :: d2_norm_inverse_trust_region + double precision :: d1_norm_inverse_trust_region_omp + double precision :: d2_norm_inverse_trust_region_omp + + print*,'' + print*,'---Trust_newton---' + print*,'' + + call wall_time(t1) + + ! version_lambda_search + ! 1 -> ||x||^2 - delta^2 = 0, + ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) + if (version_lambda_search == 1) then + print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + else + print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + endif + ! Version 2 is normally better +#+END_SRC + +Resolution with the Newton method: + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f + ! Initialization + epsilon = 1d-4 + lambda =MAX(0d0, -e_val(1)) + + ! Pre research of lambda to start near the optimal lambda + ! by adding a constant epsilon and changing the constant to + ! have ||x(lambda + epsilon)|| ~ delta, before setting + ! lambda = lambda + epsilon + print*, 'Pre research of lambda:' + print*,'Initial lambda =', lambda + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + i = 1 + + ! To increase lambda + if (f_N > delta**2) then + print*,'Increasing lambda...' + do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 2d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N < f_N) then + print*,'WARNING, error: prev_f_N < f_N, exit' + epsilon = epsilon * 0.5d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + + ! To reduce lambda + else + print*,'Reducing lambda...' + do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 0.5d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N > f_N) then + print*,'WARNING, error: prev_f_N > f_N, exit' + epsilon = epsilon * 2d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + endif + + print*,'End of the pre research of lambda' + + ! New value of lambda + lambda = lambda + epsilon + + print*, 'e_val(1):', e_val(1) + print*, 'Staring point, lambda =', lambda + + ! thresh_cc, threshold for the research of the optimal lambda + ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc + ! thresh_rho_2, threshold to cancel the step in the research + ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 + print*,'Threshold for the CC:', thresh_cc + print*,'Threshold for rho_2:', thresh_rho_2 + + print*, 'w_1^T . g =', tmp_wtg(1) + + ! Debug + !if (debug) then + ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' + !endif + + ! Initialization + i = 1 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 + model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 + prev_f_N = 0d0 ! previous value of ||x||^2 + prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 + f_R = 0d0 ! value of (||x||^2 - delta^2)^2 + rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) + y = 0d0 ! step size + prev_lambda = 0d0 ! previous lambda + + ! Derivatives + if (version_lambda_search == 1) then + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + + ! Trust length + alpha = DABS((1d0/d_2)*d_1) + + ! Newton's method + do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) + print*,'--------------------------------------' + print*,'Research of lambda, iteration:', i + print*,'--------------------------------------' + + ! Update of f_N, f_R and the derivatives + prev_f_N = f_N + if (version_lambda_search == 1) then + prev_f_R = (prev_f_N - delta**2)**2 + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + + ! Newton's step + y = -(1d0/DABS(d_2))*d_1 + + ! Constraint on y (the newton step) + if (DABS(y) > alpha) then + y = alpha * (y/DABS(y)) ! preservation of the sign of y + endif + write(*,'(a,E12.5)') ' Step length: ', y + + ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series + model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 + + ! Updates lambda + prev_lambda = lambda + lambda = prev_lambda + y + print*,'prev lambda:', prev_lambda + print*,'new lambda:', lambda + + ! Checks if lambda is in (-h_1, \infty) + if (lambda > MAX(0d0, -e_val(1))) then + ! New value of ||x(lambda)||^2 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + ! New f_R + if (version_lambda_search == 1) then + f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 + else + f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 + endif + + if (version_lambda_search == 1) then + print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + else + print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + endif + + print*,'previous - actual:', prev_f_R - f_R + print*,'previous - model:', prev_f_R - model + + ! Check the gain + if (DABS(prev_f_R - model) < thresh_model_2) then + print*,'' + print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' + print*,'' + endif + + ! Will be deleted + !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then + ! print*,'' + ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' + ! print*,'' + ! exit + !endif + + ! Computes rho_2 + rho_2 = (prev_f_R - f_R)/(prev_f_R - model) + print*,'rho_2:', rho_2 + else + rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) + print*,'lambda < -e_val(1) ===> rho_2 = 0' + endif + + ! Evolution of the trust length, alpha + if (rho_2 >= 0.75d0) then + alpha = 2d0 * alpha + elseif (rho_2 >= 0.5d0) then + alpha = alpha + elseif (rho_2 >= 0.25d0) then + alpha = 0.5d0 * alpha + else + alpha = 0.25d0 * alpha + endif + write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + + ! cancellaion of the step if rho < 0.1 + if (rho_2 < thresh_rho_2) then !0.1d0) then + lambda = prev_lambda + f_N = prev_f_N + print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + endif + + print*,'' + print*,'lambda, ||x||, delta:' + print*, lambda, dsqrt(f_N), delta + print*,'CC:', DABS(1d0 - f_N/delta**2) + print*,'' + + i = i + 1 + enddo + + ! if trust newton failed + if (i > nb_it_max_lambda) then + print*,'' + print*,'######################################################' + print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' + print*,'The research of the optimal lambda has failed' + print*,'######################################################' + print*,'' + endif + + print*,'Number of iterations :', i + print*,'Value of lambda :', lambda + print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 + print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + + ! Time + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust_newton:', t3 + + print*,'' + print*,'---End trust_newton---' + print*,'' + +end subroutine +#+END_SRC + +* OMP: First derivative of (||x||^2 - Delta^2)^2 + +*Function to compute the first derivative of (||x||^2 - Delta^2)^2* + +This function computes the first derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. + +\begin{align*} +\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) +\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} + +\begin{align*} + \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ + \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| mo_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg,accu1,accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) + + deallocate(tmp_accu1, tmp_accu2) + +end function +#+END_SRC + +* OMP: Second derivative of (||x||^2 - Delta^2)^2 + +*Function to compute the second derivative of (||x||^2 - Delta^2)^2* + +This function computes the second derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. +\begin{align*} +\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{align*} + +\begin{align*} + \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ + \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ + \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| accu3 | double precision | third sum of the formula | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_accu2 | double precision | temporary array for the third sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region_omp + double precision :: ddot + + ! Internal + double precision :: accu1,accu2,accu3 + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + integer :: i, j + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + + ! accu1 + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + ! accu2 + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + ! accu3 + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) + + deallocate(tmp_accu1, tmp_accu2, tmp_accu3) + +end function +#+END_SRC + +* OMP: Function value of ||x||^2 + +*Compute the value of ||x||^2* + +This function computes the value of ||x(lambda)||^2 + +\begin{align*} +||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | + +Internal: +| tmp_wtg(n) | double precision | temporary array for W^T.v_grad | +| tmp_fN | double precision | temporary array for the function | +| i,j | integer | indexes | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + use omp_lib + + include 'pi.h' + + BEGIN_DOC + ! Compute ||x(lambda)||^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! functions + double precision :: f_norm_trust_region_omp + + ! internal + double precision, allocatable :: tmp_fN(:) + integer :: i,j + + ! Allocation + allocate(tmp_fN(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + f_norm_trust_region_omp = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_fN(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + deallocate(tmp_fN) + +end function +#+END_SRC + +* First derivative of (||x||^2 - Delta^2)^2 +Version without omp + +*Function to compute the first derivative of ||x||^2 - Delta* + +This function computes the first derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. + +\begin{align*} +\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| ddot | double precision | blas dot product | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i, j + + ! Functions + double precision :: d1_norm_trust_region + double precision :: ddot + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) + +end function +#+END_SRC + +* Second derivative of (||x||^2 - Delta^2)^2 +Version without OMP + +*Function to compute the second derivative of ||x||^2 - Delta* + + +\begin{equation} +\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{equation} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| accu3 | double precision | third sum of the formula | +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | +| ddot | double precision | blas dot product | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region + double precision :: ddot + + ! Internal + double precision :: wtg,accu1,accu2,accu3 + integer :: i, j + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 + endif + enddo + + d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) + +end function +#+END_SRC + +* Function value of ||x||^2 +Version without OMP + +*Compute the value of ||x||^2* + +This function computes the value of ||x(lambda)||^2 + +\begin{align*} +||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| f_norm_trust_region | double precision | value of norm(x)^2 | +| ddot | double precision | blas dot product | + + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function f_norm_trust_region(n,e_val,tmp_wtg,lambda) + + include 'pi.h' + + BEGIN_DOC + ! Compute ||x(lambda)||^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! function + double precision :: f_norm_trust_region + double precision :: ddot + + ! internal + integer :: i,j + + ! Initialization + f_norm_trust_region = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + +end function +#+END_SRC + +* OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 +Version with OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + + deallocate(tmp_accu1, tmp_accu2) + +end +#+END_SRC + +* OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 +Version with OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2, accu3 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + + ! Functions + double precision :: d2_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + + deallocate(tmp_accu1,tmp_accu2,tmp_accu3) + +end +#+END_SRC + +* First derivative of (1/||x||^2 - 1/Delta^2)^2 +Version without OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i,j + + ! Functions + double precision :: d1_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + +end +#+END_SRC + +* Second derivative of (1/||x||^2 - 1/Delta^2)^2 +Version without OMP + +*Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2, accu3 + integer :: i,j + + ! Functions + double precision :: d2_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 + endif + enddo + + d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + +end +#+END_SRC diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f new file mode 100644 index 00000000..45738736 --- /dev/null +++ b/src/utils_trust_region/trust_region_rho.irp.f @@ -0,0 +1,121 @@ +! Agreement with the model: Rho + +! *Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* + +! Rho represents the agreement between the model (the predicted energy +! by the Taylor expansion truncated at the 2nd order) and the real +! energy : + +! \begin{equation} +! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +! \end{equation} +! With : +! $E^{k}$ the energy at the previous iteration +! $E^{k+1}$ the energy at the actual iteration +! $m^{k+1}$ the predicted energy for the actual iteration +! (cf. trust_e_model) + +! If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. +! If $\rho \leq 0$ the previous energy is lower than the actual +! energy. We have to cancel the last step and use a smaller trust +! region. +! Here we cancel the last step if $\rho < 0.1$, because even if +! the energy decreases, the agreement is bad, i.e., the Taylor expansion +! truncated at the second order doesn't represent correctly the energy +! landscape. So it's better to cancel the step and restart with a +! smaller trust region. + +! Provided in qp_edit: +! | thresh_rho | + +! Input: +! | prev_energy | double precision | previous energy (energy before the rotation) | +! | e_model | double precision | predicted energy after the rotation | + +! Output: +! | rho | double precision | the agreement between the model (predicted) and the real energy | +! | prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | +! | | | else the previous energy doesn't change | + +! Internal: +! | energy | double precision | energy (real) after the rotation | +! | i | integer | index | +! | t* | double precision | time | + + +subroutine trust_region_rho(prev_energy, energy,e_model,rho) + + include 'pi.h' + + BEGIN_DOC + ! Compute rho, the agreement between the predicted criterion/energy and the real one + END_DOC + + implicit none + + ! Variables + + ! In + double precision, intent(inout) :: prev_energy + double precision, intent(in) :: e_model, energy + + ! Out + double precision, intent(out) :: rho + + ! Internal + double precision :: t1, t2, t3 + integer :: i + + print*,'' + print*,'---Rho_model---' + + call wall_time(t1) + +! Rho +! \begin{equation} +! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +! \end{equation} + +! In function of $\rho$ th step can be accepted or cancelled. + +! If we cancel the last step (k+1), the previous energy (k) doesn't +! change! +! If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) + + +! Already done in an other subroutine + !if (ABS(prev_energy - e_model) < 1d-12) then + ! print*,'WARNING: prev_energy - e_model < 1d-12' + ! print*,'=> rho will tend toward infinity' + ! print*,'Check you convergence criterion !' + !endif + + rho = (prev_energy - energy) / (prev_energy - e_model) + + print*, 'previous energy, prev_energy :', prev_energy + print*, 'predicted energy, e_model :', e_model + print*, 'real energy, energy :', energy + print*, 'prev_energy - energy :', prev_energy - energy + print*, 'prev_energy - e_model :', prev_energy - e_model + print*, 'Rho :', rho + print*, 'Threshold for rho:', thresh_rho + + ! Modification of prev_energy in function of rho + if (rho < thresh_rho) then !0.1) then + ! the step is cancelled + print*, 'Rho <', thresh_rho,', the previous energy does not changed' + print*, 'prev_energy :', prev_energy + else + ! the step is accepted + prev_energy = energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rho model:', t3 + + print*,'---End rho_model---' + print*,'' + +end subroutine diff --git a/src/utils_trust_region/trust_region_rho.org b/src/utils_trust_region/trust_region_rho.org new file mode 100644 index 00000000..9b25ee29 --- /dev/null +++ b/src/utils_trust_region/trust_region_rho.org @@ -0,0 +1,123 @@ +* Agreement with the model: Rho + +*Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* + +Rho represents the agreement between the model (the predicted energy +by the Taylor expansion truncated at the 2nd order) and the real +energy : + +\begin{equation} +\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +\end{equation} +With : +$E^{k}$ the energy at the previous iteration +$E^{k+1}$ the energy at the actual iteration +$m^{k+1}$ the predicted energy for the actual iteration +(cf. trust_e_model) + +If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. +If $\rho \leq 0$ the previous energy is lower than the actual +energy. We have to cancel the last step and use a smaller trust +region. +Here we cancel the last step if $\rho < 0.1$, because even if +the energy decreases, the agreement is bad, i.e., the Taylor expansion +truncated at the second order doesn't represent correctly the energy +landscape. So it's better to cancel the step and restart with a +smaller trust region. + +Provided in qp_edit: +| thresh_rho | + +Input: +| prev_energy | double precision | previous energy (energy before the rotation) | +| e_model | double precision | predicted energy after the rotation | + +Output: +| rho | double precision | the agreement between the model (predicted) and the real energy | +| prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | +| | | else the previous energy doesn't change | + +Internal: +| energy | double precision | energy (real) after the rotation | +| i | integer | index | +| t* | double precision | time | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f +subroutine trust_region_rho(prev_energy, energy,e_model,rho) + + include 'pi.h' + + BEGIN_DOC + ! Compute rho, the agreement between the predicted criterion/energy and the real one + END_DOC + + implicit none + + ! Variables + + ! In + double precision, intent(inout) :: prev_energy + double precision, intent(in) :: e_model, energy + + ! Out + double precision, intent(out) :: rho + + ! Internal + double precision :: t1, t2, t3 + integer :: i + + print*,'' + print*,'---Rho_model---' + + call wall_time(t1) +#+END_SRC + +** Rho +\begin{equation} +\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +\end{equation} + +In function of $\rho$ th step can be accepted or cancelled. + +If we cancel the last step (k+1), the previous energy (k) doesn't +change! +If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) + +#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f + ! Already done in an other subroutine + !if (ABS(prev_energy - e_model) < 1d-12) then + ! print*,'WARNING: prev_energy - e_model < 1d-12' + ! print*,'=> rho will tend toward infinity' + ! print*,'Check you convergence criterion !' + !endif + + rho = (prev_energy - energy) / (prev_energy - e_model) + + print*, 'previous energy, prev_energy :', prev_energy + print*, 'predicted energy, e_model :', e_model + print*, 'real energy, energy :', energy + print*, 'prev_energy - energy :', prev_energy - energy + print*, 'prev_energy - e_model :', prev_energy - e_model + print*, 'Rho :', rho + print*, 'Threshold for rho:', thresh_rho + + ! Modification of prev_energy in function of rho + if (rho < thresh_rho) then !0.1) then + ! the step is cancelled + print*, 'Rho <', thresh_rho,', the previous energy does not changed' + print*, 'prev_energy :', prev_energy + else + ! the step is accepted + prev_energy = energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rho model:', t3 + + print*,'---End rho_model---' + print*,'' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f new file mode 100644 index 00000000..42aa6ed4 --- /dev/null +++ b/src/utils_trust_region/trust_region_step.irp.f @@ -0,0 +1,716 @@ +! Trust region + +! *Compute the next step with the trust region algorithm* + +! The Newton method is an iterative method to find a minimum of a given +! function. It uses a Taylor series truncated at the second order of the +! targeted function and gives its minimizer. The minimizer is taken as +! the new position and the same thing is done. And by doing so +! iteratively the method find a minimum, a local or global one depending +! of the starting point and the convexity/nonconvexity of the targeted +! function. + +! The goal of the trust region is to constrain the step size of the +! Newton method in a certain area around the actual position, where the +! Taylor series is a good approximation of the targeted function. This +! area is called the "trust region". + +! In addition, in function of the agreement between the Taylor +! development of the energy and the real energy, the size of the trust +! region will be updated at each iteration. By doing so, the step sizes +! are not too larges. In addition, since we add a criterion to cancel the +! step if the energy increases (more precisely if rho < 0.1), so it's +! impossible to diverge. \newline + +! References: \newline +! Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline +! https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline +! ISBN: 978-0-387-40065-5 \newline + +! By using the first and the second derivatives, the Newton method gives +! a step: +! \begin{align*} +! \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot +! \textbf{g}_{(k)} +! \end{align*} +! which leads to the minimizer of the Taylor series. +! !!! Warning: the Newton method gives the minimizer if and only if +! $\textbf{H}$ is positive definite, else it leads to a saddle point !!! +! But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm: +! \begin{align*} +! ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)} +! \end{align*} +! which is equivalent to +! \begin{align*} +! \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2 +! \end{align*} + +! with: \newline +! $\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of +! size n) \newline +! $\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n +! matrix) \newline +! $\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of +! size n) \newline +! $\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration +! \newline + +! Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a +! hypersphere of radius $\Delta_{(k+1)}$.\newline + +! So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and +! $\textbf{H}$ is positive definite, the +! solution is the step given by the Newton method +! $\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$. +! Else we have to constrain the step size. For simplicity we will remove +! the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have +! to put a constraint on $\textbf{x}$ with a Lagrange multiplier. +! Starting from the Taylor series of a function E (here, the energy) +! truncated at the 2nd order, we have: +! \begin{align*} +! E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2} +! \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + +! \mathcal{O}(\textbf{x}^2) +! \end{align*} + +! With the constraint on the norm of $\textbf{x}$ we can write the +! Lagrangian +! \begin{align*} +! \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x} +! + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +! + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2) +! \end{align*} +! Where: \newline +! $\lambda$ is the Lagrange multiplier \newline +! $E$ is the energy at the k-th iteration $\Leftrightarrow +! E(\textbf{x} = \textbf{0})$ \newline + +! To solve this equation, we search a stationary point where the first +! derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e. +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0 +! \end{align*} + +! The derivative is: +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} +! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +! \end{align*} + +! So, we search $\textbf{x}$ such as: +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} +! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0 +! \end{align*} + +! We can rewrite that as: +! \begin{align*} +! \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +! = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0 +! \end{align*} +! with $\textbf{I}$ is the identity matrix. + +! By doing so, the solution is: +! \begin{align*} +! (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g} +! \end{align*} +! \begin{align*} +! \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +! \end{align*} +! with $\textbf{x}^T \textbf{x} = \Delta^2$. + +! We have to solve this previous equation to find this $\textbf{x}$ in the +! trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is +! just a one dimension problem because we can express $\textbf{x}$ as a +! function of $\lambda$: +! \begin{align*} +! \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +! \end{align*} + +! We start from the fact that the hessian is diagonalizable. So we have: +! \begin{align*} +! \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T +! \end{align*} +! with: \newline +! $\textbf{H}$, the hessian matrix \newline +! $\textbf{W}$, the matrix containing the eigenvectors \newline +! $\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline +! $\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline +! $h_i$, the i-th eigenvalue in ascending order \newline + +! Now we use the fact that adding a constant on the diagonal just shifts +! the eigenvalues: +! \begin{align*} +! \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} +! +\textbf{I} \lambda) \cdot \textbf{W}^T +! \end{align*} + +! By doing so we can express $\textbf{x}$ as a function of $\lambda$ +! \begin{align*} +! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} +! with $\lambda \neq - h_i$. + +! An interesting thing in our case is the norm of $\textbf{x}$, +! because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of +! the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have: +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot +! \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. +! And if we study the properties of this function we see that: +! \begin{align*} +! \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0 +! \end{align*} +! and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: +! \begin{align*} +! \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty +! \end{align*} + +! From these limits and knowing that $h_1$ is the lowest eigenvalue, we +! can conclude that $||\textbf{x}(\lambda)||$ is a continuous and +! strictly decreasing function on the interval $\lambda \in +! (-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which +! gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one +! solution. + +! Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot +! \textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly, +! $\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the +! Newton method is only defined for a positive definite hessian matrix, +! so $(\textbf{H} + \textbf{I} \lambda)$ must be positive +! definite. Consequently, in the case where $\textbf{H}$ is not positive +! definite, to ensure the positive definiteness, $\lambda$ must be +! greater than $- h_1$. +! \begin{align*} +! \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 +! \end{align*} + +! From that there are five cases: +! - if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ +! - if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot +! \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} +! \lambda)$ +! must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing +! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be +! positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) +! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing +! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be +! positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is +! similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = +! 0)|| \leq \Delta$ +! but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ +! time a constant to ensure the condition $||\textbf{x}(\lambda = +! -h_1)|| = \Delta$ and escape from the saddle point + +! Thus to find the solution, we can write: +! \begin{align*} +! ||\textbf{x}(\lambda)|| = \Delta +! \end{align*} +! \begin{align*} +! ||\textbf{x}(\lambda)|| - \Delta = 0 +! \end{align*} + +! Taking the square of this equation +! \begin{align*} +! (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 +! \end{align*} +! we have a function with one minimum for the optimal $\lambda$. +! Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve +! \begin{align*} +! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +! \end{align*} + +! But in practice, it is more effective to solve: +! \begin{align*} +! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +! \end{align*} + +! To do that, we just use the Newton method with "trust_newton" using +! first and second derivative of $(||\textbf{x}(\lambda)||^2 - +! \Delta^2)^2$ with respect to $\textbf{x}$. +! This will give the optimal $\lambda$ to compute the +! solution $\textbf{x}$ with the formula seen previously: +! \begin{align*} +! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} + +! The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our +! step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. + + + + +! Evolution of the trust region + +! We initialize the trust region at the first iteration using a radius +! \begin{align*} +! \Delta = ||\textbf{x}(\lambda=0)|| +! \end{align*} + +! And for the next iteration the trust region will evolves depending of +! the agreement of the energy prediction based on the Taylor series +! truncated at the 2nd order and the real energy. If the Taylor series +! truncated at the 2nd order represents correctly the energy landscape +! the trust region will be extent else it will be reduced. In order to +! mesure this agreement we use the ratio rho cf. "rho_model" and +! "trust_e_model". From that we use the following values: +! - if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, +! - if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, +! - if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, +! - if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. + +! In addition, if $\rho < 0.1$ the iteration is cancelled, so it +! restarts with a smaller trust region until the energy decreases. + + + + +! Summary + +! To summarize, knowing the hessian (eigenvectors and eigenvalues), the +! gradient and the radius of the trust region we can compute the norm of +! the Newton step +! \begin{align*} +! ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n +! \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 +! \end{align*} + +! - if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and +! $\textbf{x}(\lambda=0)$ is in the trust region and it is not +! necessary to put a constraint on $\textbf{x}$, the solution is the +! unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. +! - else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and +! $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in +! the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = +! -h_1)$, similarly to the previous case. +! But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ +! time a constant to ensure the condition $||\textbf{x}(\lambda = +! -h_1)|| = \Delta$ and escape from the saddle point +! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we +! have to search $\lambda \in (-h_1, \infty)$ such as +! $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method +! \begin{align*} +! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +! \end{align*} +! or +! \begin{align*} +! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +! \end{align*} +! which is numerically more stable. And finally compute +! \begin{align*} +! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} +! - else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we +! do exactly the same thing that the previous case but we search +! $\lambda \in (0, \infty)$ +! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and +! $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the +! sum), again we do exactly the same thing that the previous case +! searching $\lambda \in (-h_1, \infty)$. + + +! For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not +! necessary in fact to remove the $j = 1$ in the sum since the term +! where $h_i - \lambda < 10^{-6}$ are not computed. + +! After that, we take this vector $\textbf{x}^*$, called "x", and we do +! the transformation to an antisymmetric matrix $\textbf{X}$, called +! m_x. This matrix $\textbf{X}$ will be used to compute a rotation +! matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". + +! NB: +! An improvement can be done using a elleptical trust region. + + + + +! Code + +! Provided: +! | mo_num | integer | number of MOs | + +! Cf. qp_edit in orbital optimization section, for some constants/thresholds + +! Input: +! | m | integer | number of MOs | +! | n | integer | m*(m-1)/2 | +! | H(n, n) | double precision | hessian | +! | v_grad(n) | double precision | gradient | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n, n) | double precision | eigenvectors of the hessian | +! | rho | double precision | agreement between the model and the reality, | +! | | | represents the quality of the energy prediction | +! | nb_iter | integer | number of iteration | + +! Input/Ouput: +! | delta | double precision | radius of the trust region | + +! Output: +! | x(n) | double precision | vector containing the step | + +! Internal: +! | accu | double precision | temporary variable to compute the step | +! | lambda | double precision | lagrange multiplier | +! | trust_radius2 | double precision | square of the radius of the trust region | +! | norm2_x | double precision | norm^2 of the vector x | +! | norm2_g | double precision | norm^2 of the vector containing the gradient | +! | tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | +! | i, j, k | integer | indexes | + +! Function: +! | dnrm2 | double precision | Blas function computing the norm | +! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | + + +subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compuet the step in the trust region + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: v_grad(n), rho + integer, intent(inout) :: nb_iter + double precision, intent(in) :: e_val(n), w(n,n) + + ! inout + double precision, intent(inout) :: delta + + ! out + double precision, intent(out) :: x(n) + + ! Internal + double precision :: accu, lambda, trust_radius2 + double precision :: norm2_x, norm2_g + double precision, allocatable :: tmp_wtg(:) + integer :: i,j,k + double precision :: t1,t2,t3 + integer :: n_neg_eval + + + ! Functions + double precision :: ddot, dnrm2 + double precision :: f_norm_trust_region_omp + + print*,'' + print*,'==================' + print*,'---Trust_region---' + print*,'==================' + + call wall_time(t1) + + ! Allocation + allocate(tmp_wtg(n)) + +! Initialization and norm + +! The norm of the step size will be useful for the trust region +! algorithm. We start from a first guess and the radius of the trust +! region will evolve during the optimization. + +! avoid_saddle is actually a test to avoid saddle points + + +! Initialization of the Lagrange multiplier +lambda = 0d0 + +! List of w^T.g, to avoid the recomputation +tmp_wtg = 0d0 +do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo +enddo + +! Replacement of the small tmp_wtg corresponding to a negative eigenvalue +! in the case of avoid_saddle +if (avoid_saddle .and. e_val(1) < - thresh_eig) then + i = 2 + ! Number of negative eigenvalues + do while (e_val(i) < - thresh_eig) + if (tmp_wtg(i) < thresh_wtg2) then + if (version_avoid_saddle == 1) then + tmp_wtg(i) = 1d0 + elseif (version_avoid_saddle == 2) then + tmp_wtg(i) = DABS(e_val(i)) + elseif (version_avoid_saddle == 3) then + tmp_wtg(i) = dsqrt(DABS(e_val(i))) + else + tmp_wtg(i) = thresh_wtg2 + endif + endif + i = i + 1 + enddo + + ! For the fist one it's a little bit different + if (tmp_wtg(1) < thresh_wtg2) then + tmp_wtg(1) = 0d0 + endif + +endif + +! Norm^2 of x, ||x||^2 +norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) +! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta +! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm +! Anyway if the step is too big it will be reduced +print*,'||x||^2 :', norm2_x + +! Norm^2 of the gradient, ||v_grad||^2 +norm2_g = (dnrm2(n,v_grad,1))**2 +print*,'||grad||^2 :', norm2_g + +! Trust radius initialization + +! At the first iteration (nb_iter = 0) we initialize the trust region +! with the norm of the step generate by the Newton's method ($\textbf{x}_1 = +! (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, +! we compute this norm using f_norm_trust_region_omp as explain just +! below) + + +! trust radius +if (nb_iter == 0) then + trust_radius2 = norm2_x + ! To avoid infinite loop of cancellation of this first step + ! without changing delta + nb_iter = 1 + + ! Compute delta, delta = sqrt(trust_radius) + delta = dsqrt(trust_radius2) +endif + +! Modification of the trust radius + +! In function of rho (which represents the agreement between the model +! and the reality, cf. rho_model) the trust region evolves. We update +! delta (the radius of the trust region). + +! To avoid too big trust region we put a maximum size. + + +! Modification of the trust radius in function of rho +if (rho >= 0.75d0) then + delta = 2d0 * delta +elseif (rho >= 0.5d0) then + delta = delta +elseif (rho >= 0.25d0) then + delta = 0.5d0 * delta +else + delta = 0.25d0 * delta +endif + +! Maximum size of the trust region +!if (delta > 0.5d0 * n * pi) then +! delta = 0.5d0 * n * pi +! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' +!endif + +if (delta > 1d10) then + delta = 1d10 +endif + +print*, 'Delta :', delta + +! Calculation of the optimal lambda + +! We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ +! - If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant +! $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ +! - If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the +! unconstrained one, $\lambda = 0$ + +! You will find more details at the beginning + + +! By giving delta, we search (||x||^2 - delta^2)^2 = 0 +! and not (||x||^2 - delta)^2 = 0 + +! Research of lambda to solve ||x(lambda)|| = Delta + +! Display +print*, 'e_val(1) = ', e_val(1) +print*, 'w_1^T.g =', tmp_wtg(1) + +! H positive definite +if (e_val(1) > - thresh_eig) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + print*, '||x(0)||=', dsqrt(norm2_x) + print*, 'Delta=', delta + + ! H positive definite, ||x(lambda = 0)|| <= Delta + if (dsqrt(norm2_x) <= delta) then + print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + print*, 'lambda = 0, no lambda optimization' + lambda = 0d0 + + ! H positive definite, ||x(lambda = 0)|| > Delta + else + ! Constraint solution + print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + print*,'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + +! H indefinite +else + if (DABS(tmp_wtg(1)) < thresh_wtg) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) + print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + endif + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta + if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then + ! Add e_val(1) in order to have (H - e_val(1) I) positive definite + print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + print*, 'lambda = -e_val(1), no lambda optimization' + lambda = - e_val(1) + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta + ! and + ! H indefinite, w_1^T.g =/= 0 + else + ! Constraint solution/ add lambda + if (DABS(tmp_wtg(1)) < thresh_wtg) then + print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + else + print*, 'H indefinite, w_1^T.g =/= 0' + endif + print*, 'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + +endif + +! Recomputation of the norm^2 of the step x +norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) +print*,'' +print*,'Summary after the trust region:' +print*,'lambda:', lambda +print*,'||x||:', dsqrt(norm2_x) +print*,'delta:', delta + +! Calculation of the step x + +! x refers to $\textbf{x}^*$ +! We compute x in function of lambda using its formula : +! \begin{align*} +! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i +! + \lambda} \cdot \textbf{w}_i +! \end{align*} + + +! Initialisation +x = 0d0 + +! Calculation of the step x + +! Normal version +if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo + +! Version to use the absolute value of the eigenvalues +else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + +endif + +double precision :: beta, norm_x + +! Test +! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) +! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first +! eigenvectors multiply by a constant to ensure the condition +! ||x(lambda=-e_val(1))|| = delta and escape the saddle point +if (avoid_saddle .and. e_val(1) < - thresh_eig) then + if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then + + ! norm of x + norm_x = dnrm2(n,x,1) + + ! Computes the coefficient for the w_1 + beta = delta**2 - norm_x**2 + + ! Updates the step x + x = x + W(:,1) * dsqrt(beta) + + ! Recomputes the norm to check + norm_x = dnrm2(n,x,1) + + print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' + print*, '||x||', norm_x + endif +endif + +! Transformation of x + +! x is a vector of size n, so it can be write as a m by m +! antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". + + +! ! Step transformation vector -> matrix +! ! Vector with n element -> mo_num by mo_num matrix +! do j = 1, m +! do i = 1, m +! if (i>j) then +! call mat_to_vec_index(i,j,k) +! m_x(i,j) = x(k) +! else +! m_x(i,j) = 0d0 +! endif +! enddo +! enddo +! +! ! Antisymmetrization of the previous matrix +! do j = 1, m +! do i = 1, m +! if (i 0$ ($\lambda = 0$ is the unconstraint solution). But the +Newton method is only defined for a positive definite hessian matrix, +so $(\textbf{H} + \textbf{I} \lambda)$ must be positive +definite. Consequently, in the case where $\textbf{H}$ is not positive +definite, to ensure the positive definiteness, $\lambda$ must be +greater than $- h_1$. +\begin{align*} + \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 +\end{align*} + +From that there are five cases: +- if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ +- if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot + \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} + \lambda)$ + must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot + \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing + $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be + positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) +- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot + \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing + $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be + positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is + similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = + 0)|| \leq \Delta$ + but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ + time a constant to ensure the condition $||\textbf{x}(\lambda = + -h_1)|| = \Delta$ and escape from the saddle point + +Thus to find the solution, we can write: +\begin{align*} + ||\textbf{x}(\lambda)|| = \Delta +\end{align*} +\begin{align*} + ||\textbf{x}(\lambda)|| - \Delta = 0 +\end{align*} + +Taking the square of this equation +\begin{align*} + (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 +\end{align*} +we have a function with one minimum for the optimal $\lambda$. +Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve +\begin{align*} + (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +\end{align*} + +But in practice, it is more effective to solve: +\begin{align*} + (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +\end{align*} + +To do that, we just use the Newton method with "trust_newton" using +first and second derivative of $(||\textbf{x}(\lambda)||^2 - +\Delta^2)^2$ with respect to $\textbf{x}$. +This will give the optimal $\lambda$ to compute the +solution $\textbf{x}$ with the formula seen previously: +\begin{align*} + \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +\end{align*} + +The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our +step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Evolution of the trust region + +We initialize the trust region at the first iteration using a radius +\begin{align*} + \Delta = ||\textbf{x}(\lambda=0)|| +\end{align*} + +And for the next iteration the trust region will evolves depending of +the agreement of the energy prediction based on the Taylor series +truncated at the 2nd order and the real energy. If the Taylor series +truncated at the 2nd order represents correctly the energy landscape +the trust region will be extent else it will be reduced. In order to +mesure this agreement we use the ratio rho cf. "rho_model" and +"trust_e_model". From that we use the following values: +- if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, +- if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, +- if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, +- if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. + +In addition, if $\rho < 0.1$ the iteration is cancelled, so it +restarts with a smaller trust region until the energy decreases. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Summary + +To summarize, knowing the hessian (eigenvectors and eigenvalues), the +gradient and the radius of the trust region we can compute the norm of +the Newton step +\begin{align*} + ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n + \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 +\end{align*} + +- if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and + $\textbf{x}(\lambda=0)$ is in the trust region and it is not + necessary to put a constraint on $\textbf{x}$, the solution is the + unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. +- else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and + $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in + the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = + -h_1)$, similarly to the previous case. + But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ + time a constant to ensure the condition $||\textbf{x}(\lambda = + -h_1)|| = \Delta$ and escape from the saddle point +- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we + have to search $\lambda \in (-h_1, \infty)$ such as + $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method + \begin{align*} + (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 + \end{align*} + or + \begin{align*} + (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 + \end{align*} + which is numerically more stable. And finally compute + \begin{align*} + \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i + \end{align*} +- else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we + do exactly the same thing that the previous case but we search + $\lambda \in (0, \infty)$ +- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and + $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the + sum), again we do exactly the same thing that the previous case + searching $\lambda \in (-h_1, \infty)$. + + +For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not +necessary in fact to remove the $j = 1$ in the sum since the term +where $h_i - \lambda < 10^{-6}$ are not computed. + +After that, we take this vector $\textbf{x}^*$, called "x", and we do +the transformation to an antisymmetric matrix $\textbf{X}$, called +m_x. This matrix $\textbf{X}$ will be used to compute a rotation +matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". + +NB: +An improvement can be done using a elleptical trust region. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Code + +Provided: +| mo_num | integer | number of MOs | + +Cf. qp_edit in orbital optimization section, for some constants/thresholds + +Input: +| m | integer | number of MOs | +| n | integer | m*(m-1)/2 | +| H(n, n) | double precision | hessian | +| v_grad(n) | double precision | gradient | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n, n) | double precision | eigenvectors of the hessian | +| rho | double precision | agreement between the model and the reality, | +| | | represents the quality of the energy prediction | +| nb_iter | integer | number of iteration | + +Input/Ouput: +| delta | double precision | radius of the trust region | + +Output: +| x(n) | double precision | vector containing the step | + +Internal: +| accu | double precision | temporary variable to compute the step | +| lambda | double precision | lagrange multiplier | +| trust_radius2 | double precision | square of the radius of the trust region | +| norm2_x | double precision | norm^2 of the vector x | +| norm2_g | double precision | norm^2 of the vector containing the gradient | +| tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | +| i, j, k | integer | indexes | + +Function: +| dnrm2 | double precision | Blas function computing the norm | +| f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compuet the step in the trust region + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: v_grad(n), rho + integer, intent(inout) :: nb_iter + double precision, intent(in) :: e_val(n), w(n,n) + + ! inout + double precision, intent(inout) :: delta + + ! out + double precision, intent(out) :: x(n) + + ! Internal + double precision :: accu, lambda, trust_radius2 + double precision :: norm2_x, norm2_g + double precision, allocatable :: tmp_wtg(:) + integer :: i,j,k + double precision :: t1,t2,t3 + integer :: n_neg_eval + + + ! Functions + double precision :: ddot, dnrm2 + double precision :: f_norm_trust_region_omp + + print*,'' + print*,'==================' + print*,'---Trust_region---' + print*,'==================' + + call wall_time(t1) + + ! Allocation + allocate(tmp_wtg(n)) +#+END_SRC + + +*** Initialization and norm + +The norm of the step size will be useful for the trust region +algorithm. We start from a first guess and the radius of the trust +region will evolve during the optimization. + +avoid_saddle is actually a test to avoid saddle points + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Initialization of the Lagrange multiplier + lambda = 0d0 + + ! List of w^T.g, to avoid the recomputation + tmp_wtg = 0d0 + do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo + enddo + + ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue + ! in the case of avoid_saddle + if (avoid_saddle .and. e_val(1) < - thresh_eig) then + i = 2 + ! Number of negative eigenvalues + do while (e_val(i) < - thresh_eig) + if (tmp_wtg(i) < thresh_wtg2) then + if (version_avoid_saddle == 1) then + tmp_wtg(i) = 1d0 + elseif (version_avoid_saddle == 2) then + tmp_wtg(i) = DABS(e_val(i)) + elseif (version_avoid_saddle == 3) then + tmp_wtg(i) = dsqrt(DABS(e_val(i))) + else + tmp_wtg(i) = thresh_wtg2 + endif + endif + i = i + 1 + enddo + + ! For the fist one it's a little bit different + if (tmp_wtg(1) < thresh_wtg2) then + tmp_wtg(1) = 0d0 + endif + + endif + + ! Norm^2 of x, ||x||^2 + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta + ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm + ! Anyway if the step is too big it will be reduced + print*,'||x||^2 :', norm2_x + + ! Norm^2 of the gradient, ||v_grad||^2 + norm2_g = (dnrm2(n,v_grad,1))**2 + print*,'||grad||^2 :', norm2_g +#+END_SRC + +*** Trust radius initialization + + At the first iteration (nb_iter = 0) we initialize the trust region + with the norm of the step generate by the Newton's method ($\textbf{x}_1 = + (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, + we compute this norm using f_norm_trust_region_omp as explain just + below) + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! trust radius + if (nb_iter == 0) then + trust_radius2 = norm2_x + ! To avoid infinite loop of cancellation of this first step + ! without changing delta + nb_iter = 1 + + ! Compute delta, delta = sqrt(trust_radius) + delta = dsqrt(trust_radius2) + endif +#+END_SRC + +*** Modification of the trust radius + +In function of rho (which represents the agreement between the model +and the reality, cf. rho_model) the trust region evolves. We update +delta (the radius of the trust region). + +To avoid too big trust region we put a maximum size. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Modification of the trust radius in function of rho + if (rho >= 0.75d0) then + delta = 2d0 * delta + elseif (rho >= 0.5d0) then + delta = delta + elseif (rho >= 0.25d0) then + delta = 0.5d0 * delta + else + delta = 0.25d0 * delta + endif + + ! Maximum size of the trust region + !if (delta > 0.5d0 * n * pi) then + ! delta = 0.5d0 * n * pi + ! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' + !endif + + if (delta > 1d10) then + delta = 1d10 + endif + + print*, 'Delta :', delta +#+END_SRC + +*** Calculation of the optimal lambda + +We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ +- If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant + $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ +- If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the + unconstrained one, $\lambda = 0$ + +You will find more details at the beginning + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! By giving delta, we search (||x||^2 - delta^2)^2 = 0 + ! and not (||x||^2 - delta)^2 = 0 + + ! Research of lambda to solve ||x(lambda)|| = Delta + + ! Display + print*, 'e_val(1) = ', e_val(1) + print*, 'w_1^T.g =', tmp_wtg(1) + + ! H positive definite + if (e_val(1) > - thresh_eig) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + print*, '||x(0)||=', dsqrt(norm2_x) + print*, 'Delta=', delta + + ! H positive definite, ||x(lambda = 0)|| <= Delta + if (dsqrt(norm2_x) <= delta) then + print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + print*, 'lambda = 0, no lambda optimization' + lambda = 0d0 + + ! H positive definite, ||x(lambda = 0)|| > Delta + else + ! Constraint solution + print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + print*,'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + + ! H indefinite + else + if (DABS(tmp_wtg(1)) < thresh_wtg) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) + print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + endif + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta + if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then + ! Add e_val(1) in order to have (H - e_val(1) I) positive definite + print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + print*, 'lambda = -e_val(1), no lambda optimization' + lambda = - e_val(1) + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta + ! and + ! H indefinite, w_1^T.g =/= 0 + else + ! Constraint solution/ add lambda + if (DABS(tmp_wtg(1)) < thresh_wtg) then + print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + else + print*, 'H indefinite, w_1^T.g =/= 0' + endif + print*, 'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + + endif + + ! Recomputation of the norm^2 of the step x + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + print*,'' + print*,'Summary after the trust region:' + print*,'lambda:', lambda + print*,'||x||:', dsqrt(norm2_x) + print*,'delta:', delta +#+END_SRC + +*** Calculation of the step x + +x refers to $\textbf{x}^*$ +We compute x in function of lambda using its formula : +\begin{align*} +\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i ++ \lambda} \cdot \textbf{w}_i +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Initialisation + x = 0d0 + + ! Calculation of the step x + + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + + endif + + double precision :: beta, norm_x + + ! Test + ! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) + ! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first + ! eigenvectors multiply by a constant to ensure the condition + ! ||x(lambda=-e_val(1))|| = delta and escape the saddle point + if (avoid_saddle .and. e_val(1) < - thresh_eig) then + if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then + + ! norm of x + norm_x = dnrm2(n,x,1) + + ! Computes the coefficient for the w_1 + beta = delta**2 - norm_x**2 + + ! Updates the step x + x = x + W(:,1) * dsqrt(beta) + + ! Recomputes the norm to check + norm_x = dnrm2(n,x,1) + + print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' + print*, '||x||', norm_x + endif + endif +#+END_SRC + +*** Transformation of x + +x is a vector of size n, so it can be write as a m by m +antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". + + #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +! ! Step transformation vector -> matrix +! ! Vector with n element -> mo_num by mo_num matrix +! do j = 1, m +! do i = 1, m +! if (i>j) then +! call mat_to_vec_index(i,j,k) +! m_x(i,j) = x(k) +! else +! m_x(i,j) = 0d0 +! endif +! enddo +! enddo +! +! ! Antisymmetrization of the previous matrix +! do j = 1, m +! do i = 1, m +! if (i lower diagonal matrix (p,q), p > q + +! If a matrix is antisymmetric it can be reshaped as a vector. And the +! vector can be reshaped as an antisymmetric matrix + +! \begin{align*} +! \begin{pmatrix} +! 0 & -1 & -2 & -4 \\ +! 1 & 0 & -3 & -5 \\ +! 2 & 3 & 0 & -6 \\ +! 4 & 5 & 6 & 0 +! \end{pmatrix} +! \Leftrightarrow +! \begin{pmatrix} +! 1 & 2 & 3 & 4 & 5 & 6 +! \end{pmatrix} +! \end{align*} + +! !!! Here the algorithm only work for the lower diagonal !!! + +! Input: +! | i | integer | index in the vector | + +! Ouput: +! | p,q | integer | corresponding indexes in the lower diagonal of a matrix | +! | | | p > q, | +! | | | p -> row, | +! | | | q -> column | + + +subroutine vec_to_mat_index(i,p,q) + + include 'pi.h' + + BEGIN_DOC + ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing + ! its index i a vector + END_DOC + + implicit none + + ! Variables + + ! in + integer,intent(in) :: i + + ! out + integer, intent(out) :: p,q + + ! internal + integer :: a,b + double precision :: da + + da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) + a = INT(da) + if ((a*(a-1))/2==i) then + p = a-1 + else + p = a + endif + b = p*(p-1)/2 + + ! Matrix element indexes + p = p + 1 + q = i - b + +end subroutine diff --git a/src/utils_trust_region/vec_to_mat_index.org b/src/utils_trust_region/vec_to_mat_index.org new file mode 100644 index 00000000..0a09fa86 --- /dev/null +++ b/src/utils_trust_region/vec_to_mat_index.org @@ -0,0 +1,72 @@ +* Vector to matrix indexes + +*Compute the indexes p,q of a matrix element with the vector index i* + +Vector (i) -> lower diagonal matrix (p,q), p > q + +If a matrix is antisymmetric it can be reshaped as a vector. And the +vector can be reshaped as an antisymmetric matrix + +\begin{align*} +\begin{pmatrix} +0 & -1 & -2 & -4 \\ +1 & 0 & -3 & -5 \\ +2 & 3 & 0 & -6 \\ +4 & 5 & 6 & 0 +\end{pmatrix} +\Leftrightarrow +\begin{pmatrix} +1 & 2 & 3 & 4 & 5 & 6 +\end{pmatrix} +\end{align*} + +!!! Here the algorithm only work for the lower diagonal !!! + +Input: +| i | integer | index in the vector | + +Ouput: +| p,q | integer | corresponding indexes in the lower diagonal of a matrix | +| | | p > q, | +| | | p -> row, | +| | | q -> column | + +#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_index.irp.f +subroutine vec_to_mat_index(i,p,q) + + include 'pi.h' + + BEGIN_DOC + ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing + ! its index i a vector + END_DOC + + implicit none + + ! Variables + + ! in + integer,intent(in) :: i + + ! out + integer, intent(out) :: p,q + + ! internal + integer :: a,b + double precision :: da + + da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) + a = INT(da) + if ((a*(a-1))/2==i) then + p = a-1 + else + p = a + endif + b = p*(p-1)/2 + + ! Matrix element indexes + p = p + 1 + q = i - b + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/vec_to_mat_v2.irp.f b/src/utils_trust_region/vec_to_mat_v2.irp.f new file mode 100644 index 00000000..9140b8d3 --- /dev/null +++ b/src/utils_trust_region/vec_to_mat_v2.irp.f @@ -0,0 +1,39 @@ +! Vect to antisymmetric matrix using mat_to_vec_index + +! Vector to antisymmetric matrix transformation using mat_to_vec_index +! subroutine. + +! Can be done in OMP (for the first part and with omp critical for the second) + + +subroutine vec_to_mat_v2(n,m,v_x,m_x) + + BEGIN_DOC + ! Vector to antisymmetric matrix + END_DOC + + implicit none + + integer, intent(in) :: n,m + double precision, intent(in) :: v_x(n) + double precision, intent(out) :: m_x(m,m) + + integer :: i,j,k + + ! 1D -> 2D lower diagonal + m_x = 0d0 + do j = 1, m - 1 + do i = j + 1, m + call mat_to_vec_index(i,j,k) + m_x(i,j) = v_x(k) + enddo + enddo + + ! Antisym + do i = 1, m - 1 + do j = i + 1, m + m_x(i,j) = - m_x(j,i) + enddo + enddo + +end diff --git a/src/utils_trust_region/vec_to_mat_v2.org b/src/utils_trust_region/vec_to_mat_v2.org new file mode 100644 index 00000000..4e358a88 --- /dev/null +++ b/src/utils_trust_region/vec_to_mat_v2.org @@ -0,0 +1,40 @@ +* Vect to antisymmetric matrix using mat_to_vec_index + +Vector to antisymmetric matrix transformation using mat_to_vec_index +subroutine. + +Can be done in OMP (for the first part and with omp critical for the second) + +#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f +subroutine vec_to_mat_v2(n,m,v_x,m_x) + + BEGIN_DOC + ! Vector to antisymmetric matrix + END_DOC + + implicit none + + integer, intent(in) :: n,m + double precision, intent(in) :: v_x(n) + double precision, intent(out) :: m_x(m,m) + + integer :: i,j,k + + ! 1D -> 2D lower diagonal + m_x = 0d0 + do j = 1, m - 1 + do i = j + 1, m + call mat_to_vec_index(i,j,k) + m_x(i,j) = v_x(k) + enddo + enddo + + ! Antisym + do i = 1, m - 1 + do j = i + 1, m + m_x(i,j) = - m_x(j,i) + enddo + enddo + +end +#+END_SRC