diff --git a/src/csf/cfgCI_interface.f90 b/src/csf/cfgCI_interface.f90 new file mode 100644 index 00000000..1d727cd1 --- /dev/null +++ b/src/csf/cfgCI_interface.f90 @@ -0,0 +1,49 @@ +module cfunctions + use, intrinsic :: ISO_C_BINDING + interface + subroutine printcfglist(nint, ncfgs, cfglist) bind(C, name='printCFGList') + import C_INT32_T, C_INT64_T + integer(kind=C_INT32_T) :: nint + integer(kind=C_INT32_T) :: ncfgs + integer(kind=C_INT64_T) :: cfglist(nint,2,ncfgs) + end subroutine printcfglist + end interface + interface + subroutine getApqIJMatrixDims(Isomo, Jsomo, MS, rowsout, colsout) & + bind(C, name='getApqIJMatrixDims') + import C_INT32_T, C_INT64_T + integer(kind=C_INT64_T),value,intent(in) :: Isomo ! CSFI + integer(kind=C_INT64_T),value,intent(in) :: Jsomo ! CSFJ + integer(kind=C_INT64_T),value,intent(in) :: MS ! Ms = 2*Spin + integer(kind=C_INT32_T),intent(out):: rowsout + integer(kind=C_INT32_T),intent(out):: colsout + end subroutine getApqIJMatrixDims + end interface + interface + subroutine getApqIJMatrixDriver(Isomo, Jsomo, orbp, orbq, & + MS, NMO, CSFICSFJApqIJ, rowsmax, colsmax) bind(C, name='getApqIJMatrixDriverArrayInp') + import C_INT32_T, C_INT64_T, C_DOUBLE + integer(kind=C_INT64_T),value,intent(in) :: Isomo + integer(kind=C_INT64_T),value,intent(in) :: Jsomo + integer(kind=C_INT32_T),value,intent(in) :: orbp + integer(kind=C_INT32_T),value,intent(in) :: orbq + integer(kind=C_INT64_T),value,intent(in) :: MS + integer(kind=C_INT64_T),value,intent(in) :: NMO + integer(kind=C_INT32_T),intent(in) :: rowsmax + integer(kind=C_INT32_T),intent(in) :: colsmax + real (kind=C_DOUBLE ),intent(out) :: CSFICSFJApqIJ(rowsmax,colsmax) + !integer(kind=C_INT32_T),dimension(rowApqIJ,colApqIJ) :: ApqIJ + end subroutine getApqIJMatrixDriver + end interface + interface + subroutine getCSFtoDETTransformationMatrix(Isomo,& + MS, rowsmax, colsmax, csftodetmatrix) bind(C, name='convertCSFtoDetBasis') + import C_INT32_T, C_INT64_T, C_DOUBLE + integer(kind=C_INT64_T),value,intent(in) :: Isomo + integer(kind=C_INT64_T),value,intent(in) :: MS + integer(kind=C_INT32_T),intent(in) :: rowsmax + integer(kind=C_INT32_T),intent(in) :: colsmax + real (kind=C_DOUBLE ),intent(out) :: csftodetmatrix(rowsmax,colsmax) + end subroutine getCSFtoDETTransformationMatrix + end interface + end module cfunctions diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c new file mode 100644 index 00000000..23b984a0 --- /dev/null +++ b/src/csf/cfgCI_utils.c @@ -0,0 +1,1763 @@ +#include +#include +#include "tree_utils.h" + +void int_to_bin_digit(int64_t in, int count, int* out) +{ + /* assert: count <= sizeof(int)*CHAR_BIT */ + unsigned int mask = 1U << (count-1); + int i; + for (i = 0; i < count; i++) { + out[i] = (in & mask) ? 1 : 0; + in <<= 1; + } +} + +#include +#include +#include + +double logbinom(double n, double k) { + return lgamma(n+1)-lgamma(n-k+1)-lgamma(k+1); +} +double binom(double n, double k) { + return exp(logbinom(n,k)); +} + +void getncsfs1(int *inpnsomo, int *inpms, int *outncsfs){ + int nsomo = *inpnsomo; + int ms = *inpms; + int nparcoupl = (nsomo + ms)/2; + *outncsfs = binom(nsomo, nparcoupl); +} + +void getncsfs(int NSOMO, int MS, int *outncsfs){ + int nparcoupl = (NSOMO + MS)/2; + int nparcouplp1 = ((NSOMO + MS)/2)+1; + double tmpndets=0.0; + if(NSOMO == 0){ + (*outncsfs) = 1; + return; + } + tmpndets = binom(NSOMO, nparcoupl); + (*outncsfs) = round(tmpndets - binom(NSOMO, nparcouplp1)); +} + +#include + +void getBFIndexList(int NSOMO, int *BF1, int *IdxListBF1){ + int Iidx; + int Jidx; + int BFcopy[NSOMO]; + + int dictidx[2]; + dictidx[0] = -1; + dictidx[1] = 1; + + for(int i = 0; i < NSOMO; i++) + BFcopy[i] = BF1[i]; + + for(int i = 0; i < NSOMO; i++){ + Iidx = i; + if(BFcopy[i] == 0){ + int countN1=0; + for(int j = i+1; j < NSOMO; j++){ + Jidx = j; + countN1 = countN1 + dictidx[BFcopy[j]]; + if(countN1 > 0){ + break; + } + } + BFcopy[Iidx] = -1; + BFcopy[Jidx] = -1; + IdxListBF1[Jidx] = Iidx; + IdxListBF1[Iidx] = Jidx; + } + } + +} + +void getIslands(int NSOMO, int *BF1, int *BF2, int *nislands, int *phasefactor){ + + // Get BF ids + int *IdxListBF1 = malloc(NSOMO * sizeof(int)); + int *IdxListBF2 = malloc(NSOMO * sizeof(int)); + + getBFIndexList(NSOMO, BF1, IdxListBF1); + getBFIndexList(NSOMO, BF2, IdxListBF2); + + int sumids = 0; + int maxcount=0; + *nislands = 0; + *phasefactor = 1; + + int BF1copy[NSOMO]; + for(int i = 0; i < NSOMO; i++) + BF1copy[i] = IdxListBF1[i]; + int BF2copy[NSOMO]; + for(int i = 0; i < NSOMO; i++) + BF2copy[i] = IdxListBF2[i]; + + for(int i = 0; i < NSOMO; i++){ + int thisId = i; + int nextId = BF1copy[i]; + maxcount = 0; + while(BF1copy[thisId] != -1 && maxcount < 20){ + if(maxcount==0) *nislands += 1; + if(maxcount==19) *nislands -= 1; + + maxcount++; + + // First the bra + nextId = BF1copy[thisId]; + BF1copy[thisId] = -1; + BF1copy[nextId] = -1; + + // Get the phase factor bra + if(nextId < thisId) *phasefactor *= -1; + + // Then the ket + thisId = BF2copy[nextId]; + BF2copy[thisId] = -1; + BF2copy[nextId] = -1; + + // Get the phase factor bra + if(nextId < thisId) *phasefactor *= -1; + + } + + for(int j=0;j>= 1; + } + *nsetbits = count; +} + +void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOMO){ + getSetBits(Isomo, NSOMO); + buildTreeDriver(bftree, *NSOMO, MS, NBF); +} + +void gramSchmidt(double *overlapMatrix, int rows, int cols, double *orthoMatrix){ + + // vector + double norm = 0.0; + double scalarprod = 0.0; + orthoMatrix[(rows-1)*cols + cols-1] = 1.0; + for(int i = cols-2; i > -1; i--){ orthoMatrix[(rows-1)*cols + i] = 0.0; } + + // Gram-Schmidt loop + for(int i = rows-2; i > -1; i--){ + for(int k = cols-1; k > -1; k--){ orthoMatrix[(i)*cols + k] = 0.0; } + orthoMatrix[i*cols + i] = 1.0; + + // orthogonalization + for(int j = rows-1; j > i; j--){ + // calculate scalar product + scalarprod = 0.0; + for(int k = cols-1;k>=j;k--){ + scalarprod += orthoMatrix[j*cols + k] * overlapMatrix[i*cols + k]; + } + for(int k = cols-1; k >= j; k--){ + orthoMatrix[i*cols + k] -= scalarprod * orthoMatrix[j*cols + k]; + } + } + + // Normalization + norm = 0.0; + for(int j = rows-1; j >= i; j--){ + for(int k=cols-1; k >= i; k--) + norm += orthoMatrix[i*cols + j]*orthoMatrix[i*cols + k]*overlapMatrix[j*cols+k]; + } + norm = sqrt(norm); + for(int j = rows-1; j >= i; j--){ + orthoMatrix[i*cols + j] /= norm; + } + + } + +} + +void get_phase_cfg_to_qp_inpList(int *inpdet, int NSOMO, int *phaseout){ + int nbetas=0; + (*phaseout) = 1; + for(int i=0;i 0){ + mask = (1< 0.0) phaseAll = 1.0; + //} + for(int j=0;j 0) free(overlapMatrixI); + if(rowsI + colsI > 0) free(orthoMatrixI); + if(rowsbftodetI + colsbftodetI > 0) free(bftodetmatrixI); + if(rowsI + colsbftodetI > 0) free(tmpcsftodet); +} + +#define BYTE_TO_BINARY_PATTERN "%c%c%c%c%c%c%c%c" +#define BYTE_TO_BINARY(byte) \ + (byte & 0x80 ? '1' : '0'), \ + (byte & 0x40 ? '1' : '0'), \ + (byte & 0x20 ? '1' : '0'), \ + (byte & 0x10 ? '1' : '0'), \ + (byte & 0x08 ? '1' : '0'), \ + (byte & 0x04 ? '1' : '0'), \ + (byte & 0x02 ? '1' : '0'), \ + (byte & 0x01 ? '1' : '0') + +int applyRemoveShftAddSOMOVMO(int idet, int p, int q, int *phase){ + // CSF: 1 1 1 1 0 1 + // DET: 1 0 1 0 1 + // | | + // p q + // p = 4 + // q = 1 + // + // result + // + // CSF: 1 0 1 1 1 1 + // DET: 1 1 0 0 1 + // maskp: + // 0 1 1 1 1 + // maskq: + // 0 0 0 0 1 + // maskpxq: + // 0 1 1 1 0 + // maskqxqi: + // 1 0 0 0 1 + int maskp = (1UL << p)-1; + int maskq = (1UL << q)-1; + int maskpxq = (maskp ^ maskq); + int maskpxqi = ~(maskp ^ maskq); + + // Step 1: remove + // clear bits from p + int outdet = idet; + int occatp = __builtin_popcount(idet & (1UL << (p-1))); + // remove the bit at p + outdet &= ~(1UL << (p-1)); + + // Step 2: shift + if(q > p){ + // start with q + + // calculate the phase + int na, nb; + int tmpdet = outdet & (maskpxq); + na = __builtin_popcount(tmpdet); + nb = __builtin_popcount(maskpxq) - na; + //int nfermions = occatp == 0 ? nb : na; + int nfermions = na+nb; + (*phase) = nfermions % 2 == 0 ? 1 : -1; + + int tmpdetq1 = outdet & maskpxq; + int tmpdetq2 = outdet & maskpxqi; + tmpdetq1 = tmpdetq1 >> 1; + outdet = tmpdetq1 | tmpdetq2; + // put electron at q + outdet = occatp == 0 ? outdet : outdet | (1UL<<(q-1)); + } + else{ + // shift bit to right + maskpxq = maskpxq >> 1; + maskpxqi = ~(maskpxq); + + // calculate the phase + int na, nb; + int tmpdet = outdet & (maskpxq); + na = __builtin_popcount(tmpdet); + nb = __builtin_popcount(maskpxq) - na; + //int nfermions = occatp == 0 ? nb : na; + int nfermions = na+nb; + (*phase) = nfermions % 2 == 0 ? 1 : -1; + + // start with p + // shift middle electrons to right + int tmpdetp1 = outdet & maskpxq; + int tmpdetp2 = outdet & maskpxqi; + tmpdetp1 = tmpdetp1 << 1; + outdet = tmpdetp1 | tmpdetp2; + // put electron at q + outdet = occatp == 0 ? outdet : outdet | (1UL<<(q-1)); + } + + // Done + return(outdet); +} + +int applyRemoveShftAddDOMOSOMO(int idet, int p, int q, int *phase){ + // CSF: 1 2 1 1 1 1 1 1 1 1 + // DET: 1 0 0 1 1 0 0 1 0 + // | | + // p q + // + // result + // + // CSF: 1 1 1 1 1 1 2 1 1 1 + // DET: 1 0 0 0 1 1 0 1 0 + // maskp: + // 0 1 1 1 1 1 1 1 1 + // maskq: + // 0 0 0 0 0 0 1 1 1 1 + int maskp = (1UL << p)-1; + int maskq = (1UL << q)-1; + int maskpxq = (maskp ^ maskq); + int maskpxqi = ~(maskp ^ maskq); + + // Step 1: remove + // clear bits from q + int outdet = idet; + int occatq = __builtin_popcount(idet & (1UL << (q-1))); + outdet &= ~(1UL << (q-1)); + + // Step 2: shift + if(q > p){ + // start with q + + // shift mask between p and q + maskpxq = maskpxq >> 1; + maskpxqi = ~(maskpxq); + // calculate the phase + int na, nb; + int tmpdet = outdet & (maskpxq); + na = __builtin_popcount(tmpdet); + nb = __builtin_popcount(maskpxq) - na; + // spin obb to that at q is moving + //int nfermions = occatq == 0 ? na : nb; + int nfermions = na + nb + 1; + (*phase) = nfermions % 2 == 0 ? 1 : -1; + + int tmpdetq1 = outdet & maskpxq; + int tmpdetq2 = outdet & maskpxqi; + tmpdetq1 = tmpdetq1 << 1; + outdet = tmpdetq1 | tmpdetq2; + + // Step 3: Add bit at p + 1 + outdet = occatq == 1 ? outdet | (1UL<<(p-1)) : outdet; + } + else{ + + // calculate the phase + int na, nb; + int tmpdet = outdet & (maskpxq); + na = __builtin_popcount(tmpdet); + nb = __builtin_popcount(maskpxq) - na; + // spin obb to that at q is moving + //int nfermions = occatq == 0 ? na : nb; + int nfermions = na + nb + 1; + (*phase) = nfermions % 2 == 0 ? 1 : -1; + + // start with p + // shift middle electrons to right + int tmpdetp1 = outdet & maskpxq; + int tmpdetp2 = outdet & maskpxqi; + tmpdetp1 = tmpdetp1 >> 1; + outdet = tmpdetp1 | tmpdetp2; + + // Step 3: Add bit at p + outdet = occatq == 1 ? outdet | (1UL<<(p-1)) : outdet; + } + + // Done + return(outdet); +} + +int applyRemoveShftSOMOSOMO(int idet, int p, int q, int *phase){ + // CSF: 1 1 1 1 1 1 1 1 1 1 + // DET: 1 1 0 0 1 1 0 0 1 0 + // | | + // p q + // + // result + // + // CSF: 1 1 1 1 1 1 1 1 + // DET: 1 0 0 1 1 0 1 0 + // maskp: + // 0 1 1 1 1 1 1 1 1 1 + // maskq: + // 0 0 0 0 0 0 0 1 1 1 + int maskp = (1UL << p)-1; + int maskq = (1UL << q)-1; + int maskpi =~maskp; + int maskqi =~maskq; + + // Step 1: remove + // clear bits from p and q + int outdet = idet; + outdet &= ~(1UL << (p-1)); + outdet &= ~(1UL << (q-1)); + + // calculate the phase + int occatp = idet & (1UL << (p-1)); + int na, nb; + int tmpdet = outdet & (maskp ^ maskq); + na = __builtin_popcount(tmpdet); + nb = abs(p-q)-1 - na; + //int nfermions = occatp == 0 ? nb : na; + + // Step 2: shift + if(q > p){ + int nfermions = occatp == 0 ? na+nb : na+nb+1; + (*phase) = nfermions % 2 == 0 ? 1 : -1; + // start with q + // shift everything left of q + int tmpdetq1 = outdet & maskq; + int tmpdetq2 = outdet & maskqi; + tmpdetq2 = tmpdetq2 >> 1; + outdet = tmpdetq1 | tmpdetq2; + + // shift everything left of p + int tmpdetp1 = outdet & maskp; + int tmpdetp2 = outdet & maskpi; + tmpdetp2 = tmpdetp2 >> 1; + outdet = tmpdetp1 | tmpdetp2; + } + else{ + int nfermions = occatp == 0 ? na+nb+1 : na+nb; + (*phase) = nfermions % 2 == 0 ? 1 : -1; + // start with p + // shift everything left of p + int tmpdetp1 = outdet & maskp; + int tmpdetp2 = outdet & maskpi; + tmpdetp2 = tmpdetp2 >> 1; + outdet = tmpdetp1 | tmpdetp2; + + // shift everything left of q + int tmpdetq1 = outdet & maskq; + int tmpdetq2 = outdet & maskqi; + tmpdetq2 = tmpdetq2 >> 1; + outdet = tmpdetq1 | tmpdetq2; + } + + // Done + return(outdet); +} + +unsigned int shftbit(int num, int p){ + unsigned int maskleft = ~(0 | ((1<> 1; + return(numleft | numright); +}; + +int getphase(int num, int p, int q, int nmo){ + // CSF: 1 1 1 1 1 1 1 1 1 1 + // DET: 1 1 0 0 1 1 0 0 1 0 + // | | + // p q + // | | + // CSF: 1 1 1 1 1 1 1 1 1 1 + // DET: 1 0 0 0 1 1 1 0 1 0 + // + // maskleft: + // 1 1 1 1 1 1 1 0 0 0 + // maskright: + // 0 1 1 1 1 1 1 1 1 1 + int omax = p > q ? p : q; + int omin = p > q ? q : p; + unsigned int maskleft = ~(0 | ((1<<(omin-1))-1)); + unsigned int maskright = ((1<<(omax))-1); + unsigned int maskmo = ((1<SOMO example + + 1 2 1 1 1 + p q + 1 1 1 1 2 + + p = 3 + q = 1 + + in determinant representation: (0->beta,1->alpha) + |I> = 0 0 1 1 + |____| + p q + + |ret> = 0 1 0 1 + A shift of bit at q to pos after p. + + */ + + int maskq = ~((1UL<> 1; + // Now combine with original det + int detout = (idet & maskpq); + // Zero out bits at q + detout &= ~(1UL << (q-1)); + // Set the bit at p + detout |= (1UL << (p-1)); + // Add the shifted bits + detout |= shifted_bits; + + // Now calcaulate the phase + // Find the type of bit at q + int occatq = idet & (1UL << (q-1)); + // calculate number of alpha and beta spins + int na = __builtin_popcount(shifted_bits); + int nb = p - q - na; + // Find the number of fermions to pass + int nfermions = occatq == 0 ? na : nb; + (*phase) = nfermions % 2 == 0 ? 1 : -1; + return(detout); +} + +void calcMEdetpair(int *detlistI, int *detlistJ, int orbI, int orbJ, int Isomo, int Jsomo, int ndetI, int ndetJ, int NMO, double *matelemdetbasis){ + + // Calculation of phase + // The following convention is used + // + // + // The phase is calculated + // assuming all alpha electrons + // are on the left and all beta + // electrons are on the RHS + // of the alphas. + + + int maskI; + int nelecatI; + unsigned int maskleft; + unsigned int maskright; + unsigned int psomo; + unsigned int qsomo; + + + // E(q,p) |I> = cqp |J> + + + int p,q; // The two orbitals p is always > q. + p = orbI >= orbJ ? orbI : orbJ; + q = orbI >= orbJ ? orbJ : orbI; + + // Find the corresponding case + // 1. NdetI > NdetJ (SOMO -> SOMO) + // 2. NdetI < NdetJ (DOMO -> VMO) + // 3. NdetI == NdetJ (SOMO -> VMO and DOMO -> SOMO) + + // Converting the above four cases into int: + int case_type = abs(ndetI - ndetJ) == 0 ? 3 : (ndetI > ndetJ ? 1 : 2); + + switch (case_type){ + case 1: + // SOMO -> SOMO + // Find the orbital ids in model space + maskleft = (0 | ((1<<(p))-1)); + maskright = (0 | ((1<<(q))-1)); + psomo = __builtin_popcount(Isomo & maskleft); + qsomo = q == 1 ? 1 : __builtin_popcount(Isomo & maskright); + p = psomo >= qsomo ? psomo : qsomo; + q = psomo >= qsomo ? qsomo : psomo; + + for(int i=0;i VMO + // Find the orbital ids in model space + maskleft = (0 | ((1<<(p))-1)); + maskright =(0 | ((1<<(q))-1)); + psomo = __builtin_popcount(Jsomo & maskleft); + qsomo = q == 1 ? 1 : __builtin_popcount(Jsomo & maskright); + p = psomo >= qsomo ? psomo : qsomo; + q = psomo >= qsomo ? qsomo : psomo; + + for(int i=0;i VMO or DOMO -> SOMO) + // if Isomo[p] == 1 => SOMO -> VMO + // if Isomo[p] == 0 => DOMO -> SOMO + // Find the orbital ids in model space + maskleft = ((1<<(p))-1); + maskright =((1<<(q))-1); + psomo = __builtin_popcount(Isomo & maskleft); + //qsomo = q == 1 ? 1 : __builtin_popcount(Isomo & maskright); + qsomo = __builtin_popcount(Isomo & maskright); + p = psomo >= qsomo ? psomo : qsomo; + q = psomo >= qsomo ? qsomo : psomo; + + + int noccorbI = (Isomo & (1<<(orbI-1))); + switch (noccorbI){ + case 0: + // Case: DOMO -> SOMO + break; + case 1: + // Case: SOMO -> VMO + break; + default: + printf("Something is wrong in calcMEdetpair\n"); + break; + } + + int tmpidet; + + for(int i=0;i SOMO + // + // I = + // 2 1 1 1 1 + // (10) 0 0 1 1 + // + // | + // \ / + // . + // 0 0 0 1 1 + // + // J = + // 1 1 1 1 2 + // 0 0 1 1 (10) + // + if(nelecalphaatp == 0){ + // Case: DOMO -> SOMO + tmpidet = idet; + int nelecalphaatq = (idet & (1<<(orbJ-1))); + if(nelecalphaatq==0) tmpidet = tmpidet ^ (1<<(orbI-1)); + else tmpidet = tmpidet ^ (0); + idet = shftbit(idet,q); + } + else{ + tmpidet = idet; + idet = shftbit(idet,p); + } + + // Calculate phase + int phase = 1*getphase(tmpidet,orbI,orbJ,NMO); + for(int j=0;j + // + // The phase is calculated + // assuming all alpha electrons + // are on the left and all beta + // electrons are on the RHS + // of the alphas. + + // There are three possibilities + // which need to be separated + // CASE 1. p > q + // CASE 2. p < q + // CASE 3. p == q + + int maskI; + int nelecatI; + int noccorbI; + double phaseI=1.0; + double phaseJ=1.0; + unsigned int maskleft; + unsigned int maskright; + unsigned int psomo; + unsigned int qsomo; + + int p,q; // The two orbitals p is always > q. + + if(orbI > orbJ){ + // CASE 1 : orbI > orbJ + p = orbI; + q = orbJ; + + // Find the corresponding sub case + // 1. NdetI > NdetJ (SOMO -> SOMO) + // 2. NdetI < NdetJ (DOMO -> VMO) + // 3. NdetI == NdetJ (SOMO -> VMO and DOMO -> SOMO) + + // Converting the above four cases into int: + int case_type = abs(ndetI - ndetJ) == 0 ? 3 : (ndetI > ndetJ ? 1 : 2); + p = orbI; + q = orbJ; + + switch (case_type){ + case 1: + // SOMO -> SOMO + // Find the orbital ids in model space + maskleft = (0 | ((1<<(p))-1)); + maskright = (0 | ((1<<(q))-1)); + psomo = __builtin_popcount(Isomo & maskleft); + qsomo = __builtin_popcount(Isomo & maskright); // q has to be atleast 1 + p = psomo; + q = qsomo; + + for(int i=0;i VMO + // Find the orbital ids in model space + // As seen in Jsomo + // Here we apply a^{\dagger}_p a_q |J> + maskleft = (0 | ((1<<(p))-1)); + maskright =(0 | ((1<<(q))-1)); + psomo = __builtin_popcount(Jsomo & maskleft); + qsomo = __builtin_popcount(Jsomo & maskright); // q has to be atleast 1 + p = psomo; + q = qsomo; + + for(int i=0;i VMO or DOMO -> SOMO) + noccorbI = __builtin_popcount(Isomo & (1<<(orbI-1))); + + switch (noccorbI){ + case 0: + // Case: DOMO -> SOMO + // Find the orbital ids in model space + // Ex: + // 2 1 1 1 1 + // p q + // 1 1 1 2 1 + // p = 4 + // q = 2 + // p is from Jsomo + // q is from Isomo + maskleft = ((1<<(p))-1); + maskright =((1<<(q))-1); + psomo = __builtin_popcount(Jsomo & maskleft); + qsomo = __builtin_popcount(Isomo & maskright); + p = psomo; + q = qsomo; + + for(int i=0;i VMO + // Find the orbital ids in model space + // Ex: + // 1 1 1 0 1 + // p q + // 0 1 1 1 1 + // p = 4 + // q = 1 + // p is from Isomo + // q is from Jsomo + maskleft = ((1<<(p))-1); + maskright =((1<<(q))-1); + psomo = __builtin_popcount(Isomo & maskleft); + qsomo = __builtin_popcount(Jsomo & maskright); + p = psomo; + q = qsomo; + + for(int i=0;i orbJ + else if(orbI < orbJ){ + // CASE 2 orbI < orbJ + p = orbI; + q = orbJ; + // Find the corresponding sub case + // 1. NdetI > NdetJ (SOMO -> SOMO) + // 2. NdetI < NdetJ (DOMO -> VMO) + // 3. NdetI == NdetJ (SOMO -> VMO and DOMO -> SOMO) + + // Converting the above four cases into int: + int case_type = abs(ndetI - ndetJ) == 0 ? 3 : (ndetI > ndetJ ? 1 : 2); + + switch (case_type){ + case 1: + // SOMO -> SOMO + // Find the orbital ids in model space + maskleft = (0 | ((1<<(p))-1)); + maskright = (0 | ((1<<(q))-1)); + psomo = __builtin_popcount(Isomo & maskleft); + qsomo = __builtin_popcount(Isomo & maskright); // q has to be atleast 1 + p = psomo; + q = qsomo; + + for(int i=0;i VMO + // Find the orbital ids in model space + // As seen in Jsomo + // Here we apply a^{\dagger}_p a_q |J> + maskleft = (0 | ((1<<(p))-1)); + maskright =(0 | ((1<<(q))-1)); + psomo = __builtin_popcount(Jsomo & maskleft); + qsomo = __builtin_popcount(Jsomo & maskright); // q has to be atleast 1 + p = psomo; + q = qsomo; + + for(int i=0;i VMO or DOMO -> SOMO) + // if Isomo[p] == 1 => SOMO -> VMO + // if Isomo[p] == 0 => DOMO -> SOMO + noccorbI = __builtin_popcount(Isomo & (1<<(orbI-1))); + + switch (noccorbI){ + case 0: + // Case: DOMO -> SOMO + // Find the orbital ids in model space + // Ex: + // 1 1 1 2 1 + // q p + // 2 1 1 1 1 + // p = 1 + // q = 4 + // p is from Jsomo + // q is from Isomo + maskleft = ((1<<(p))-1); + maskright =((1<<(q))-1); + psomo = __builtin_popcount(Jsomo & maskleft); + qsomo = __builtin_popcount(Isomo & maskright); + p = psomo; + q = qsomo; + + for(int i=0;i VMO + // Find the orbital ids in model space + // Ex: + // 0 1 1 1 1 + // q p + // 1 1 1 0 1 + // p = 2 + // q = 4 + // p is from Isomo + // q is from Jsomo + maskleft = ((1<<(p))-1); + maskright =((1<<(q))-1); + psomo = __builtin_popcount(Isomo & maskleft); + qsomo = __builtin_popcount(Jsomo & maskright); + p = psomo; + q = qsomo; + + for(int i=0;i 0.0 || donepq[idxq] > 0.0) continue; + fac *= 2.0; + donepq[idxp] = 1.0; + donepq[idxq] = 1.0; + for(int j = 0; j < npairs; j = j + shft){ + for(int k = 0; k < shft/2; k++){ + detslist[(k+j)*NSOMO + idxp] = 1; + detslist[(k+j)*NSOMO + idxq] = 0; + } + for(int k = shft/2; k < shft; k++){ + detslist[(k+j)*NSOMO + idxp] = 0; + detslist[(k+j)*NSOMO + idxq] = 1; + phaselist[k+j] *=-1; + } + } + shft /= 2; + } + + // Now get the addresses + int inpdet[NSOMO]; + int phase_cfg_to_qp=1; + int addr = -1; + for(int i = 0; i < npairs; i++){ + for(int j = 0; j < NSOMO; j++) + inpdet[j] = detslist[i*NSOMO + j]; + findAddofDetDriver(dettree, NSOMO, inpdet, &addr); + // Calculate the phase for cfg to QP2 conversion + //get_phase_cfg_to_qp_inpList(inpdet, NSOMO, &phase_cfg_to_qp); + //rowvec[addr] = 1.0 * phaselist[i]*phase_cfg_to_qp/sqrt(fac); + rowvec[addr] = 1.0 * phaselist[i]/sqrt(fac); + // Upon transformation from + // SOMO to DET basis, + // all dets have the same phase + // Is this true ? + //rowvec[addr] = 1.0/sqrt(fac); + } + + free(detslist); + free(phaselist); +} + +void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int *rows, int *cols){ + + int NSOMO=0; + getSetBits(Isomo, &NSOMO); + int ndets = 0; + int NBF = 0; + double dNSOMO = NSOMO*1.0; + double nalpha = (NSOMO + MS)/2.0; + ndets = (int)binom(dNSOMO, nalpha); + + Tree dettree = (Tree){ .rootNode = NULL, .NBF = -1 }; + dettree.rootNode = malloc(sizeof(Node)); + (*dettree.rootNode) = (Node){ .C0 = NULL, .C1 = NULL, .PREV = NULL, .addr = 0, .cpl = -1, .iSOMO = -1}; + + genDetBasis(&dettree, Isomo, MS, &ndets); + + if(ndets == 1){ + // Initialize transformation matrix + NBF = 1; + (*bftodetmatrixptr) = malloc(NBF*ndets*sizeof(double)); + (*rows) = 1; + (*cols) = 1; + + double *bftodetmatrix = (*bftodetmatrixptr); + bftodetmatrix[0] = 1.0; + + } + else{ + + //int addr = -1; + //int inpdet[NSOMO]; + //inpdet[0] = 1; + //inpdet[1] = 1; + //inpdet[2] = 1; + //inpdet[3] = 0; + //inpdet[4] = 0; + //inpdet[5] = 0; + + //findAddofDetDriver(&dettree, NSOMO, inpdet, &addr); + + int detlist[ndets]; + getDetlistDriver(&dettree, NSOMO, detlist); + + // Prepare BFs + Tree bftree = (Tree){ .rootNode = NULL, .NBF = -1 }; + bftree.rootNode = malloc(sizeof(Node)); + (*bftree.rootNode) = (Node){ .C0 = NULL, .C1 = NULL, .PREV = NULL, .addr = 0, .cpl = -1, .iSOMO = -1}; + + generateAllBFs(Isomo, MS, &bftree, &NBF, &NSOMO); + + // Initialize transformation matrix + (*bftodetmatrixptr) = malloc(NBF*ndets*sizeof(double)); + (*rows) = NBF; + (*cols) = ndets; + + double *bftodetmatrix = (*bftodetmatrixptr); + + // Build BF to det matrix + int addI = 0; + int addJ = 0; + double rowvec[ndets]; + for(int i=0;i 1 + +} + + +void convertBFtoDetBasisWithArrayDims(int64_t Isomo, int MS, int rowsmax, int colsmax, int *rows, int *cols, double *bftodetmatrix){ + + int NSOMO=0; + getSetBits(Isomo, &NSOMO); + int ndets = 0; + int NBF = 0; + double dNSOMO = NSOMO*1.0; + double nalpha = (NSOMO + MS)/2.0; + ndets = (int)binom(dNSOMO, nalpha); + + Tree dettree = (Tree){ .rootNode = NULL, .NBF = -1 }; + dettree.rootNode = malloc(sizeof(Node)); + (*dettree.rootNode) = (Node){ .C0 = NULL, .C1 = NULL, .PREV = NULL, .addr = 0, .cpl = -1, .iSOMO = -1}; + + genDetBasis(&dettree, Isomo, MS, &ndets); + + //int addr = -1; + //int inpdet[NSOMO]; + //inpdet[0] = 1; + //inpdet[1] = 1; + //inpdet[2] = 1; + //inpdet[3] = 0; + //inpdet[4] = 0; + //inpdet[5] = 0; + + //findAddofDetDriver(&dettree, NSOMO, inpdet, &addr); + + int detlist[ndets]; + getDetlistDriver(&dettree, NSOMO, detlist); + + // Prepare BFs + Tree bftree = (Tree){ .rootNode = NULL, .NBF = -1 }; + bftree.rootNode = malloc(sizeof(Node)); + (*bftree.rootNode) = (Node){ .C0 = NULL, .C1 = NULL, .PREV = NULL, .addr = 0, .cpl = -1, .iSOMO = -1}; + + generateAllBFs(Isomo, MS, &bftree, &NBF, &NSOMO); + + // Initialize transformation matrix + //(*bftodetmatrixptr) = malloc(NBF*ndets*sizeof(double)); + (*rows) = NBF; + (*cols) = ndets; + + //double *bftodetmatrix = (*bftodetmatrixptr); + + // Build BF to det matrix + int addI = 0; + int addJ = 0; + double rowvec[ndets]; + for(int i=0;i maxDetDimPerBF) maxDetDimPerBF = detDimperBF + ncfgprev = cfg_seniority_index(i) + enddo + END_PROVIDER + + +subroutine get_phase_qp_to_cfg(Ialpha, Ibeta, phaseout) + use bitmasks + implicit none + BEGIN_DOC + ! Documentation for get_phase_qp_to_cfg + ! + ! This function converts from (aaaa)(bbbb) + ! notation to (ab)(ab)(ab)(ab) + ! notation. + ! The cfgCI code works in (ab)(ab)(ab)(ab) + ! notation throughout. + END_DOC + integer(bit_kind),intent(in) :: Ialpha(N_int) + integer(bit_kind),intent(in) :: Ibeta(N_int) + real*8,intent(out) :: phaseout + integer(bit_kind) :: mask(N_int), deta(N_int), detb(N_int) + integer :: nbetas + integer :: count, k +if (N_int >1 ) then + stop 'TODO: get_phase_qp_to_cfg ' +endif + + nbetas = 0 + mask = 0_bit_kind + count = 0 + deta = Ialpha + detb = Ibeta + ! remove the domos + mask = IAND(deta,detb) + deta = IEOR(deta,mask) + detb = IEOR(detb,mask) + mask = 0 + phaseout = 1.0 + k = 1 + do while((deta(k)).GT.0_8) + mask(k) = ISHFT(1_8,count) + if(POPCNT(IAND(deta(k),mask(k))).EQ.1)then + if(IAND(nbetas,1).EQ.0) then + phaseout *= 1.0d0 + else + phaseout *= -1.0d0 + endif + deta(k) = IEOR(deta(k),mask(k)) + else + if(POPCNT(IAND(detb(k),mask(k))).EQ.1) then + nbetas += 1 + detb(k) = IEOR(detb(k),mask(k)) + endif + endif + count += 1 + enddo +end subroutine get_phase_qp_to_cfg + + + + BEGIN_PROVIDER [ integer, AIJpqMatrixDimsList, (0:NSOMOMax,0:NSOMOMax,4,NSOMOMax,NSOMOMax,2)] + &BEGIN_PROVIDER [ integer, rowsmax] + &BEGIN_PROVIDER [ integer, colsmax] + use cfunctions + implicit none + BEGIN_DOC + ! Documentation for AIJpqMatrixList + ! The prototype matrix containing the + ! matrices for each I,J somo pair and orb ids. + END_DOC + integer i,j,k,l + integer*8 Isomo, Jsomo, tmpsomo + Isomo = 0 + Jsomo = 0 + integer rows, cols, nsomoi, nsomoj + rows = -1 + cols = -1 + integer*8 MS + MS = elec_alpha_num-elec_beta_num + integer nsomomin + nsomomin = elec_alpha_num-elec_beta_num + rowsmax = 0 + colsmax = 0 + !allocate(AIJpqMatrixDimsList(NSOMOMax,NSOMOMax,4,NSOMOMax,NSOMOMax,2)) + ! Type + ! 1. SOMO -> SOMO + do i = 2-iand(nsomomin,1), NSOMOMax, 2 + Isomo = ISHFT(1_8,i)-1 + do j = i-2,i-2, 2 + Jsomo = ISHFT(1_8,j)-1 + if(j .GT. NSOMOMax .OR. j .LT. 0) then + cycle + end if + do k = 1,i + do l = 1,i + ! Define Jsomo + if(k.NE.l)then + Jsomo = IBCLR(Isomo, k-1) + Jsomo = IBCLR(Jsomo, l-1) + nsomoi = i + nsomoj = j + else + Isomo = ISHFT(1_8,i)-1 + Jsomo = ISHFT(1_8,i)-1 + nsomoi = i + nsomoj = i + endif + + call getApqIJMatrixDims(Isomo, & + Jsomo, & + MS, & + rows, & + cols) + if(rowsmax .LT. rows) then + rowsmax = rows + end if + if(colsmax .LT. cols) then + colsmax = cols + end if + ! i -> j + AIJpqMatrixDimsList(nsomoi,nsomoj,1,k,l,1) = rows + AIJpqMatrixDimsList(nsomoi,nsomoj,1,k,l,2) = cols + end do + end do + end do + end do + ! Type + ! 2. DOMO -> VMO + do i = 0+iand(nsomomin,1), NSOMOMax, 2 + Isomo = ISHFT(1_8,i)-1 + tmpsomo = ISHFT(1_8,i+2)-1 + do j = i+2,i+2, 2 + Jsomo = ISHFT(1_8,j)-1 + if(j .GT. NSOMOMax .OR. j .LT. 0) then + cycle + end if + do k = 1,j + do l = 1,j + if(k .NE. l) then + Isomo = IBCLR(tmpsomo,k-1) + Isomo = IBCLR(Isomo,l-1) + + ! Define Jsomo + Jsomo = ISHFT(1_8,j)-1; + nsomoi = i + nsomoj = j + else + Isomo = ISHFT(1_8,j)-1 + Isomo = IBCLR(Isomo,1-1) + Isomo = IBCLR(Isomo,j-1) + Jsomo = ISHFT(1_8,j)-1 + Isomo = ISHFT(1_8,j)-1 + nsomoi = j + nsomoj = j + endif + + call getApqIJMatrixDims(Isomo, & + Jsomo, & + MS, & + rows, & + cols) + if(rowsmax .LT. rows) then + rowsmax = rows + end if + if(colsmax .LT. cols) then + colsmax = cols + end if + ! i -> j + AIJpqMatrixDimsList(nsomoi,nsomoj,2,k,l,1) = rows + AIJpqMatrixDimsList(nsomoi,nsomoj,2,k,l,2) = cols + end do + end do + end do + end do + ! Type + ! 3. SOMO -> VMO + !print *,"Doing SOMO->VMO" + do i = 2-iand(nsomomin,1), NSOMOMax, 2 + Isomo = ISHFT(1_8,i)-1 + do j = i,i, 2 + Jsomo = ISHFT(1_8,j)-1 + if(j .GT. NSOMOMax .OR. j .LE. 0) then + cycle + end if + do k = 1,i + do l = 1,i + if(k .NE. l) then + Isomo = ISHFT(1_8,i+1)-1 + Isomo = IBCLR(Isomo,l-1) + Jsomo = ISHFT(1_8,j+1)-1 + Jsomo = IBCLR(Jsomo,k-1) + else + Isomo = ISHFT(1_8,i)-1 + Jsomo = ISHFT(1_8,j)-1 + endif + call getApqIJMatrixDims(Isomo, & + Jsomo, & + MS, & + rows, & + cols) + if(rowsmax .LT. rows) then + rowsmax = rows + end if + if(colsmax .LT. cols) then + colsmax = cols + end if + ! i -> j + AIJpqMatrixDimsList(i,j,3,k,l,1) = rows + AIJpqMatrixDimsList(i,j,3,k,l,2) = cols + end do + end do + end do + end do + ! Type + ! 4. DOMO -> VMO + !print *,"Doing DOMO->SOMO" + do i = 2-iand(nsomomin,1), NSOMOMax, 2 + do j = i,i, 2 + if(j .GT. NSOMOMax .OR. j .LE. 0) then + cycle + end if + do k = 1,i + do l = 1,i + if(k .NE. l) then + Isomo = ISHFT(1_8,i+1)-1 + Isomo = IBCLR(Isomo,k+1-1) + Jsomo = ISHFT(1_8,j+1)-1 + Jsomo = IBCLR(Jsomo,l-1) + else + Isomo = ISHFT(1_8,i)-1 + Jsomo = ISHFT(1_8,j)-1 + endif + call getApqIJMatrixDims(Isomo, & + Jsomo, & + MS, & + rows, & + cols) + if(rowsmax .LT. rows) then + rowsmax = rows + end if + if(colsmax .LT. cols) then + colsmax = cols + end if + ! i -> j + AIJpqMatrixDimsList(i,j,4,k,l,1) = rows + AIJpqMatrixDimsList(i,j,4,k,l,2) = cols + end do + end do + end do + end do + END_PROVIDER + + BEGIN_PROVIDER [ real*8, AIJpqContainer, (0:NSOMOMax,0:NSOMOMax,4,NSOMOMax,NSOMOMax,NBFMax,NBFMax)] + use cfunctions + implicit none + BEGIN_DOC + ! Documentation for AIJpqMatrixList + ! The prototype matrix containing the + ! matrices for each I,J somo pair and orb ids. + ! + ! Due to the different types of excitations which + ! include DOMOs and VMOs two prototype DOMOs and two + ! prototype VMOs are needed. Therefore + ! the 4th and 5th dimensions are NSOMOMax+4 and NSOMOMax+4 + ! respectively. + ! + ! The type of excitations are ordered as follows: + ! Type 1 - SOMO -> SOMO + ! Type 2 - DOMO -> VMO + ! Type 3 - SOMO -> VMO + ! Type 4 - DOMO -> SOMO + END_DOC + integer i,j,k,l, orbp, orbq, ri, ci + orbp = 0 + orbq = 0 + integer*8 Isomo, Jsomo, tmpsomo + Isomo = 0 + Jsomo = 0 + integer rows, cols, nsomoi, nsomoj + rows = -1 + cols = -1 + integer*8 MS + MS = 0 + touch AIJpqMatrixDimsList + real*8,dimension(:,:),allocatable :: meMatrix + integer maxdim + !maxdim = max(rowsmax,colsmax) + ! allocate matrix + !allocate(AIJpqMatrixDimsList(NSOMOMax,NSOMOMax,4,NSOMOMax,NSOMOMax,2)) + ! Type + ! 1. SOMO -> SOMO + do i = 2, NSOMOMax, 2 + Isomo = ISHFT(1_8,i)-1 + do j = i-2,i-2, 2 + if(j .GT. NSOMOMax .OR. j .LT. 0) cycle + do k = 1,i + do l = 1,i + + ! Define Jsomo + if(k .NE. l) then + Jsomo = IBCLR(Isomo, k-1) + Jsomo = IBCLR(Jsomo, l-1) + nsomoi = i + nsomoj = j + else + Isomo = ISHFT(1_8,i)-1 + Jsomo = ISHFT(1_8,i)-1 + nsomoi = i + nsomoj = i + endif + + AIJpqContainer(nsomoi,nsomoj,1,k,l,:,:) = 0.0d0 + call getApqIJMatrixDims(Isomo, & + Jsomo, & + MS, & + rows, & + cols) + + orbp = k + orbq = l + allocate(meMatrix(rows,cols)) + meMatrix = 0.0d0 + ! fill matrix + call getApqIJMatrixDriver(Isomo, & + Jsomo, & + orbp, & + orbq, & + MS, & + NMO, & + meMatrix, & + rows, & + cols) + ! i -> j + do ri = 1,rows + do ci = 1,cols + AIJpqContainer(nsomoi,nsomoj,1,k,l,ri,ci) = meMatrix(ri, ci) + end do + end do + deallocate(meMatrix) + end do + end do + end do + end do + ! Type + ! 2. DOMO -> VMO + do i = 0, NSOMOMax, 2 + Isomo = ISHFT(1_8,i)-1 + tmpsomo = ISHFT(1_8,i+2)-1 + do j = i+2,i+2, 2 + if(j .GT. NSOMOMax .OR. j .LE. 0) cycle + Jsomo = ISHFT(1_8,j)-1 + do k = 1,j + do l = 1,j + if(k .NE. l) then + Isomo = IBCLR(tmpsomo,k-1) + Isomo = IBCLR(Isomo,l-1) + ! Define Jsomo + Jsomo = ISHFT(1_8,j)-1; + nsomoi = i + nsomoj = j + else + Isomo = ISHFT(1_8,j)-1 + Isomo = IBCLR(Isomo,1-1) + Isomo = IBCLR(Isomo,j-1) + Jsomo = ISHFT(1_8,j)-1 + Isomo = ISHFT(1_8,j)-1 + nsomoi = j + nsomoj = j + endif + + AIJpqContainer(nsomoi,nsomoj,2,k,l,:,:) = 0.0d0 + call getApqIJMatrixDims(Isomo, & + Jsomo, & + MS, & + rows, & + cols) + + orbp = k + orbq = l + allocate(meMatrix(rows,cols)) + meMatrix = 0.0d0 + ! fill matrix + call getApqIJMatrixDriver(Isomo, & + Jsomo, & + orbp, & + orbq, & + MS, & + NMO, & + meMatrix, & + rows, & + cols) + ! i -> j + do ri = 1,rows + do ci = 1,cols + AIJpqContainer(nsomoi,nsomoj,2,k,l,ri,ci) = meMatrix(ri, ci) + end do + end do + deallocate(meMatrix) + end do + end do + end do + end do + ! Type + ! 3. SOMO -> VMO + do i = 2, NSOMOMax, 2 + Isomo = ISHFT(1_8,i)-1 + do j = i,i, 2 + Jsomo = ISHFT(1_8,j)-1 + if(j .GT. NSOMOMax .OR. j .LE. 0) cycle + do k = 1,i + do l = 1,i + if(k .NE. l) then + Isomo = ISHFT(1_8,i+1)-1 + Isomo = IBCLR(Isomo,l-1) + Jsomo = ISHFT(1_8,j+1)-1 + Jsomo = IBCLR(Jsomo,k-1) + else + Isomo = ISHFT(1_8,i)-1 + Jsomo = ISHFT(1_8,j)-1 + endif + + AIJpqContainer(i,j,3,k,l,:,:) = 0.0d0 + call getApqIJMatrixDims(Isomo, & + Jsomo, & + MS, & + rows, & + cols) + + orbp = k + orbq = l + allocate(meMatrix(rows,cols)) + meMatrix = 0.0d0 + ! fill matrix + call getApqIJMatrixDriver(Isomo, & + Jsomo, & + orbp, & + orbq, & + MS, & + NMO, & + meMatrix, & + rows, & + cols) + ! i -> j + do ri = 1,rows + do ci = 1,cols + AIJpqContainer(i,j,3,k,l,ri,ci) = meMatrix(ri, ci) + end do + end do + deallocate(meMatrix) + end do + end do + end do + end do + ! Type + ! 4. DOMO -> SOMO + do i = 2, NSOMOMax, 2 + Isomo = ISHFT(1_8,i)-1 + do j = i,i, 2 + Jsomo = ISHFT(1_8,i)-1 + if(j .GT. NSOMOMax .OR. j .LE. 0) cycle + do k = 1,i + do l = 1,i + if(k .NE. l) then + Isomo = ISHFT(1_8,i+1)-1 + Isomo = IBCLR(Isomo,k-1) + Jsomo = ISHFT(1_8,j+1)-1 + Jsomo = IBCLR(Jsomo,l+1-1) + else + Isomo = ISHFT(1_8,i)-1 + Jsomo = ISHFT(1_8,j)-1 + endif + + AIJpqContainer(i,j,4,k,l,:,:) = 0.0d0 + call getApqIJMatrixDims(Isomo, & + Jsomo, & + MS, & + rows, & + cols) + + orbp = k + orbq = l + + allocate(meMatrix(rows,cols)) + meMatrix = 0.0d0 + ! fill matrix + call getApqIJMatrixDriver(Isomo, & + Jsomo, & + orbp, & + orbq, & + MS, & + NMO, & + meMatrix, & + rows, & + cols) + ! i -> j + do ri = 1,rows + do ci = 1,cols + AIJpqContainer(i,j,4,k,l,ri,ci) = meMatrix(ri, ci) + end do + end do + deallocate(meMatrix) + end do + end do + end do + end do + END_PROVIDER + + +!!!!!! + + BEGIN_PROVIDER [ real*8, DetToCSFTransformationMatrix, (0:NSOMOMax,NBFMax,maxDetDimPerBF)] + &BEGIN_PROVIDER [ real*8, psi_coef_config, (n_CSF)] + &BEGIN_PROVIDER [ integer, psi_config_data, (N_configuration,2)] + use cfunctions + use bitmasks + implicit none + BEGIN_DOC + ! Documentation for DetToCSFTransformationMatrix + ! Provides the matrix of transformatons for the + ! conversion between determinant to CSF basis (in BFs) + END_DOC + integer(bit_kind) :: mask(N_int), Ialpha(N_int),Ibeta(N_int) + integer :: rows, cols, i, j, k + integer :: startdet, enddet + integer*8 MS, Isomo, Idomo + integer ndetI + integer :: getNSOMO + real*8,dimension(:,:),allocatable :: tempBuffer + real*8,dimension(:),allocatable :: tempCoeff + real*8 :: norm_det1, phasedet + norm_det1 = 0.d0 + MS = elec_alpha_num - elec_beta_num + ! initialization + psi_coef_config = 0.d0 + DetToCSFTransformationMatrix(0,:,:) = 1.d0 + do i = 2-iand(elec_alpha_num-elec_beta_num,1), NSOMOMax,2 + Isomo = IBSET(0_8, i) - 1_8 + ! rows = Ncsfs + ! cols = Ndets + bfIcfg = max(1,nint((binom(i,(i+1)/2)-binom(i,((i+1)/2)+1)))) + ndetI = max(1,nint((binom(i,(i+1)/2)))) + + allocate(tempBuffer(bfIcfg,ndetI)) + call getCSFtoDETTransformationMatrix(Isomo, MS, NBFMax, maxDetDimPerBF, tempBuffer) + DetToCSFTransformationMatrix(i,1:bfIcfg,1:ndetI) = tempBuffer(1:bfIcfg,1:ndetI) + deallocate(tempBuffer) + enddo + + integer s, bfIcfg + integer countcsf + countcsf = 0 + integer countdet + countdet = 0 + integer idx + integer istate + istate = 1 + phasedet = 1.0d0 + do i = 1,N_configuration + startdet = psi_configuration_to_psi_det(1,i) + enddet = psi_configuration_to_psi_det(2,i) + ndetI = enddet-startdet+1 + + allocate(tempCoeff(ndetI)) + countdet = 1 + do j = startdet, enddet + idx = psi_configuration_to_psi_det_data(j) + Ialpha(:) = psi_det(:,1,idx) + Ibeta(:) = psi_det(:,2,idx) + call get_phase_qp_to_cfg(Ialpha, Ibeta, phasedet) + tempCoeff(countdet) = psi_coef(idx, istate)*phasedet + norm_det1 += tempCoeff(countdet)*tempCoeff(countdet) + countdet += 1 + enddo + + s = 0 + do k=1,N_int + if (psi_configuration(k,1,i) == 0_bit_kind) cycle + s = s + popcnt(psi_configuration(k,1,i)) + enddo + bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) + + ! perhaps blocking with CFGs of same seniority + ! can be more efficient + allocate(tempBuffer(bfIcfg,ndetI)) + tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI) + + call dgemm('N','N', bfIcfg, 1, ndetI, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, size(tempCoeff,1), 0.d0, psi_coef_config(countcsf+1), size(psi_coef_config,1)) + !call dgemv('N', NBFMax, maxDetDimPerBF, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, 1, 0.d0, psi_coef_config(countcsf), 1) + + deallocate(tempCoeff) + deallocate(tempBuffer) + psi_config_data(i,1) = countcsf + 1 + countcsf += bfIcfg + psi_config_data(i,2) = countcsf + enddo + + END_PROVIDER diff --git a/src/csf/tree_utils.c b/src/csf/tree_utils.c new file mode 100644 index 00000000..658c18e8 --- /dev/null +++ b/src/csf/tree_utils.c @@ -0,0 +1,309 @@ +#include "tree_utils.h" + +void buildTree(Tree *bftree, + Node **inode, + int isomo, + int izeros, + int icpl, + int NSOMOMax, + int MSmax){ + + // Find the maximum parallel couplings 0 + // the maximum anti-parallel couplings 1 + int zeromax = MSmax + (NSOMOMax-MSmax)/2; + int onemax = NSOMOMax - zeromax; + + // Exit condition + if(isomo > NSOMOMax || icpl < 0 || izeros > zeromax ) return; + + // If we find a valid BF assign its address + if(isomo == NSOMOMax){ + (*inode)->addr = bftree->rootNode->addr; + bftree->rootNode->addr += 1; + return; + } + + // Call 0 branch + if(((*inode)->C0) == NULL && izeros+1 <= zeromax){ + ((*inode)->C0) = malloc(sizeof(Node)); + (*(*inode)->C0) = (Node){ .C0 = NULL, .C1 = NULL, .PREV = *inode, .addr = -1, .cpl = 0, .iSOMO = isomo }; + buildTree(bftree, &(*inode)->C0, isomo+1, izeros+1, icpl+1, NSOMOMax, MSmax); + } + else buildTree(bftree, &(*inode)->C0, isomo+1, izeros+1, icpl+1, NSOMOMax, MSmax); + + // Call 1 branch + if(((*inode)->C1) == NULL && icpl-1 >= 0){ + ((*inode)->C1) = malloc(sizeof(Node)); + (*(*inode)->C1) = (Node){ .C0 = NULL, .C1 = NULL, .PREV = *inode, .addr = -1, .cpl = 1, .iSOMO = isomo }; + buildTree(bftree, &(*inode)->C1, isomo+1, izeros+0, icpl-1, NSOMOMax, MSmax); + } + else buildTree(bftree, &(*inode)->C1, isomo+1, izeros+0, icpl-1, NSOMOMax, MSmax); + + return; +} + +void buildTreeDriver(Tree *bftree, int NSOMO, int MS, int *NBF){ + int isomo = 0; // counts the total number of SOMO's + int izeros= 0; // Counts the total number of parallel coupings (i.e. 0's) + int icpl = 0; // keep track of the ith ms (cannot be -ve) + int addr = 0; // Counts the total BF's + + buildTree(bftree, &(bftree->rootNode), isomo, izeros, icpl, NSOMO, MS); + + *NBF = bftree->rootNode->addr; +} + + +void getIthBF(Node *inode, int isomo, bool foundBF, int NSOMOMax, int getaddr, int *vecBF){ + // Exit condition + if(foundBF) return; + if(isomo > NSOMOMax) return; + if(inode == NULL) return; + + if(isomo == NSOMOMax){ + if(inode->addr == getaddr){ + for(int i = NSOMOMax-1; i > -1; i--){ + vecBF[i] = inode->cpl; + inode = inode->PREV; + } + foundBF = true; + return; + } + } + + // Recurse to C0 + if(inode->C0 != NULL){ + getIthBF(inode->C0, isomo+1, foundBF, NSOMOMax, getaddr, vecBF); + } + // Recurse to C1 + if(inode->C1 != NULL){ + getIthBF(inode->C1, isomo+1, foundBF, NSOMOMax, getaddr, vecBF); + } + + return; +} + +void getIthBFDriver(Tree *bftree, int NSOMOMax, int getaddr, int *vecBF){ + int isomo = 0; + bool foundBF = false; + getIthBF((bftree->rootNode), isomo, foundBF, NSOMOMax, getaddr, vecBF); +} + +void genDets(Tree *dettree, + Node **inode, + int isomo, + int izeros, + int icpl, + int NSOMOMax, + int MSmax){ + + // Find the maximum parallel couplings 0 + // the maximum anti-parallel couplings 1 + int zeromax = MSmax + (NSOMOMax-MSmax)/2; + int onemax = NSOMOMax - zeromax; + + // Exit condition + if(isomo > NSOMOMax || izeros > zeromax || abs(icpl) > onemax) return; + + // If we find a valid BF assign its address + if(isomo == NSOMOMax){ + (*inode)->addr = dettree->rootNode->addr; + dettree->rootNode->addr += 1; + return; + } + + // Call 0 branch + if(((*inode)->C0) == NULL && izeros+1 <= zeromax){ + ((*inode)->C0) = malloc(sizeof(Node)); + (*(*inode)->C0) = (Node){ .C0 = NULL, .C1 = NULL, .PREV = *inode, .addr = -1, .cpl = 0, .iSOMO = isomo }; + genDets(dettree, &(*inode)->C0, isomo+1, izeros+1, icpl+0, NSOMOMax, MSmax); + } + else genDets(dettree, &(*inode)->C0, isomo+1, izeros+1, icpl+0, NSOMOMax, MSmax); + + // Call 1 branch + if(((*inode)->C1) == NULL && abs(icpl+1) <= onemax){ + ((*inode)->C1) = malloc(sizeof(Node)); + (*(*inode)->C1) = (Node){ .C0 = NULL, .C1 = NULL, .PREV = *inode, .addr = -1, .cpl = 1, .iSOMO = isomo }; + genDets(dettree, &(*inode)->C1, isomo+1, izeros+0, icpl+1, NSOMOMax, MSmax); + } + else genDets(dettree, &(*inode)->C1, isomo+1, izeros+0, icpl+1, NSOMOMax, MSmax); + + return; +} + +void genDetsDriver(Tree *dettree, int NSOMO, int MS, int *Ndets){ + int isomo = 0; // counts the total number of SOMO's + int izeros= 0; // Counts the total number of parallel coupings (i.e. 0's) + int icpl = 0; // keep track of the ith ms (cannot be -ve) + int addr = 0; // Counts the total BF's + + genDets(dettree, &(dettree->rootNode), isomo, izeros, icpl, NSOMO, MS); + + *Ndets = dettree->rootNode->addr; +} + +void getIthDet(Node *inode, int isomo, bool foundBF, int NSOMOMax, int getaddr, int *vecBF){ + // Exit condition + if(foundBF) return; + if(isomo > NSOMOMax) return; + if(inode == NULL) return; + + if(isomo == NSOMOMax){ + if(inode->addr == getaddr){ + for(int i = NSOMOMax-1; i > -1; i--){ + vecBF[i] = inode->cpl; + inode = inode->PREV; + } + foundBF = true; + return; + } + } + + // Recurse to C0 + if(inode->C0 != NULL){ + getIthDet(inode->C0, isomo+1, foundBF, NSOMOMax, getaddr, vecBF); + } + // Recurse to C1 + if(inode->C1 != NULL){ + getIthDet(inode->C1, isomo+1, foundBF, NSOMOMax, getaddr, vecBF); + } + + return; +} + +void getIthDetDriver(Tree *dettree, int NSOMOMax, int getaddr, int *vecBF){ + int isomo = 0; + bool foundBF = false; + getIthDet((dettree->rootNode), isomo, foundBF, NSOMOMax, getaddr, vecBF); +} + +void findAddofDet(Node *inode, int isomo, bool foundDet, int NSOMOMax, int *inpdet, int *addr){ + // Exit condition + if(foundDet) return; + if(isomo == NSOMOMax){ + foundDet = true; + *addr = inode->addr; + return; + } + + // Recurse to C0 + if(inpdet[isomo] == 0){ + if(inode->C0 != NULL){ + findAddofDet(inode->C0, isomo+1, foundDet, NSOMOMax, inpdet, addr); + } + else{ + *addr = -1; + return; + } + } + else{ + // Recurse to C1 + if(inode->C1 != NULL){ + findAddofDet(inode->C1, isomo+1, foundDet, NSOMOMax, inpdet, addr); + } + else{ + *addr = -1; + return; + } + } + + return; +} + +void findAddofDetDriver(Tree *dettree, int NSOMOMax, int *inpdet, int *addr){ + *addr = -1; + int isomo = 0; + bool foundDet = false; + findAddofDet((dettree->rootNode), isomo, foundDet, NSOMOMax, inpdet, addr); +} + +void getDetlist(Node *inode, int isomo, int NSOMOMax, int *vecBF, int *detlist){ + // Exit condition + if(isomo > NSOMOMax) return; + if(inode == NULL) return; + + if(isomo == NSOMOMax){ + int idet=0; + for(int k=0;kaddr]=idet; + return; + } + + + // Recurse to C0 + if(inode->C0 != NULL){ + vecBF[isomo] = 0; + getDetlist(inode->C0, isomo+1, NSOMOMax, vecBF, detlist); + } + // Recurse to C1 + if(inode->C1 != NULL){ + vecBF[isomo] = 1; + getDetlist(inode->C1, isomo+1, NSOMOMax, vecBF, detlist); + } + + return; +} + +void getDetlistDriver(Tree *dettree, int NSOMOMax, int *detlist){ + int isomo = 0; + int vecBF[NSOMOMax]; + if(dettree->rootNode->addr > 1) getDetlist((dettree->rootNode), isomo, NSOMOMax, vecBF, detlist); +} + +void genDetBasis(Tree *dettree, int Isomo, int MS, int *ndets){ + + int NSOMO=0; + getSetBits(Isomo, &NSOMO); + genDetsDriver(dettree, NSOMO, MS, ndets); + +} + +void callBlasMatxMat(double *A, int rowA, int colA, double *B, int rowB, int colB, double *C, bool transA, bool transB){ + int m = rowA; + int k = colA; + int n = colB; + double alpha = 1.0; + double beta = 0.0; + int val = 0; + if (transA) val |= 0x1; + if (transB) val |= 0x2; + + switch (val) { + case 0: // notransA, notransB + m = rowA; + n = colB; + k = colA; + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + m, n, k, alpha, A, k, B, n, beta, C, n); + break; + case 1: // transA, notransB + m = colA; + n = colB; + k = rowA; + cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, + m, n, k, alpha, A, colA, B, n, beta, C, n); + break; + case 2: // notransA, transB + //m = rowA; + //n = rowB; + //k = colB; + m = rowA; + n = rowB; + k = colA; + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, + m, n, k, alpha, A, k, B, colB, beta, C, n); + break; + case 3: // transA, transB + m = colA; + n = rowB; + k = rowA; + cblas_dgemm(CblasRowMajor, CblasTrans, CblasTrans, + m, n, k, alpha, A, colA, B, colB, beta, C, n); + break; + default: + printf("Impossible !!!!\n"); + break; + } +} diff --git a/src/csf/tree_utils.h b/src/csf/tree_utils.h new file mode 100644 index 00000000..a9210191 --- /dev/null +++ b/src/csf/tree_utils.h @@ -0,0 +1,87 @@ +#ifndef TREE_UTILS_H +#define TREE_UTILS_H +#include +#include +#include +#include +#include + +typedef struct bin_node Node; +typedef struct bin_tree Tree; +struct bin_node { + Node *C0; + Node *C1; + Node *PREV; + int addr; + int cpl; + int iSOMO; +}; + +struct bin_tree { + Node *rootNode; + int NBF; +}; + +#include "/usr/include/x86_64-linux-gnu/cblas.h" + +#define MAX_SOMO 32 + +void buildTreeDriver(Tree *bftree, int NSOMO, int MS, int *NBF); + +void buildTree(Tree *bftree, Node **inode, int isomo, int izeros, int icpl, int NSOMOMax, int MSmax); + +void printTreeDriver(Tree *bftree, int NSOMOMax); +void printTree(Node *bftree, int isomo, int NSOMOMax, int *vecBF); + +void getIthBF(Node *node, int isomo, bool foundBF, int NSOMOMax, int getaddr, int *vecBF); +void getIthBFDriver(Tree *bftree, int NSOMOMax, int getaddr, int *vecBF); + +void getBFIndexList(int NSOMO, int *BF1, int *IdxListBF1); +void getIslands(int NSOMO, int *BF1, int *BF2, int *nislands, int *phasefactor); + +void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOMO); +void getSetBits(int64_t n, int *nsetbits); +void getOverlapMatrix(int64_t Isomo, int64_t MS, double **overlapMatrixptr, int *rows, int *cols, int *NSOMOout); +void getOverlapMatrix_withDet(double *bftodetmatrixI, int rowsbftodetI, int colsbftodetI, int64_t Isomo, int64_t MS, double **overlapMatrixI, int *rowsI, int *colsI, int *NSOMO); +void gramSchmidt(double *overlapMatrix, int rows, int cols, double *orthoMatrix); + + +void calculateMETypeSOMOSOMO(int *BF1, int *BF2, int moi, int moj, double *factor, int *phasefactor); +void getOneElMETypeSOMOSOMO(int64_t Isomo, int64_t Jsomos, int moi, int moj, int MS, double **oneElMatrixElementsptr, int *rows, int *cols); + +/*********************** + +Determinant Tree utils +***********************/ + + +void genDets(Tree *dettree, + Node **inode, + int isomo, + int izeros, + int icpl, + int NSOMOMax, + int MSmax); +void genDetsDriver(Tree *dettree, int NSOMO, int MS, int *Ndets); + +void getIthDet(Node *inode, int isomo, bool foundBF, int NSOMOMax, int getaddr, int *vecBF); +void getIthDetDriver(Tree *dettree, int NSOMOMax, int getaddr, int *vecBF); +void getDetlistDriver(Tree *dettree, int NSOMOMax, int *detlist); +void findAddofDet(Node *inode, int isomo, bool foundDet, int NSOMOMax, int *inpdet, int *addr); +void findAddofDetDriver(Tree *dettree, int NSOMOMax, int *inpdet, int *addr); + + +/************************/ + +void genDetBasis(Tree *dettree, int Isomo, int MS, int *ndets); +void getbftodetfunction(Tree *dettree, int NSOMO, int MS, int *BF1, double *rowvec); +void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int *rows, int *cols); + +// Misc utils +void int_to_bin_digit(int64_t in, int count, int* out); +void printRealMatrix(double *orthoMatrix, int rows, int cols); +void callBlasMatxMat(double *A, int rowA, int colA, double *B, int rowB, int colB, double *C, bool transA, bool transB); +void get_phase_cfg_to_qp_inpList(int *inpdet, int NSOMO, int *phaseout); +void get_phase_cfg_to_qp_inpInt(int inpdet, double *phaseout); + +#endif diff --git a/src/davidson/davidson_parallel_csf.irp.f b/src/davidson/davidson_parallel_csf.irp.f new file mode 100644 index 00000000..6e32dc11 --- /dev/null +++ b/src/davidson/davidson_parallel_csf.irp.f @@ -0,0 +1,495 @@ +use bitmasks +use f77_zmq + + +subroutine davidson_csf_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call davidson_csf_run_slave(1,i) +end + + +subroutine davidson_csf_slave_tcp(i) + implicit none + integer, intent(in) :: i + call davidson_csf_run_slave(0,i) +end + + + +subroutine davidson_csf_run_slave(thread,iproc) + use f77_zmq + implicit none + BEGIN_DOC +! Slave routine for Davidson's diagonalization. + END_DOC + + integer, intent(in) :: thread, iproc + + integer :: worker_id, task_id, blockb + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_push_socket + integer(ZMQ_PTR) :: zmq_socket_push + + integer, external :: connect_to_taskserver + integer :: doexit, send, receive + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + doexit = 0 + if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then + doexit=1 + endif + IRP_IF MPI + include 'mpif.h' + integer :: ierr + send = doexit + call MPI_AllReduce(send, receive, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + doexit=1 + endif + doexit = receive + IRP_ENDIF + if (doexit>0) then + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + return + endif + + zmq_socket_push = new_zmq_push_socket(thread) + + call davidson_csf_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_states_diag, N_det, worker_id) + + integer, external :: disconnect_from_taskserver + if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then + call sleep(1) + if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then + print *, irp_here, ': disconnect failed' + continue + endif + endif + + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push) + +end subroutine + + + +subroutine davidson_csf_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, worker_id) + use f77_zmq + implicit none + + integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket + integer(ZMQ_PTR),intent(in) :: zmq_socket_push + integer,intent(in) :: worker_id, N_st, sze + integer :: task_id + character*(512) :: msg + integer :: imin, imax, ishift, istep + + integer, allocatable :: psi_det_read(:,:,:) + double precision, allocatable :: v_t(:,:), u_t(:,:) + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_t + + ! Get wave function (u_t) + ! ----------------------- + + integer :: rc, ni, nj + integer*8 :: rc8 + integer :: N_states_read, N_det_read, psi_det_size_read + integer :: N_det_selectors_read, N_det_generators_read + + integer, external :: zmq_get_dvector + integer, external :: zmq_get_dmatrix + + PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique + PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc + PROVIDE ref_bitmask_energy nproc + PROVIDE mpi_initialized + + allocate(u_t(N_st,N_det)) + + ! Warning : dimensions are modified for efficiency, It is OK since we get the + ! full matrix + if (size(u_t,kind=8) < 8388608_8) then + ni = size(u_t) + nj = 1 + else + ni = 8388608 + nj = int(size(u_t,kind=8)/8388608_8,4) + 1 + endif + + do while (zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1) + print *, 'mpi_rank, N_states_diag, N_det' + print *, mpi_rank, N_states_diag, N_det + stop 'u_t' + enddo + + IRP_IF MPI + include 'mpif.h' + integer :: ierr + + call broadcast_chunks_double(u_t,size(u_t,kind=8)) + + IRP_ENDIF + + ! Run tasks + ! --------- + + logical :: sending + sending=.False. + + allocate(v_t(N_st,N_det)) + do + integer, external :: get_task_from_taskserver + integer, external :: task_done_to_taskserver + if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg) == -1) then + exit + endif + if(task_id == 0) exit + read (msg,*) imin, imax, ishift, istep + integer :: k + do k=imin,imax + v_t(:,k) = 0.d0 + enddo + call H_u_0_nstates_openmp_work(v_t,u_t,N_st,N_det,imin,imax,ishift,istep) + if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then + print *, irp_here, 'Unable to send task_done' + endif + call davidson_push_results_async_recv(zmq_socket_push, sending) + call davidson_csf_push_results_async_send(zmq_socket_push, v_t, imin, imax, task_id, sending) + end do + deallocate(u_t,v_t) + call davidson_push_results_async_recv(zmq_socket_push, sending) + +end subroutine + + + +subroutine davidson_csf_push_results(zmq_socket_push, v_t, imin, imax, task_id) + use f77_zmq + implicit none + BEGIN_DOC +! Push the results of $H | U \rangle$ from a worker to the master. + END_DOC + + integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push + integer ,intent(in) :: task_id, imin, imax + double precision ,intent(in) :: v_t(N_states_diag,N_det) + integer :: rc, sz + integer*8 :: rc8 + + sz = (imax-imin+1)*N_states_diag + + rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE) + if(rc /= 4) stop 'davidson_csf_push_results failed to push task_id' + + rc = f77_zmq_send( zmq_socket_push, imin, 4, ZMQ_SNDMORE) + if(rc /= 4) stop 'davidson_csf_push_results failed to push imin' + + rc = f77_zmq_send( zmq_socket_push, imax, 4, ZMQ_SNDMORE) + if(rc /= 4) stop 'davidson_csf_push_results failed to push imax' + + rc8 = f77_zmq_send8( zmq_socket_push, v_t(1,imin), 8_8*sz, ZMQ_SNDMORE) + if(rc8 /= 8_8*sz) stop 'davidson_csf_push_results failed to push vt' + +! Activate is zmq_socket_push is a REQ +IRP_IF ZMQ_PUSH +IRP_ELSE + character*(2) :: ok + rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) + if ((rc /= 2).and.(ok(1:2)/='ok')) then + print *, irp_here, ': f77_zmq_recv( zmq_socket_push, ok, 2, 0)' + stop -1 + endif +IRP_ENDIF + +end subroutine + +subroutine davidson_csf_push_results_async_send(zmq_socket_push, v_t, imin, imax, task_id,sending) + use f77_zmq + implicit none + BEGIN_DOC +! Push the results of $H | U \rangle$ from a worker to the master. + END_DOC + + integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push + integer ,intent(in) :: task_id, imin, imax + double precision ,intent(in) :: v_t(N_states_diag,N_det) + logical ,intent(inout) :: sending + integer :: rc, sz + integer*8 :: rc8 + + if (sending) then + print *, irp_here, ': sending=true' + stop -1 + endif + sending = .True. + + sz = (imax-imin+1)*N_states_diag + + rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE) + if(rc /= 4) stop 'davidson_csf_push_results failed to push task_id' + + rc = f77_zmq_send( zmq_socket_push, imin, 4, ZMQ_SNDMORE) + if(rc /= 4) stop 'davidson_csf_push_results failed to push imin' + + rc = f77_zmq_send( zmq_socket_push, imax, 4, ZMQ_SNDMORE) + if(rc /= 4) stop 'davidson_csf_push_results failed to push imax' + + rc8 = f77_zmq_send8( zmq_socket_push, v_t(1,imin), 8_8*sz, ZMQ_SNDMORE) + if(rc8 /= 8_8*sz) stop 'davidson_csf_push_results failed to push vt' + + +end subroutine + + + + +subroutine davidson_csf_pull_results(zmq_socket_pull, v_t, imin, imax, task_id) + use f77_zmq + implicit none + BEGIN_DOC +! Pull the results of $H | U \rangle$ on the master. + END_DOC + + integer(ZMQ_PTR) ,intent(in) :: zmq_socket_pull + integer ,intent(out) :: task_id, imin, imax + double precision ,intent(out) :: v_t(N_states_diag,N_det) + + integer :: rc, sz + integer*8 :: rc8 + + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) + if(rc /= 4) stop 'davidson_csf_pull_results failed to pull task_id' + + rc = f77_zmq_recv( zmq_socket_pull, imin, 4, 0) + if(rc /= 4) stop 'davidson_csf_pull_results failed to pull imin' + + rc = f77_zmq_recv( zmq_socket_pull, imax, 4, 0) + if(rc /= 4) stop 'davidson_csf_pull_results failed to pull imax' + + sz = (imax-imin+1)*N_states_diag + + rc8 = f77_zmq_recv8( zmq_socket_pull, v_t(1,imin), 8_8*sz, 0) + if(rc8 /= 8*sz) stop 'davidson_csf_pull_results failed to pull v_t' + +! Activate if zmq_socket_pull is a REP +IRP_IF ZMQ_PUSH +IRP_ELSE + rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0) + if (rc /= 2) then + print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...' + stop -1 + endif +IRP_ENDIF + +end subroutine + + + + +subroutine davidson_csf_collector(zmq_to_qp_run_socket, zmq_socket_pull, v0, sze, N_st) + use f77_zmq + implicit none + BEGIN_DOC +! Routine collecting the results of the workers in Davidson's algorithm. + END_DOC + + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + integer, intent(in) :: sze, N_st + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + + double precision ,intent(inout) :: v0(sze, N_st) + + integer :: more, task_id, imin, imax + + double precision, allocatable :: v_t(:,:) + logical :: sending + integer :: i,j + integer, external :: zmq_delete_task_async_send + integer, external :: zmq_delete_task_async_recv + + allocate(v_t(N_st,N_det)) + v0 = 0.d0 + more = 1 + sending = .False. + do while (more == 1) + call davidson_csf_pull_results(zmq_socket_pull, v_t, imin, imax, task_id) + if (zmq_delete_task_async_send(zmq_to_qp_run_socket,task_id,sending) == -1) then + stop 'davidson: Unable to delete task (send)' + endif + do j=1,N_st + do i=imin,imax + v0(i,j) = v0(i,j) + v_t(j,i) + enddo + enddo + if (zmq_delete_task_async_recv(zmq_to_qp_run_socket,more,sending) == -1) then + stop 'davidson: Unable to delete task (recv)' + endif + end do + deallocate(v_t) + +end subroutine + + + + + +subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze) + use omp_lib + use bitmasks + use f77_zmq + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$ + ! + ! n : number of determinants + ! + ! H_jj : array of $\langle j | H | j \rangle$ + END_DOC + integer, intent(in) :: N_st, sze + double precision, intent(out) :: v_0(sze,N_st) + double precision, intent(inout):: u_0(sze,N_st) + integer :: i,j,k + integer :: ithread + double precision, allocatable :: u_t(:,:) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t + integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull + PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique + PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc + PROVIDE ref_bitmask_energy nproc + PROVIDE mpi_initialized + + call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'davidson') + +! integer :: N_states_diag_save +! N_states_diag_save = N_states_diag +! N_states_diag = N_st + if (zmq_put_N_states_diag(zmq_to_qp_run_socket, 1) == -1) then + stop 'Unable to put N_states_diag on ZMQ server' + endif + + if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then + stop 'Unable to put psi on ZMQ server' + endif + energy = 0.d0 + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',energy,size(energy)) == -1) then + stop 'Unable to put energy on ZMQ server' + endif + + + ! Create tasks + ! ============ + + integer :: istep, imin, imax, ishift, ipos + integer, external :: add_task_to_taskserver + integer, parameter :: tasksize=20000 + character*(100000) :: task + istep=1 + ishift=0 + imin=1 + + + ipos=1 + do imin=1,N_det,tasksize + imax = min(N_det,imin-1+tasksize) + if (imin==1) then + istep = 2 + else + istep = 1 + endif + do ishift=0,istep-1 + write(task(ipos:ipos+50),'(4(I11,1X),1X,1A)') imin, imax, ishift, istep, '|' + ipos = ipos+50 + if (ipos > 100000-50) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task' + endif + ipos=1 + endif + enddo + enddo + + if (ipos > 1) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task' + endif + ipos=1 + endif + + allocate(u_t(N_st,N_det)) + do k=1,N_st + call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) + enddo + + call dtranspose( & + u_0, & + size(u_0, 1), & + u_t, & + size(u_t, 1), & + N_det, N_st) + + + ASSERT (N_st == N_states_diag) + ASSERT (sze >= N_det) + + integer :: rc, ni, nj + integer*8 :: rc8 + double precision :: energy(N_st) + + integer, external :: zmq_put_dvector, zmq_put_psi, zmq_put_N_states_diag + integer, external :: zmq_put_dmatrix + + if (size(u_t) < 8388608) then + ni = size(u_t) + nj = 1 + else + ni = 8388608 + nj = size(u_t)/8388608 + 1 + endif + ! Warning : dimensions are modified for efficiency, It is OK since we get the + ! full matrix + if (zmq_put_dmatrix(zmq_to_qp_run_socket, 1, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1) then + stop 'Unable to put u_t on ZMQ server' + endif + + deallocate(u_t) + + integer, external :: zmq_set_running + if (zmq_set_running(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Failed in zmq_set_running' + endif + + call omp_set_max_active_levels(4) + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread) + ithread = omp_get_thread_num() + if (ithread == 0 ) then + call davidson_csf_collector(zmq_to_qp_run_socket, zmq_socket_pull, v_0, N_det, N_st) + else + call davidson_csf_slave_inproc(1) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'davidson') + + !$OMP PARALLEL + !$OMP SINGLE + do k=1,N_st + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(k,N_det) + call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + !$OMP END TASK + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(k,N_det) + call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + !$OMP END TASK + enddo + !$OMP END SINGLE + !$OMP TASKWAIT + !$OMP END PARALLEL + +! N_states_diag = N_states_diag_save +! SOFT_TOUCH N_states_diag +end + diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f new file mode 100644 index 00000000..54db55a8 --- /dev/null +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -0,0 +1,582 @@ +subroutine davidson_diag_h_csf(dets_in,u_in,dim_in,energies,sze,sze_csf,N_st,N_st_diag,Nint,dressing_state,converged) + use bitmasks + implicit none + BEGIN_DOC + ! Davidson diagonalization. + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten + ! on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! Initial guess vectors are not necessarily orthonormal + END_DOC + integer, intent(in) :: dim_in, sze, sze_csf, N_st, N_st_diag, Nint + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(inout) :: u_in(dim_in,N_st_diag) + double precision, intent(out) :: energies(N_st_diag) + integer, intent(in) :: dressing_state + logical, intent(out) :: converged + double precision, allocatable :: H_jj(:) + + double precision, external :: diag_H_mat_elem, diag_S_mat_elem + integer :: i,k + ASSERT (N_st > 0) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + PROVIDE mo_two_e_integrals_in_map + allocate(H_jj(sze)) + + H_jj(1) = diag_h_mat_elem(dets_in(1,1,1),Nint) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(sze,H_jj, dets_in,Nint) & + !$OMP PRIVATE(i) + !$OMP DO SCHEDULE(static) + do i=2,sze + H_jj(i) = diag_H_mat_elem(dets_in(1,1,i),Nint) + enddo + !$OMP END DO + !$OMP END PARALLEL + + if (dressing_state > 0) then + do k=1,N_st + do i=1,sze + H_jj(i) += u_in(i,k) * dressing_column_h(i,k) + enddo + enddo + endif + + call davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag,Nint,dressing_state,converged) + deallocate(H_jj) +end + + +subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag_in,Nint,dressing_state,converged) + use bitmasks + use mmap_module + implicit none + BEGIN_DOC + ! Davidson diagonalization with specific diagonal elements of the H matrix + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten + ! on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > sze + ! + ! Initial guess vectors are not necessarily orthonormal + END_DOC + integer, intent(in) :: dim_in, sze, sze_csf, N_st, N_st_diag_in, Nint + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(in) :: H_jj(sze) + integer, intent(in) :: dressing_state + double precision, intent(inout) :: u_in(dim_in,N_st_diag_in) + double precision, intent(out) :: energies(N_st_diag_in) + + integer :: iter, N_st_diag + integer :: i,j,k,l,m + logical, intent(inout) :: converged + + double precision, external :: u_dot_v, u_dot_u + + integer :: k_pairs, kl + + integer :: iter2, itertot + double precision, allocatable :: y(:,:), h(:,:), lambda(:) + double precision, allocatable :: s_tmp(:,:) + double precision :: diag_h_mat_elem + double precision, allocatable :: residual_norm(:) + character*(16384) :: write_buffer + double precision :: to_print(2,N_st) + double precision :: cpu, wall + integer :: shift, shift2, itermax, istate + double precision :: r1, r2, alpha + logical :: state_ok(N_st_diag_in*davidson_sze_max) + integer :: nproc_target + integer :: order(N_st_diag_in) + double precision :: cmax + double precision, allocatable :: U(:,:), U_csf(:,:), overlap(:,:) + double precision, pointer :: W(:,:), W_csf(:,:) + logical :: disk_based + double precision :: energy_shift(N_st_diag_in*davidson_sze_max) + + include 'constants.include.F' + + N_st_diag = N_st_diag_in + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, S_d, h, lambda + if (N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_full to ', N_st_diag*3 + stop -1 + endif + + itermax = max(2,min(davidson_sze_max, sze/N_st_diag))+1 + itertot = 0 + + if (state_following) then + allocate(overlap(N_st_diag*itermax, N_st_diag*itermax)) + else + allocate(overlap(1,1)) ! avoid 'if' for deallocate + endif + overlap = 0.d0 + + PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse threshold_davidson_pt2 threshold_davidson_from_pt2 + + call write_time(6) + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + ! Find max number of cores to fit in memory + ! ----------------------------------------- + + nproc_target = nproc + double precision :: rss + integer :: maxab + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 + + m=1 + disk_based = .False. + call resident_memory(rss) + do + r1 = 8.d0 * &! bytes + ( dble(sze)*(N_st_diag*itermax) &! U + + 1.0d0*dble(sze*m)*(N_st_diag*itermax) &! W + + 3.0d0*(N_st_diag*itermax)**2 &! h,y,s_tmp + + 1.d0*(N_st_diag*itermax) &! lambda + + 1.d0*(N_st_diag) &! residual_norm + ! In H_u_0_nstates_zmq + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on collector + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on slave + + 0.5d0*maxab &! idx0 in H_u_0_nstates_openmp_work_* + + nproc_target * &! In OMP section + ( 1.d0*(N_int*maxab) &! buffer + + 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx + ) / 1024.d0**3 + + if (nproc_target == 0) then + call check_mem(r1,irp_here) + nproc_target = 1 + exit + endif + + if (r1+rss < qp_max_mem) then + exit + endif + + if (itermax > 4) then + itermax = itermax - 1 + else if (m==1.and.disk_based_davidson) then + m=0 + disk_based = .True. + itermax = 6 + else + nproc_target = nproc_target - 1 + endif + + enddo + nthreads_davidson = nproc_target + TOUCH nthreads_davidson + call write_int(6,N_st,'Number of states') + call write_int(6,N_st_diag,'Number of states in diagonalization') + call write_int(6,sze,'Number of determinants') + call write_int(6,nproc_target,'Number of threads for diagonalization') + call write_double(6, r1, 'Memory(Gb)') + if (disk_based) then + print *, 'Using swap space to reduce RAM' + endif + + !--------------- + + write(6,'(A)') '' + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = 'Iter' + do i=1,N_st + write_buffer = trim(write_buffer)//' Energy Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + + + if (disk_based) then + ! Create memory-mapped files for W and S + type(c_ptr) :: ptr_w, ptr_s + integer :: fd_s, fd_w + call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& + 8, fd_w, .False., ptr_w) + call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/)) + else + allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax),) + endif + + allocate( & + ! Large + U(sze,N_st_diag), & + U_csf(sze_csf,N_st_diag*itermax), & + + ! Small + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + residual_norm(N_st_diag), & + lambda(N_st_diag*itermax)) + + h = 0.d0 + U = 0.d0 + y = 0.d0 + s_tmp = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + + ! Davidson iterations + ! =================== + + converged = .False. + + do k=N_st+1,N_st_diag + do i=1,sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) * u_in(i,k-N_st) + enddo + u_in(k,k) = u_in(k,k) + 10.d0 + enddo + do k=1,N_st_diag + call normalize(u_in(1,k),sze) + enddo + + do k=1,N_st_diag + do i=1,sze + U(i,k) = u_in(i,k) + enddo + enddo + + ! Make random verctors eigenstates of S2 + call convertWFfromDETtoCSF(N_st_diag,U,U_csf) + call convertWFfromCSFtoDET(N_st_diag,U_csf,U) + + do while (.not.converged) + itertot = itertot+1 + if (itertot == 8) then + exit + endif + + do iter=1,itermax-1 + + shift = N_st_diag*(iter-1) + shift2 = N_st_diag*iter + + if ((iter > 1).or.(itertot == 1)) then + ! Compute |W_k> = \sum_i |i> + ! ----------------------------------- + + if (disk_based) then + call ortho_qr_unblocked(U_csf,size(U_csf,1),sze_csf,shift2) + call ortho_qr_unblocked(U_csf,size(U_csf,1),sze_csf,shift2) + else + call ortho_qr(U_csf,size(U_csf,1),sze_csf,shift2) + call ortho_qr(U_csf,size(U_csf,1),sze_csf,shift2) + endif + + call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift+1),U) + if ((sze > 100000).and.distributed_davidson) then + call H_u_0_nstates_zmq (W,U,N_st_diag,sze) + else + call H_u_0_nstates_openmp(W,U,N_st_diag,sze) + endif + else + ! Already computed in update below + continue + endif + + if (dressing_state > 0) then + + if (N_st == 1) then + + l = dressed_column_idx(1) + double precision :: f + f = 1.0d0/psi_coef(l,1) + do istate=1,N_st_diag + do i=1,sze + W(i,istate) += dressing_column_h(i,1) *f * U(l,istate) + W(l,istate) += dressing_column_h(i,1) *f * U(i,istate) + enddo + + enddo + + else + + call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & + psi_coef, size(psi_coef,1), & + U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & + dressing_column_h, size(dressing_column_h,1), s_tmp, size(s_tmp,1), & + 1.d0, W(1,1), size(W,1)) + + + call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & + dressing_column_h, size(dressing_column_h,1), & + U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & + psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), & + 1.d0, W(1,1), size(W,1)) + + endif + endif + + call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1)) + + ! Compute h_kl = = + ! ------------------------------------------- + + call dgemm('T','N', shift2, shift2, sze_csf, & + 1.d0, U_csf, size(U_csf,1), W_csf, size(W_csf,1), & + 0.d0, h, size(h,1)) + + ! Diagonalize h + ! --------------- + + call lapack_diag(lambda,y,h,size(h,1),shift2) + + ! Compute Energy for each eigenvector + ! ----------------------------------- + + call dgemm('N','N',shift2,shift2,shift2, & + 1.d0, h, size(h,1), y, size(y,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('T','N',shift2,shift2,shift2, & + 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & + 0.d0, h, size(h,1)) + + do k=1,shift2 + lambda(k) = h(k,k) + enddo + + if (state_following) then + + overlap = -1.d0 + do k=1,shift2 + do i=1,shift2 + overlap(k,i) = dabs(y(k,i)) + enddo + enddo + do k=1,N_st + cmax = -1.d0 + do i=1,N_st + if (overlap(i,k) > cmax) then + cmax = overlap(i,k) + order(k) = i + endif + enddo + do i=1,N_st_diag + overlap(order(k),i) = -1.d0 + enddo + enddo + overlap = y + do k=1,N_st + l = order(k) + if (k /= l) then + y(1:shift2,k) = overlap(1:shift2,l) + endif + enddo + do k=1,N_st + overlap(k,1) = lambda(k) + enddo + + endif + + + ! Express eigenvectors of h in the csf basis + ! ------------------------------------------ + + call dgemm('N','N', sze_csf, N_st_diag, shift2, & + 1.d0, U_csf, size(U_csf,1), y, size(y,1), 0.d0, U_csf(1,shift2+1), size(U_csf,1)) + call dgemm('N','N', sze_csf, N_st_diag, shift2, & + 1.d0, W_csf, size(W_csf,1), y, size(y,1), 0.d0, W_csf(1,shift2+1), size(W_csf,1)) + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift2+1),U) + call convertWFfromCSFtoDET(N_st_diag,W_csf(1,shift2+1),W) + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k=1,N_st_diag + do i=1,sze +! U_csf(i,shift2+k) = & +! (lambda(k) * U_csf(i,shift2+k) - W_csf(i,shift2+k) ) & +! /max(H_jj_csf(i) - lambda (k),1.d-2) + U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) & + /max(H_jj(i) - lambda (k),1.d-2) + enddo + enddo + !$OMP END PARALLEL DO + + do k=1,N_st + residual_norm(k) = u_dot_u(U(1,k),sze) + to_print(1,k) = lambda(k) + nuclear_repulsion + to_print(2,k) = residual_norm(k) + enddo + call convertWFfromDETtoCSF(N_st_diag,U,U_csf(1,shift2+1)) + + if ((itertot>1).and.(iter == 1)) then + !don't print + continue + else + write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + endif + + ! Check convergence + if (iter > 1) then + if (threshold_davidson_from_pt2) then + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson_pt2 + else + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson + endif + endif + + do k=1,N_st + if (residual_norm(k) > 1.d8) then + print *, 'Davidson failed' + stop -1 + endif + enddo + if (converged) then + exit + endif + + logical, external :: qp_stop + if (qp_stop()) then + converged = .True. + exit + endif + + + enddo + + ! Re-contract U and update W + ! -------------------------------- + + call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0, & + W_csf, size(W_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + do k=1,N_st_diag + do i=1,sze_csf + W_csf(i,k) = u_in(i,k) + enddo + enddo + + call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0, & + U_csf, size(U_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + + do k=1,N_st_diag + do i=1,sze_csf + U_csf(i,k) = u_in(i,k) + enddo + enddo + + if (disk_based) then + call ortho_qr_unblocked(U_csf,size(U_csf,1),sze_csf,N_st_diag) + call ortho_qr_unblocked(U_csf,size(U_csf,1),sze_csf,N_st_diag) + else + call ortho_qr(U_csf,size(U_csf,1),sze_csf,N_st_diag) + call ortho_qr(U_csf,size(U_csf,1),sze_csf,N_st_diag) + endif + + call convertWFfromCSFtoDET(N_st_diag,U_csf,U) + + ! Adjust the phase + do j=1,N_st_diag + ! Find first non-zero + k=1 + do while ((k