10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2025-01-03 01:55:59 +01:00

Looks like reduced loop works.

This commit is contained in:
vijay gopal chilkuri 2021-03-17 15:34:22 +01:00
parent 6e634b0779
commit 5db2680ed5
3 changed files with 355 additions and 33 deletions

View File

@ -1,3 +1,327 @@
subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, nconnectedI)
implicit none
use bitmasks
BEGIN_DOC
! Documentation for obtain_connected_I_foralpha
! This function returns all those selected configurations
! which are connected to the input configuration
! givenI by a single excitation.
!
! The type of excitations are ordered as follows:
! Type 1 - SOMO -> SOMO
! Type 2 - DOMO -> VMO
! Type 3 - SOMO -> VMO
! Type 4 - DOMO -> SOMO
!
! Order of operators
! \alpha> = a^\dag_p a_q |I> = E_pq |I>
END_DOC
integer ,intent(in) :: idxI
integer(bit_kind),intent(in) :: givenI(N_int,2)
integer(bit_kind),intent(out) :: connectedI(N_int,2,*)
integer ,intent(out) :: idxs_connectedI(*)
integer,intent(out) :: nconnectedI
integer*8 :: Idomo
integer*8 :: Isomo
integer*8 :: Jdomo
integer*8 :: Jsomo
integer*8 :: IJsomo
integer*8 :: diffSOMO
integer*8 :: diffDOMO
integer*8 :: xordiffSOMODOMO
integer :: ndiffSOMO
integer :: ndiffDOMO
integer :: nxordiffSOMODOMO
integer :: ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes
integer :: listholes(mo_num)
integer :: holetype(mo_num)
integer :: end_index
integer :: Nsomo_I
!
! 2 2 1 1 0 0 : 1 1 0 0 0 0
! 0 0 1 1 0 0
!
! 2 1 1 1 1 0 : 1 0 0 0 0 0
! 0 1 1 1 1 0
!xorS 0 1 0 0 1 0 : 2
!xorD 0 1 0 0 0 0 : 1
!xorSD 0 0 0 0 1 0 : 1
! -----
! 4
! 1 1 1 1 1 1 : 0 0 0 0 0 0
! 1 1 1 1 1 1
! 1 1 0 0 1 1 : 4
! 1 1 0 0 0 0 : 2
! 0 0 0 0 1 1 : 2
! -----
! 8
!
nconnectedI = 0
end_index = N_configuration
! Since CFGs are sorted wrt to seniority
! we don't have to search the full CFG list
Isomo = givenI(1,1)
Idomo = givenI(1,2)
Nsomo_I = POPCNT(Isomo)
end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_I+6,elec_num))-1)
if(end_index .LT. 0) end_index= N_configuration
!end_index = N_configuration
!print *,"Start and End = ",idxI, end_index
p = 0
q = 0
do i=idxI,end_index
!if(.True.) then
! nconnectedI += 1
! connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
! idxs_connectedI(nconnectedI)=i
! cycle
!endif
Isomo = givenI(1,1)
Idomo = givenI(1,2)
Jsomo = psi_configuration(1,1,i)
Jdomo = psi_configuration(1,2,i)
diffSOMO = IEOR(Isomo,Jsomo)
ndiffSOMO = POPCNT(diffSOMO)
diffDOMO = IEOR(Idomo,Jdomo)
xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
ndiffDOMO = POPCNT(diffDOMO)
nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
!-------
! MONO |
!-------
nconnectedI += 1
connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
idxs_connectedI(nconnectedI)=i
else if((nxordiffSOMODOMO .EQ. 8) .AND. ndiffSOMO .EQ. 4) then
!----------------------------
! DOMO -> VMO + DOMO -> VMO |
!----------------------------
nconnectedI += 1
connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
idxs_connectedI(nconnectedI)=i
else if((nxordiffSOMODOMO .EQ. 6) .AND. ndiffSOMO .EQ. 2) then
!----------------------------
! DOUBLE
!----------------------------
nconnectedI += 1
connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
idxs_connectedI(nconnectedI)=i
else if((nxordiffSOMODOMO .EQ. 2) .AND. ndiffSOMO .EQ. 3) then
!-----------------
! DOUBLE
!-----------------
nconnectedI += 1
connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
idxs_connectedI(nconnectedI)=i
else if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 0) then
!-----------------
! DOUBLE
!-----------------
nconnectedI += 1
connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
idxs_connectedI(nconnectedI)=i
else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
!--------
! I = I |
!--------
nconnectedI += 1
connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
idxs_connectedI(nconnectedI)= i
endif
end do
end subroutine obtain_connected_J_givenI
subroutine obtain_connected_I_foralpha_fromfilterdlist(idxI, nconnectedJ, idslistconnectedJ, listconnectedJ, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors)
implicit none
use bitmasks
BEGIN_DOC
! Documentation for obtain_connected_I_foralpha
! This function returns all those selected configurations
! which are connected to the input configuration
! Ialpha by a single excitation.
!
! The type of excitations are ordered as follows:
! Type 1 - SOMO -> SOMO
! Type 2 - DOMO -> VMO
! Type 3 - SOMO -> VMO
! Type 4 - DOMO -> SOMO
!
! Order of operators
! \alpha> = a^\dag_p a_q |I> = E_pq |I>
END_DOC
integer ,intent(in) :: idxI
integer ,intent(in) :: nconnectedJ
integer(bit_kind),intent(in) :: listconnectedJ(N_int,2,*)
integer(bit_kind),intent(in) :: Ialpha(N_int,2)
integer(bit_kind),intent(out) :: connectedI(N_int,2,*)
integer ,intent(in) :: idslistconnectedJ(*)
integer ,intent(out) :: idxs_connectedI(*)
integer,intent(out) :: nconnectedI
integer,intent(out) :: excitationIds(2,*)
integer,intent(out) :: excitationTypes(*)
real*8 ,intent(out) :: diagfactors(*)
integer*8 :: Idomo
integer*8 :: Isomo
integer*8 :: Jdomo
integer*8 :: Jsomo
integer*8 :: IJsomo
integer*8 :: diffSOMO
integer*8 :: diffDOMO
integer*8 :: xordiffSOMODOMO
integer :: ndiffSOMO
integer :: ndiffDOMO
integer :: nxordiffSOMODOMO
integer :: ii,i,j,k,kk,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes, idxJ
integer :: listholes(mo_num)
integer :: holetype(mo_num)
integer :: end_index
integer :: Nsomo_alpha
logical :: isOKlistJ
isOKlistJ = .False.
nconnectedI = 0
end_index = N_configuration
! Since CFGs are sorted wrt to seniority
! we don't have to search the full CFG list
Isomo = Ialpha(1,1)
Idomo = Ialpha(1,2)
Nsomo_alpha = POPCNT(Isomo)
end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1)
if(end_index .LT. 0) end_index= N_configuration
!end_index = N_configuration
p = 0
q = 0
do i=1,nconnectedJ
idxJ = idslistconnectedJ(i)
Isomo = Ialpha(1,1)
Idomo = Ialpha(1,2)
Jsomo = listconnectedJ(1,1,i)
Jdomo = listconnectedJ(1,2,i)
diffSOMO = IEOR(Isomo,Jsomo)
ndiffSOMO = POPCNT(diffSOMO)
diffDOMO = IEOR(Idomo,Jdomo)
xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
ndiffDOMO = POPCNT(diffDOMO)
nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
select case(ndiffDOMO)
case (0)
! SOMO -> VMO
!print *,"obt SOMO -> VMO"
extyp = 3
IJsomo = IEOR(Isomo, Jsomo)
p = TRAILZ(IAND(Isomo,IJsomo)) + 1
IJsomo = IBCLR(IJsomo,p-1)
q = TRAILZ(IJsomo) + 1
case (1)
! DOMO -> VMO
! or
! SOMO -> SOMO
nsomoJ = POPCNT(Jsomo)
nsomoalpha = POPCNT(Isomo)
if(nsomoJ .GT. nsomoalpha) then
! DOMO -> VMO
!print *,"obt DOMO -> VMO"
extyp = 2
p = TRAILZ(IEOR(Idomo,Jdomo)) + 1
Isomo = IEOR(Isomo, Jsomo)
Isomo = IBCLR(Isomo,p-1)
q = TRAILZ(Isomo) + 1
else
! SOMO -> SOMO
!print *,"obt SOMO -> SOMO"
extyp = 1
q = TRAILZ(IEOR(Idomo,Jdomo)) + 1
Isomo = IEOR(Isomo, Jsomo)
Isomo = IBCLR(Isomo,q-1)
p = TRAILZ(Isomo) + 1
end if
case (2)
! DOMO -> SOMO
!print *,"obt DOMO -> SOMO"
extyp = 4
IJsomo = IEOR(Isomo, Jsomo)
p = TRAILZ(IAND(Jsomo,IJsomo)) + 1
IJsomo = IBCLR(IJsomo,p-1)
q = TRAILZ(IJsomo) + 1
case default
print *,"something went wront in get connectedI"
end select
starti = psi_config_data(idxJ,1)
endi = psi_config_data(idxJ,2)
nconnectedI += 1
connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i)
idxs_connectedI(nconnectedI)=starti
excitationIds(1,nconnectedI)=p
excitationIds(2,nconnectedI)=q
excitationTypes(nconnectedI) = extyp
diagfactors(nconnectedI) = 1.0d0
else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
! find out all pq holes possible
nholes = 0
! holes in SOMO
Isomo = listconnectedJ(1,1,i)
Idomo = listconnectedJ(1,2,i)
do ii = 1,mo_num
if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then
nholes += 1
listholes(nholes) = ii
holetype(nholes) = 1
endif
end do
! holes in DOMO
do ii = 1,mo_num
if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then
nholes += 1
listholes(nholes) = ii
holetype(nholes) = 2
endif
end do
do k=1,nholes
p = listholes(k)
q = p
extyp = 1
if(holetype(k) .EQ. 1) then
starti = psi_config_data(idxJ,1)
endi = psi_config_data(idxJ,2)
nconnectedI += 1
connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i)
idxs_connectedI(nconnectedI)=starti
excitationIds(1,nconnectedI)=p
excitationIds(2,nconnectedI)=q
excitationTypes(nconnectedI) = extyp
diagfactors(nconnectedI) = 1.0d0
else
starti = psi_config_data(idxJ,1)
endi = psi_config_data(idxJ,2)
nconnectedI += 1
connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i)
idxs_connectedI(nconnectedI)=starti
excitationIds(1,nconnectedI)=p
excitationIds(2,nconnectedI)=q
excitationTypes(nconnectedI) = extyp
diagfactors(nconnectedI) = 2.0d0
endif
enddo
endif
end do
end subroutine obtain_connected_I_foralpha_fromfilterdlist
subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors)
implicit none
use bitmasks
@ -61,31 +385,15 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
Idomo = Ialpha(1,2)
Jsomo = psi_configuration(1,1,i)
Jdomo = psi_configuration(1,2,i)
!call debug_spindet(Isomo,1)
!call debug_spindet(Idomo,1)
!print *,"-J--i=",i,Idomo,Jdomo,">",N_configuration
!call debug_spindet(Jsomo,1)
!call debug_spindet(Jdomo,1)
diffSOMO = IEOR(Isomo,Jsomo)
ndiffSOMO = POPCNT(diffSOMO)
!if(ndiffSOMO .NE. 2 .AND. ndiffSOMO .NE. 0) then
! cycle
!endif
diffDOMO = IEOR(Idomo,Jdomo)
xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
ndiffDOMO = POPCNT(diffDOMO)
nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
!print *,"-I--i=",i,ndiffSOMO,ndiffDOMO,nxordiffSOMODOMO!Isomo,Jsomo,ndiffSOMO,ndiffDOMO
!if((ndiffSOMO + ndiffDOMO) .EQ. 0) cycle
!print *,POPCNT(IEOR(diffSOMO,diffDOMO)), ndiffDOMO
!if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then
if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
!call debug_spindet(Isomo,1)
!call debug_spindet(Idomo,1)
!print *,"-J--i=",i,Idomo,Jdomo,">",N_configuration
!call debug_spindet(Jsomo,1)
!call debug_spindet(Jdomo,1)
!print *," --- IdsJ=",i
select case(ndiffDOMO)
case (0)
! SOMO -> VMO
@ -138,7 +446,6 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
excitationIds(2,nconnectedI)=q
excitationTypes(nconnectedI) = extyp
diagfactors(nconnectedI) = 1.0d0
!print *,"------ > output p,q in obt=",p,q
else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
! find out all pq holes possible
nholes = 0
@ -175,7 +482,6 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
excitationIds(2,nconnectedI)=q
excitationTypes(nconnectedI) = extyp
diagfactors(nconnectedI) = 1.0d0
!print *,"------ > output p,q in obt=",p,q
else
starti = psi_config_data(i,1)
endi = psi_config_data(i,2)
@ -186,7 +492,6 @@ subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI
excitationIds(2,nconnectedI)=q
excitationTypes(nconnectedI) = extyp
diagfactors(nconnectedI) = 2.0d0
!print *,"------ > output p,q in obt=",p,q
endif
enddo
endif

View File

@ -905,6 +905,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
real*8,intent(out) :: psi_out(n_st,sze)
integer(bit_kind) :: Icfg(N_INT,2)
integer :: i,j,k,l,p,q,noccp,noccq, ii, jj, m, n, idxI, kk, nocck,orbk
integer(bit_kind),dimension(:,:,:),allocatable :: listconnectedJ
integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg
integer(bit_kind),dimension(:,:,:),allocatable :: singlesI
integer(bit_kind),dimension(:,:,:),allocatable :: connectedI_alpha
@ -914,6 +915,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
integer,dimension(:),allocatable :: excitationTypes_single
integer,dimension(:,:),allocatable :: excitationIds
integer,dimension(:),allocatable :: excitationTypes
integer,dimension(:),allocatable :: idslistconnectedJ
real*8,dimension(:),allocatable :: diagfactors
integer :: nholes
integer :: nvmos
@ -928,6 +930,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
integer :: rowsTKI
integer :: noccpp
integer :: istart_cfg, iend_cfg
integer :: nconnectedJ
integer*8 :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta
integer :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk
real*8 :: norm_coef_cfg, fac2eints
@ -945,6 +948,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
! allocate
allocate(alphas_Icfg(N_INT,2,max(sze,100)))
allocate(listconnectedJ(N_INT,2,max(sze,100)))
allocate(singlesI(N_INT,2,max(sze,100)))
allocate(connectedI_alpha(N_INT,2,max(sze,100)))
allocate(idxs_singlesI(max(sze,100)))
@ -953,6 +957,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
allocate(excitationTypes_single(max(sze,100)))
allocate(excitationIds(2,max(sze,100)))
allocate(excitationTypes(max(sze,100)))
allocate(idslistconnectedJ(max(sze,100)))
allocate(diagfactors(max(sze,100)))
!print *," sze = ",sze
@ -1109,16 +1114,25 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
!call obtain_associated_alphaI(i, Icfg, alphas_Icfg, Nalphas_Icfg)
Nalphas_Icfg = NalphaIcfg_list(i)
alphas_Icfg(1:n_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg)
!print *,"I=",i," Nal=",Nalphas_Icfg
call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ)
!print *,"size listconnected=",size(listconnectedJ)
!do k=1,nconnectedJ
! print *," idJ =",idslistconnectedJ(k)
!enddo
! TODO : remove doubly excited for return
! Here we do 2x the loop. One to count for the size of the matrix, then we compute.
do k = 1,Nalphas_Icfg
! Now generate all singly excited with respect to a given alpha CFG
call obtain_connected_I_foralpha(i,alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors)
!call obtain_connected_I_foralpha(i,alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors)
call obtain_connected_I_foralpha_fromfilterdlist(i,nconnectedJ, idslistconnectedJ, listconnectedJ, alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors)
!print *,"\t---Ia=",k," NconI=",nconnectedI
if(nconnectedI .EQ. 0) then
cycle
endif
totcolsTKI = 0
rowsTKI = -1
do j = 1,nconnectedI
@ -1148,6 +1162,7 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
! dims : (totcolsTKI, nconnectedI)
allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs
allocate(TKIGIJ(n_st,rowsTKI,nconnectedI)) ! TKI * gpqrs
!print *,"\t---rowsTKI=",rowsTKI," totCols=",totcolsTKI
totcolsTKI = 0
do j = 1,nconnectedI
@ -1160,19 +1175,21 @@ subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze
rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
allocate(CCmattmp(colsikpq,n_st))
!do kk = 1,n_st
!do m = 1,colsikpq
! CCmattmp(m,kk) = psi_in(idxs_connectedI_alpha(j)+m-1,kk)
!enddo
!enddo
do kk = 1,n_st
do m = 1,colsikpq
CCmattmp(m,kk) = psi_in(kk,idxs_connectedI_alpha(j)+m-1)
enddo
enddo
do m = 1,colsikpq
do l = 1,rowsTKI
do kk = 1,n_st
!tmpvar = CCmattmp(m,kk)
tmpvar = CCmattmp(m,kk)
do l = 1,rowsTKI
!TKI(kk,l,totcolsTKI+m) = AIJpqContainer(NSOMOalpha,extype,pmodel,qmodel,l,m) * tmpvar
!TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * tmpvar
TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * psi_in(kk,idxs_connectedI_alpha(j)+m-1)
!TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * psi_in(kk,idxs_connectedI_alpha(j)+m-1)
TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha)
enddo
TKI(kk,:,totcolsTKI+m) *= tmpvar
enddo
enddo
deallocate(CCmattmp)

View File

@ -314,9 +314,9 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
!call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1))
! call calculate_sigma_vector_cfg_nst(W_csf(1,shift+1),U_csf(1,shift+1),N_st_diag,sze_csf,1,sze_csf,0,1)
! ! TODO : psi_det_size ? for psi_det
!allocate(tmpW(sze_csf,N_st_diag))
!!allocate(tmpW(sze_csf,N_st_diag))
!!allocate(tmpU(sze_csf,N_st_diag))
allocate(tmpW(N_st_diag,sze_csf))
!allocate(tmpU(sze_csf,N_st_diag))
allocate(tmpU(N_st_diag,sze_csf))
do kk=1,N_st_diag
do ii=1,sze_csf
@ -341,9 +341,9 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
!call convertWFfromDETtoCSF(N_st_diag,U,U_csf(1,shift+1))
!call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1))
! call calculate_sigma_vector_cfg_nst(W_csf(1,shift+1),U_csf(1,shift+1),N_st_diag,sze_csf,1,sze_csf,0,1)
!allocate(tmpW(sze_csf,N_st_diag))
!!allocate(tmpW(sze_csf,N_st_diag))
!!allocate(tmpU(sze_csf,N_st_diag))
allocate(tmpW(N_st_diag,sze_csf))
!allocate(tmpU(sze_csf,N_st_diag))
allocate(tmpU(N_st_diag,sze_csf))
do kk=1,N_st_diag
do ii=1,sze_csf