mirror of
https://github.com/QuantumPackage/qp2.git
synced 2024-12-30 15:15:38 +01:00
Merge branch 'dev-stable' of github.com:QuantumPackage/qp2 into dev-stable
This commit is contained in:
commit
6269cb6516
62
config/gfortran_mkl.cfg
Normal file
62
config/gfortran_mkl.cfg
Normal file
@ -0,0 +1,62 @@
|
||||
# Common flags
|
||||
##############
|
||||
#
|
||||
# -ffree-line-length-none : Needed for IRPF90 which produces long lines
|
||||
# -lblas -llapack : Link with libblas and liblapack libraries provided by the system
|
||||
# -I . : Include the current directory (Mandatory)
|
||||
#
|
||||
# --ninja : Allow the utilisation of ninja. (Mandatory)
|
||||
# --align=32 : Align all provided arrays on a 32-byte boundary
|
||||
#
|
||||
#
|
||||
[COMMON]
|
||||
FC : gfortran -ffree-line-length-none -I . -mavx -g -fPIC -std=legacy
|
||||
LAPACK_LIB : -I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_core -lpthread -lm -ldl -lmkl_gnu_thread -lgomp -fopenmp
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 -DSET_NESTED
|
||||
|
||||
# Global options
|
||||
################
|
||||
#
|
||||
# 1 : Activate
|
||||
# 0 : Deactivate
|
||||
#
|
||||
[OPTION]
|
||||
MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
|
||||
CACHE : 0 ; Enable cache_compile.py
|
||||
OPENMP : 1 ; Append OpenMP flags
|
||||
|
||||
# Optimization flags
|
||||
####################
|
||||
#
|
||||
# -Ofast : Disregard strict standards compliance. Enables all -O3 optimizations.
|
||||
# It also enables optimizations that are not valid
|
||||
# for all standard-compliant programs. It turns on
|
||||
# -ffast-math and the Fortran-specific
|
||||
# -fno-protect-parens and -fstack-arrays.
|
||||
[OPT]
|
||||
FCFLAGS : -Ofast -mavx
|
||||
|
||||
# Profiling flags
|
||||
#################
|
||||
#
|
||||
[PROFILE]
|
||||
FC : -p -g
|
||||
FCFLAGS : -Ofast
|
||||
|
||||
# Debugging flags
|
||||
#################
|
||||
#
|
||||
# -fcheck=all : Checks uninitialized variables, array subscripts, etc...
|
||||
# -g : Extra debugging information
|
||||
#
|
||||
[DEBUG]
|
||||
FCFLAGS : -fcheck=all -g
|
||||
|
||||
# OpenMP flags
|
||||
#################
|
||||
#
|
||||
[OPENMP]
|
||||
FC : -fopenmp
|
||||
IRPF90_FLAGS : --openmp
|
||||
|
@ -99,7 +99,7 @@ size: (ao_basis.ao_num)
|
||||
type: double precision
|
||||
doc: coefficients of the 1-electron Jastrow in AOsxAOs
|
||||
interface: ezfio
|
||||
size: (ao_basis.ao_num*ao_basis.ao_num)
|
||||
size: (ao_basis.ao_num,ao_basis.ao_num)
|
||||
|
||||
[j1e_coef_ao3]
|
||||
type: double precision
|
||||
|
@ -78,7 +78,7 @@ END_PROVIDER
|
||||
double precision :: cx, cy, cz
|
||||
double precision :: time0, time1
|
||||
double precision, allocatable :: Pa(:,:), Pb(:,:), Pt(:,:)
|
||||
double precision, allocatable :: coef_fit(:), coef_fit2(:), coef_fit3(:,:)
|
||||
double precision, allocatable :: coef_fit(:), coef_fit2(:,:), coef_fit3(:,:)
|
||||
|
||||
PROVIDE j1e_type
|
||||
|
||||
@ -243,7 +243,7 @@ END_PROVIDER
|
||||
|
||||
PROVIDE aos_grad_in_r_array
|
||||
|
||||
allocate(coef_fit2(ao_num*ao_num))
|
||||
allocate(coef_fit2(ao_num,ao_num))
|
||||
|
||||
if(mpi_master) then
|
||||
call ezfio_has_jastrow_j1e_coef_ao2(exists)
|
||||
@ -254,7 +254,7 @@ END_PROVIDER
|
||||
IRP_ENDIF
|
||||
IRP_IF MPI
|
||||
include 'mpif.h'
|
||||
call MPI_BCAST(coef_fit2, ao_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
|
||||
call MPI_BCAST(coef_fit2, (ao_num*ao_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
|
||||
if (ierr /= MPI_SUCCESS) then
|
||||
stop 'Unable to read j1e_coef_ao2 with MPI'
|
||||
endif
|
||||
@ -264,7 +264,7 @@ END_PROVIDER
|
||||
write(6,'(A)') '.. >>>>> [ IO READ: j1e_coef_ao2 ] <<<<< ..'
|
||||
call ezfio_get_jastrow_j1e_coef_ao2(coef_fit2)
|
||||
IRP_IF MPI
|
||||
call MPI_BCAST(coef_fit2, ao_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
|
||||
call MPI_BCAST(coef_fit2, (ao_num*ao_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
|
||||
if (ierr /= MPI_SUCCESS) then
|
||||
stop 'Unable to read j1e_coef_ao2 with MPI'
|
||||
endif
|
||||
@ -272,14 +272,14 @@ END_PROVIDER
|
||||
endif
|
||||
else
|
||||
|
||||
call get_j1e_coef_fit_ao2(ao_num*ao_num, coef_fit2)
|
||||
call get_j1e_coef_fit_ao2(ao_num, coef_fit2)
|
||||
call ezfio_set_jastrow_j1e_coef_ao2(coef_fit2)
|
||||
|
||||
endif
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT (NONE) &
|
||||
!$OMP PRIVATE (i, j, ij, ipoint, c) &
|
||||
!$OMP PRIVATE (i, j, ipoint, c) &
|
||||
!$OMP SHARED (n_points_final_grid, ao_num, &
|
||||
!$OMP aos_grad_in_r_array, coef_fit2, &
|
||||
!$OMP aos_in_r_array, j1e_gradx, j1e_grady, j1e_gradz)
|
||||
@ -292,9 +292,7 @@ END_PROVIDER
|
||||
|
||||
do i = 1, ao_num
|
||||
do j = 1, ao_num
|
||||
ij = (i-1)*ao_num + j
|
||||
|
||||
c = coef_fit2(ij)
|
||||
c = coef_fit2(j,i)
|
||||
|
||||
j1e_gradx(ipoint) += c * (aos_in_r_array(i,ipoint) * aos_grad_in_r_array(j,ipoint,1) + aos_grad_in_r_array(i,ipoint,1) * aos_in_r_array(j,ipoint))
|
||||
j1e_grady(ipoint) += c * (aos_in_r_array(i,ipoint) * aos_grad_in_r_array(j,ipoint,2) + aos_grad_in_r_array(i,ipoint,2) * aos_in_r_array(j,ipoint))
|
||||
|
@ -120,15 +120,18 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit)
|
||||
|
||||
implicit none
|
||||
integer , intent(in) :: dim_fit
|
||||
double precision, intent(out) :: coef_fit(dim_fit)
|
||||
double precision, intent(out) :: coef_fit(dim_fit,dim_fit)
|
||||
|
||||
integer :: i, j, k, l, ipoint
|
||||
integer :: ij, kl
|
||||
integer :: ij, kl, mn
|
||||
integer :: info, n_svd, LWORK
|
||||
double precision :: g
|
||||
double precision :: t0, t1
|
||||
double precision, allocatable :: A(:,:), b(:), A_inv(:,:)
|
||||
double precision :: cutoff_svd, D1_inv
|
||||
double precision, allocatable :: A(:,:,:,:), b(:)
|
||||
double precision, allocatable :: Pa(:,:), Pb(:,:), Pt(:,:)
|
||||
double precision, allocatable :: u1e_tmp(:)
|
||||
double precision, allocatable :: u1e_tmp(:), tmp(:,:,:)
|
||||
double precision, allocatable :: U(:,:), D(:), Vt(:,:), work(:)
|
||||
|
||||
|
||||
PROVIDE j1e_type
|
||||
@ -136,6 +139,9 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit)
|
||||
PROVIDE elec_alpha_num elec_beta_num elec_num
|
||||
PROVIDE mo_coef
|
||||
|
||||
|
||||
cutoff_svd = 1d-10
|
||||
|
||||
call wall_time(t0)
|
||||
print*, ' PROVIDING the representation of 1e-Jastrow in AOs x AOs ... '
|
||||
|
||||
@ -169,98 +175,123 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit)
|
||||
! --- --- ---
|
||||
! get A
|
||||
|
||||
allocate(A(ao_num*ao_num,ao_num*ao_num))
|
||||
allocate(tmp(n_points_final_grid,ao_num,ao_num))
|
||||
allocate(A(ao_num,ao_num,ao_num,ao_num))
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT (NONE) &
|
||||
!$OMP PRIVATE (i, j, k, l, ij, kl, ipoint) &
|
||||
!$OMP SHARED (n_points_final_grid, ao_num, &
|
||||
!$OMP final_weight_at_r_vector, aos_in_r_array_transp, A)
|
||||
!$OMP PRIVATE (i, j, ipoint) &
|
||||
!$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp)
|
||||
!$OMP DO COLLAPSE(2)
|
||||
do k = 1, ao_num
|
||||
do l = 1, ao_num
|
||||
kl = (k-1)*ao_num + l
|
||||
|
||||
do i = 1, ao_num
|
||||
do j = 1, ao_num
|
||||
ij = (i-1)*ao_num + j
|
||||
|
||||
A(ij,kl) = 0.d0
|
||||
do i = 1, ao_num
|
||||
do ipoint = 1, n_points_final_grid
|
||||
A(ij,kl) += final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) &
|
||||
* aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,l)
|
||||
enddo
|
||||
enddo
|
||||
tmp(ipoint,i,j) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
!$OMP END PARALLEL
|
||||
|
||||
! print *, ' A'
|
||||
! do ij = 1, ao_num*ao_num
|
||||
! write(*, '(100000(f15.7))') (A(ij,kl), kl = 1, ao_num*ao_num)
|
||||
! enddo
|
||||
call dgemm( "T", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
|
||||
, tmp(1,1,1), n_points_final_grid, tmp(1,1,1), n_points_final_grid &
|
||||
, 0.d0, A(1,1,1,1), ao_num*ao_num)
|
||||
|
||||
! --- --- ---
|
||||
! get b
|
||||
|
||||
allocate(b(ao_num*ao_num))
|
||||
|
||||
do ipoint = 1, n_points_final_grid
|
||||
u1e_tmp(ipoint) = dsqrt(final_weight_at_r_vector(ipoint)) * u1e_tmp(ipoint)
|
||||
enddo
|
||||
|
||||
call dgemv("T", n_points_final_grid, ao_num*ao_num, 1.d0, tmp(1,1,1), n_points_final_grid, u1e_tmp(1), 1, 0.d0, b(1), 1)
|
||||
!call dgemm( "T", "N", ao_num*ao_num, 1, n_points_final_grid, 1.d0 &
|
||||
! , tmp(1,1,1), n_points_final_grid, u1e_tmp(1), n_points_final_grid &
|
||||
! , 0.d0, b(1), ao_num*ao_num)
|
||||
|
||||
deallocate(u1e_tmp)
|
||||
deallocate(tmp)
|
||||
|
||||
! --- --- ---
|
||||
! solve Ax = b
|
||||
|
||||
! double precision, allocatable :: A_inv(:,:,:,:)
|
||||
! allocate(A_inv(ao_num,ao_num,ao_num,ao_num))
|
||||
! call get_pseudo_inverse(A(1,1,1,1), ao_num*ao_num, ao_num*ao_num, ao_num*ao_num, A_inv(1,1,1,1), ao_num*ao_num, cutoff_svd)
|
||||
! A = A_inv
|
||||
|
||||
allocate(D(ao_num*ao_num), U(ao_num*ao_num,ao_num*ao_num), Vt(ao_num*ao_num,ao_num*ao_num))
|
||||
|
||||
allocate(work(1))
|
||||
lwork = -1
|
||||
call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A(1,1,1,1), ao_num*ao_num &
|
||||
, D(1), U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num, work, lwork, info)
|
||||
if(info /= 0) then
|
||||
print *, info, ': SVD failed'
|
||||
stop
|
||||
endif
|
||||
|
||||
LWORK = max(5*ao_num*ao_num, int(WORK(1)))
|
||||
deallocate(work)
|
||||
allocate(work(lwork))
|
||||
call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A(1,1,1,1), ao_num*ao_num &
|
||||
, D(1), U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num, work, lwork, info)
|
||||
if(info /= 0) then
|
||||
print *, info, ':: SVD failed'
|
||||
stop 1
|
||||
endif
|
||||
|
||||
deallocate(work)
|
||||
|
||||
if(D(1) .lt. 1d-14) then
|
||||
print*, ' largest singular value is very small:', D(1)
|
||||
n_svd = 1
|
||||
else
|
||||
n_svd = 0
|
||||
D1_inv = 1.d0 / D(1)
|
||||
do ij = 1, ao_num*ao_num
|
||||
if(D(ij)*D1_inv > cutoff_svd) then
|
||||
D(ij) = 1.d0 / D(ij)
|
||||
n_svd = n_svd + 1
|
||||
else
|
||||
D(ij) = 0.d0
|
||||
endif
|
||||
enddo
|
||||
endif
|
||||
print*, ' n_svd = ', n_svd
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT (NONE) &
|
||||
!$OMP PRIVATE (i, j, ij, ipoint) &
|
||||
!$OMP SHARED (n_points_final_grid, ao_num, &
|
||||
!$OMP final_weight_at_r_vector, aos_in_r_array_transp, u1e_tmp, b)
|
||||
!$OMP DO COLLAPSE(2)
|
||||
do i = 1, ao_num
|
||||
do j = 1, ao_num
|
||||
ij = (i-1)*ao_num + j
|
||||
|
||||
b(ij) = 0.d0
|
||||
do ipoint = 1, n_points_final_grid
|
||||
b(ij) = b(ij) + final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) * u1e_tmp(ipoint)
|
||||
enddo
|
||||
!$OMP PRIVATE (ij, kl) &
|
||||
!$OMP SHARED (ao_num, n_svd, D, Vt)
|
||||
!$OMP DO
|
||||
do kl = 1, ao_num*ao_num
|
||||
do ij = 1, n_svd
|
||||
Vt(ij,kl) = Vt(ij,kl) * D(ij)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
!$OMP END PARALLEL
|
||||
|
||||
deallocate(u1e_tmp)
|
||||
! A = A_inv
|
||||
call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_svd, 1.d0 &
|
||||
, U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num &
|
||||
, 0.d0, A(1,1,1,1), ao_num*ao_num)
|
||||
|
||||
! --- --- ---
|
||||
! solve Ax = b
|
||||
deallocate(D, U, Vt)
|
||||
|
||||
allocate(A_inv(ao_num*ao_num,ao_num*ao_num))
|
||||
!call get_inverse(A, ao_num*ao_num, ao_num*ao_num, A_inv, ao_num*ao_num)
|
||||
call get_pseudo_inverse(A, ao_num*ao_num, ao_num*ao_num, ao_num*ao_num, A_inv, ao_num*ao_num, 5d-8)
|
||||
|
||||
! ---
|
||||
|
||||
! coef_fit = A_inv x b
|
||||
call dgemv("N", ao_num*ao_num, ao_num*ao_num, 1.d0, A_inv, ao_num*ao_num, b, 1, 0.d0, coef_fit, 1)
|
||||
call dgemv("N", ao_num*ao_num, ao_num*ao_num, 1.d0, A(1,1,1,1), ao_num*ao_num, b(1), 1, 0.d0, coef_fit(1,1), 1)
|
||||
!call dgemm( "N", "N", ao_num*ao_num, 1, ao_num*ao_num, 1.d0 &
|
||||
! , A(1,1,1,1), ao_num*ao_num, b(1), ao_num*ao_num &
|
||||
! , 0.d0, coef_fit(1,1), ao_num*ao_num)
|
||||
|
||||
integer :: mn
|
||||
double precision :: tmp, acc, nrm
|
||||
|
||||
acc = 0.d0
|
||||
nrm = 0.d0
|
||||
do ij = 1, ao_num*ao_num
|
||||
tmp = 0.d0
|
||||
do kl = 1, ao_num*ao_num
|
||||
tmp += A(ij,kl) * coef_fit(kl)
|
||||
enddo
|
||||
tmp = tmp - b(ij)
|
||||
if(dabs(tmp) .gt. 1d-7) then
|
||||
print*, ' problem found in fitting 1e-Jastrow'
|
||||
print*, ij, tmp
|
||||
endif
|
||||
|
||||
acc += dabs(tmp)
|
||||
nrm += dabs(b(ij))
|
||||
enddo
|
||||
print *, ' Relative Error (%) =', 100.d0*acc/nrm
|
||||
|
||||
|
||||
deallocate(A, A_inv, b)
|
||||
deallocate(A, b)
|
||||
|
||||
call wall_time(t1)
|
||||
print*, ' END after (min) ', (t1-t0)/60.d0
|
||||
|
94
plugins/local/non_h_ints_mu/print_j1ecoef_info.irp.f
Normal file
94
plugins/local/non_h_ints_mu/print_j1ecoef_info.irp.f
Normal file
@ -0,0 +1,94 @@
|
||||
|
||||
! ---
|
||||
|
||||
program print_j1ecoef_info

  ! Driver: select the Becke integration grid(s) from the tc_grid* settings,
  ! then print the AO x AO coefficients of the 1e-Jastrow via print_j1ecoef.

  implicit none

  ! Primary grid: copy the tc_grid1 sizes into the my_n_pt_* settings and
  ! touch them so that everything depending on them is rebuilt.
  my_grid_becke = .true.
  PROVIDE tc_grid1_a tc_grid1_r
  my_n_pt_r_grid = tc_grid1_r
  my_n_pt_a_grid = tc_grid1_a
  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid

  ! The extra grid is configured only when tc_integ_type is "numeric".
  if (tc_integ_type == "numeric") then
    my_extra_grid_becke = .true.
    PROVIDE tc_grid2_a tc_grid2_r
    my_n_pt_r_extra_grid = tc_grid2_r
    my_n_pt_a_extra_grid = tc_grid2_a
    touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid
  endif

  call print_j1ecoef()

end
|
||||
|
||||
! ---
|
||||
|
||||
subroutine print_j1ecoef()

  ! Print |c(j,i)| for every pair of AOs, where c holds the coefficients of
  ! the 1e-Jastrow expanded on AO x AO products (EZFIO entry j1e_coef_ao2).
  !
  ! The coefficients are read from EZFIO when they are already stored there;
  ! otherwise they are fitted with get_j1e_coef_fit_ao2 and saved to EZFIO.
  ! Each printed line carries the label of AO i, the label of AO j and the
  ! absolute value of the coefficient.
  !
  ! Changes with respect to the previous version:
  !   * the flag `exists` is now what gets broadcast after the EZFIO query
  !     (the old code broadcast the still-unfilled coefficient buffer and
  !     left `exists` undefined on the non-master ranks);
  !   * the broadcast of the coefficients is executed by every rank instead
  !     of being nested inside the master-only branch;
  !   * the buffer is rank 2 and the fit is called with dim_fit = ao_num,
  !     matching the coef_fit(dim_fit,dim_fit) dummy of get_j1e_coef_fit_ao2.

  implicit none
  integer                       :: i, j
  integer                       :: ierr
  logical                       :: exists
  character(len=10)             :: ni, nj
  double precision, allocatable :: coef_fit2(:,:)

  PROVIDE ao_l_char_space

  allocate(coef_fit2(ao_num,ao_num))

  ! Give the flag a value on every rank before the master overwrites it.
  exists = .false.
  if(mpi_master) then
    call ezfio_has_jastrow_j1e_coef_ao2(exists)
  endif
  IRP_IF MPI_DEBUG
    print *, irp_here, mpi_rank
    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
  IRP_ENDIF
  IRP_IF MPI
    include 'mpif.h'
    ! All ranks must take the same branch below, so share the master's answer.
    call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
    if (ierr /= MPI_SUCCESS) then
      stop 'Unable to read j1e_coef_ao2 with MPI'
    endif
  IRP_ENDIF

  if(exists) then

    if(mpi_master) then
      write(6,'(A)') '.. >>>>> [ IO READ: j1e_coef_ao2 ] <<<<< ..'
      call ezfio_get_jastrow_j1e_coef_ao2(coef_fit2)
    endif
    IRP_IF MPI
      ! Collective call: it has to be reached by the master and by the others.
      call MPI_BCAST(coef_fit2, (ao_num*ao_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
      if (ierr /= MPI_SUCCESS) then
        stop 'Unable to read j1e_coef_ao2 with MPI'
      endif
    IRP_ENDIF

  else

    call get_j1e_coef_fit_ao2(ao_num, coef_fit2)
    call ezfio_set_jastrow_j1e_coef_ao2(coef_fit2)

  endif

  ! coef_fit2(j,i) is the element that used to be addressed through the
  ! flattened index ij = (i-1)*ao_num + j, so the printed values keep the
  ! same order as before.
  do i = 1, ao_num
    write(ni, '(I0)') ao_l(i)+1
    do j = 1, ao_num
      write(nj, '(I0)') ao_l(j)+1
      print *, trim(adjustl(ni)) // trim(adjustl(ao_l_char_space(i))), " " &
             , trim(adjustl(nj)) // trim(adjustl(ao_l_char_space(j))), " " &
             , dabs(coef_fit2(j,i))
    enddo
  enddo

  deallocate(coef_fit2)

  return
end
|
||||
|
||||
! ---
|
||||
|
||||
|
@ -39,8 +39,11 @@ program test_non_h
|
||||
|
||||
!call test_j1e_fit_ao()
|
||||
|
||||
call test_tc_grad_and_lapl_ao_new()
|
||||
call test_tc_grad_square_ao_new()
|
||||
!call test_tc_grad_and_lapl_ao_new()
|
||||
!call test_tc_grad_square_ao_new()
|
||||
|
||||
!call test_fit_coef_A1()
|
||||
call test_fit_coef_inv()
|
||||
end
|
||||
|
||||
! ---
|
||||
@ -1112,3 +1115,333 @@ END_PROVIDER
|
||||
|
||||
! ---
|
||||
|
||||
subroutine test_fit_coef_A1()

  ! Cross-check two constructions of the 4-index matrix
  !
  !   A(ij,kl) = sum_r w(r) * ao_i(r) * ao_j(r) * ao_k(r) * ao_l(r)
  !
  ! with w = final_weight_at_r_vector and ao = aos_in_r_array_transp:
  !   A1 : explicit loops over the grid points (reference),
  !   A2 : one dgemm on tmp(i,j,r) = sqrt(w(r)) * ao_i(r) * ao_j(r).
  ! The wall time of each construction is printed. The routine stops at the
  ! first element that differs by more than 1d-10; otherwise it prints the
  ! accumulated relative deviation in percent.
  !
  ! Fix: the mismatch message now reports the four subscripts of the
  ! offending A2 element (it used to print i twice and never j).

  implicit none
  integer                       :: i, j, k, l, ij, kl, ipoint
  double precision              :: t1, t2
  double precision              :: accu, norm, diff
  double precision, allocatable :: A1(:,:)
  double precision, allocatable :: A2(:,:,:,:), tmp(:,:,:)

  ! ---
  ! reference: explicit quadrature

  allocate(A1(ao_num*ao_num,ao_num*ao_num))

  call wall_time(t1)

  !$OMP PARALLEL                                                  &
  !$OMP DEFAULT (NONE)                                            &
  !$OMP PRIVATE (i, j, k, l, ij, kl, ipoint)                      &
  !$OMP SHARED (n_points_final_grid, ao_num,                      &
  !$OMP         final_weight_at_r_vector, aos_in_r_array_transp, A1)
  !$OMP DO COLLAPSE(2)
  do k = 1, ao_num
    do l = 1, ao_num
      kl = (k-1)*ao_num + l

      do i = 1, ao_num
        do j = 1, ao_num
          ij = (i-1)*ao_num + j

          A1(ij,kl) = 0.d0
          do ipoint = 1, n_points_final_grid
            A1(ij,kl) += final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) &
                                                          * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,l)
          enddo
        enddo
      enddo
    enddo
  enddo
  !$OMP END DO
  !$OMP END PARALLEL

  call wall_time(t2)
  print*, ' WALL TIME FOR A1 (min) =', (t2-t1)/60.d0

  ! ---
  ! same matrix from a single matrix product

  call wall_time(t1)

  allocate(tmp(ao_num,ao_num,n_points_final_grid))

  !$OMP PARALLEL                 &
  !$OMP DEFAULT (NONE)           &
  !$OMP PRIVATE (i, j, ipoint)   &
  !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp)
  !$OMP DO COLLAPSE(2)
  do j = 1, ao_num
    do i = 1, ao_num
      do ipoint = 1, n_points_final_grid
        tmp(i,j,ipoint) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j)
      enddo
    enddo
  enddo
  !$OMP END DO
  !$OMP END PARALLEL

  allocate(A2(ao_num,ao_num,ao_num,ao_num))

  ! A2 = tmp x tmp^T, tmp being viewed as an (ao_num*ao_num) x n_points matrix
  call dgemm( "N", "T", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
            , tmp(1,1,1), ao_num*ao_num, tmp(1,1,1), ao_num*ao_num              &
            , 0.d0, A2(1,1,1,1), ao_num*ao_num)
  deallocate(tmp)

  call wall_time(t2)
  print*, ' WALL TIME FOR A2 (min) =', (t2-t1)/60.d0

  ! ---
  ! element-wise comparison

  accu = 0.d0
  norm = 0.d0
  do k = 1, ao_num
    do l = 1, ao_num
      kl = (k-1)*ao_num + l

      do i = 1, ao_num
        do j = 1, ao_num
          ij = (i-1)*ao_num + j

          diff = dabs(A2(j,i,l,k) - A1(ij,kl))
          if(diff .gt. 1d-10) then
            ! subscripts listed in the order used to address A2 below
            print *, ' problem in A2 on:', j, i, l, k
            print *, ' A1 :', A1(ij,kl)
            print *, ' A2 :', A2(j,i,l,k)
            stop
          endif

          accu += diff
          norm += dabs(A1(ij,kl))
        enddo
      enddo
    enddo
  enddo

  deallocate(A1, A2)

  print*, ' accuracy (%) = ', 100.d0 * accu / norm

  return
end
|
||||
|
||||
! ---
|
||||
|
||||
subroutine test_fit_coef_inv()

  ! Cross-check two ways of building and pseudo-inverting the 4-index matrix
  !
  !   A(ij,kl) = sum_r w(r) * ao_i(r) * ao_j(r) * ao_k(r) * ao_l(r)
  !
  ! with w = final_weight_at_r_vector and ao = aos_in_r_array_transp:
  !   A1, A1_inv : explicit quadrature loops, then get_pseudo_inverse,
  !   A2, A2_inv : dgemm on tmp(r,i,j) = sqrt(w(r)) * ao_i(r) * ao_j(r), then
  !                an in-place dgesvd + dgemm keeping the singular values s
  !                with s / s_max > cutoff_svd.
  ! Wall times are printed for each step. The routine stops at the first
  ! element of A (tolerance 1d-10) or of the inverse (tolerance cutoff_svd)
  ! that differs; otherwise the accumulated relative deviations are printed.
  !
  ! Fixes:
  !   * the mismatch messages report the four subscripts of the offending
  !     element (they used to print i twice and never j);
  !   * the unused ao_num^4 copy A2_tmp and the unused integer mn are gone;
  !   * the SVD scratch copy of A1 is released as soon as dgesvd returns.

  implicit none
  integer                       :: i, j, k, l, ij, kl, ipoint
  integer                       :: n_svd, info, lwork
  double precision              :: t1, t2
  double precision              :: accu, norm, diff
  double precision              :: cutoff_svd, D1_inv
  double precision, allocatable :: A1(:,:), A1_inv(:,:), A1_tmp(:,:)
  double precision, allocatable :: A2(:,:,:,:), tmp(:,:,:), A2_inv(:,:,:,:)
  double precision, allocatable :: U(:,:), D(:), Vt(:,:), work(:)

  cutoff_svd = 5d-8

  ! ---
  ! reference: explicit quadrature followed by get_pseudo_inverse

  call wall_time(t1)

  allocate(A1(ao_num*ao_num,ao_num*ao_num))

  !$OMP PARALLEL                                                  &
  !$OMP DEFAULT (NONE)                                            &
  !$OMP PRIVATE (i, j, k, l, ij, kl, ipoint)                      &
  !$OMP SHARED (n_points_final_grid, ao_num,                      &
  !$OMP         final_weight_at_r_vector, aos_in_r_array_transp, A1)
  !$OMP DO COLLAPSE(2)
  do k = 1, ao_num
    do l = 1, ao_num
      kl = (k-1)*ao_num + l

      do i = 1, ao_num
        do j = 1, ao_num
          ij = (i-1)*ao_num + j

          A1(ij,kl) = 0.d0
          do ipoint = 1, n_points_final_grid
            A1(ij,kl) += final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) &
                                                          * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,l)
          enddo
        enddo
      enddo
    enddo
  enddo
  !$OMP END DO
  !$OMP END PARALLEL

  call wall_time(t2)
  print*, ' WALL TIME FOR A1 (min) =', (t2-t1)/60.d0

  allocate(A1_inv(ao_num*ao_num,ao_num*ao_num))
  call get_pseudo_inverse(A1, ao_num*ao_num, ao_num*ao_num, ao_num*ao_num, A1_inv, ao_num*ao_num, cutoff_svd)

  call wall_time(t1)
  print*, ' WALL TIME FOR A1_inv (min) =', (t1-t2)/60.d0

  ! ---
  ! same matrix from a single matrix product

  call wall_time(t1)

  allocate(tmp(n_points_final_grid,ao_num,ao_num))

  !$OMP PARALLEL                 &
  !$OMP DEFAULT (NONE)           &
  !$OMP PRIVATE (i, j, ipoint)   &
  !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp)
  !$OMP DO COLLAPSE(2)
  do j = 1, ao_num
    do i = 1, ao_num
      do ipoint = 1, n_points_final_grid
        tmp(ipoint,i,j) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j)
      enddo
    enddo
  enddo
  !$OMP END DO
  !$OMP END PARALLEL

  allocate(A2(ao_num,ao_num,ao_num,ao_num))

  ! A2 = tmp^T x tmp, tmp being viewed as an n_points x (ao_num*ao_num) matrix
  call dgemm( "T", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
            , tmp(1,1,1), n_points_final_grid, tmp(1,1,1), n_points_final_grid  &
            , 0.d0, A2(1,1,1,1), ao_num*ao_num)

  deallocate(tmp)

  call wall_time(t2)
  print*, ' WALL TIME FOR A2 (min) =', (t2-t1)/60.d0

  ! ---
  ! SVD-based inverse; dgesvd overwrites its input, hence the scratch copy.
  ! As before, the factorisation is run on the A1 data.

  allocate(A1_tmp(ao_num*ao_num,ao_num*ao_num))
  A1_tmp = A1

  allocate(A2_inv(ao_num,ao_num,ao_num,ao_num))

  allocate(D(ao_num*ao_num), U(ao_num*ao_num,ao_num*ao_num), Vt(ao_num*ao_num,ao_num*ao_num))

  ! workspace query (lwork = -1)
  allocate(work(1))
  lwork = -1

  call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A1_tmp(1,1), ao_num*ao_num &
             , D(1), U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num, work, lwork, info)
  if(info /= 0) then
    print *, info, ': SVD failed'
    stop
  endif

  LWORK = max(5*ao_num*ao_num, int(WORK(1)))
  deallocate(work)
  allocate(work(lwork))

  call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A1_tmp(1,1), ao_num*ao_num &
             , D(1), U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num, work, lwork, info)
  if(info /= 0) then
    print *, info, ':: SVD failed'
    stop 1
  endif

  deallocate(A1_tmp)
  deallocate(work)

  ! invert the singular values above the relative cutoff, zero the others
  n_svd = 0
  D1_inv = 1.d0 / D(1)
  do ij = 1, ao_num*ao_num
    if(D(ij)*D1_inv > cutoff_svd) then
      D(ij) = 1.d0 / D(ij)
      n_svd = n_svd + 1
    else
      D(ij) = 0.d0
    endif
  enddo
  print*, ' n_svd = ', n_svd

  ! scale the first n_svd rows of Vt by the inverted singular values
  !$OMP PARALLEL            &
  !$OMP DEFAULT (NONE)      &
  !$OMP PRIVATE (ij, kl)    &
  !$OMP SHARED (ao_num, n_svd, D, Vt)
  !$OMP DO
  do kl = 1, ao_num*ao_num
    do ij = 1, n_svd
      Vt(ij,kl) = Vt(ij,kl) * D(ij)
    enddo
  enddo
  !$OMP END DO
  !$OMP END PARALLEL

  call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_svd, 1.d0 &
            , U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num       &
            , 0.d0, A2_inv(1,1,1,1), ao_num*ao_num)

  deallocate(D, U, Vt)

  call wall_time(t1)
  print*, ' WALL TIME FOR A2_inv (min) =', (t1-t2)/60.d0

  ! ---
  ! compare the two matrices

  accu = 0.d0
  norm = 0.d0
  do k = 1, ao_num
    do l = 1, ao_num
      kl = (k-1)*ao_num + l

      do i = 1, ao_num
        do j = 1, ao_num
          ij = (i-1)*ao_num + j

          diff = dabs(A2(j,i,l,k) - A1(ij,kl))
          if(diff .gt. 1d-10) then
            ! subscripts listed in the order used to address A2 below
            print *, ' problem in A2 on:', j, i, l, k
            print *, ' A1 :', A1(ij,kl)
            print *, ' A2 :', A2(j,i,l,k)
            stop
          endif

          accu += diff
          norm += dabs(A1(ij,kl))
        enddo
      enddo
    enddo
  enddo

  print*, ' accuracy on A (%) = ', 100.d0 * accu / norm

  ! ---
  ! compare the two inverses

  accu = 0.d0
  norm = 0.d0
  do k = 1, ao_num
    do l = 1, ao_num
      kl = (k-1)*ao_num + l

      do i = 1, ao_num
        do j = 1, ao_num
          ij = (i-1)*ao_num + j

          diff = dabs(A2_inv(j,i,l,k) - A1_inv(ij,kl))
          if(diff .gt. cutoff_svd) then
            ! subscripts listed in the order used to address A2_inv below
            print *, ' problem in A2_inv on:', j, i, l, k
            print *, ' A1_inv :', A1_inv(ij,kl)
            print *, ' A2_inv :', A2_inv(j,i,l,k)
            stop
          endif

          accu += diff
          norm += dabs(A1_inv(ij,kl))
        enddo
      enddo
    enddo
  enddo

  deallocate(A1_inv, A2_inv)
  deallocate(A1, A2)

  print*, ' accuracy on A_inv (%) = ', 100.d0 * accu / norm

  return
end
|
||||
|
||||
! ---
|
||||
|
||||
|
@ -67,6 +67,13 @@ END_PROVIDER
|
||||
index_final_points(2,i_count) = i
|
||||
index_final_points(3,i_count) = j
|
||||
index_final_points_reverse(k,i,j) = i_count
|
||||
|
||||
if(final_weight_at_r_vector(i_count) .lt. 0.d0) then
|
||||
print *, ' !!! WARNING !!!'
|
||||
print *, ' negative weight !!!!'
|
||||
print *, i_count, final_weight_at_r_vector(i_count)
|
||||
stop
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
@ -1323,18 +1323,22 @@ subroutine get_inverse(A,LDA,m,C,LDC)
|
||||
end
|
||||
|
||||
subroutine get_pseudo_inverse(A, LDA, m, n, C, LDC, cutoff)
|
||||
implicit none
|
||||
|
||||
BEGIN_DOC
|
||||
! Find C = A^-1
|
||||
END_DOC
|
||||
|
||||
implicit none
|
||||
integer, intent(in) :: m, n, LDA, LDC
|
||||
double precision, intent(in) :: A(LDA,n)
|
||||
double precision, intent(in) :: cutoff
|
||||
double precision, intent(out) :: C(LDC,m)
|
||||
|
||||
double precision, allocatable :: U(:,:), D(:), Vt(:,:), work(:), A_tmp(:,:)
|
||||
integer :: info, lwork
|
||||
integer :: i,j,k
|
||||
integer :: i, j, k, n_svd
|
||||
double precision :: D1_inv
|
||||
double precision, allocatable :: U(:,:), D(:), Vt(:,:), work(:), A_tmp(:,:)
|
||||
|
||||
allocate (D(n),U(m,n),Vt(n,n),work(1),A_tmp(m,n))
|
||||
do j=1,n
|
||||
do i=1,m
|
||||
@ -1356,22 +1360,47 @@ subroutine get_pseudo_inverse(A,LDA,m,n,C,LDC,cutoff)
|
||||
stop 1
|
||||
endif
|
||||
|
||||
if(D(1) .lt. 1d-14) then
|
||||
print*, ' largest singular value is very small:', D(1)
|
||||
n_svd = 1
|
||||
else
|
||||
n_svd = 0
|
||||
D1_inv = 1.d0 / D(1)
|
||||
do i = 1, n
|
||||
if (D(i)/D(1) > cutoff) then
|
||||
if(D(i)*D1_inv > cutoff) then
|
||||
D(i) = 1.d0 / D(i)
|
||||
n_svd = n_svd + 1
|
||||
else
|
||||
D(i) = 0.d0
|
||||
endif
|
||||
enddo
|
||||
endif
|
||||
|
||||
C = 0.d0
|
||||
do i=1,m
|
||||
print*, ' n_svd = ', n_svd
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT (NONE) &
|
||||
!$OMP PRIVATE (i, j) &
|
||||
!$OMP SHARED (n, n_svd, D, Vt)
|
||||
!$OMP DO
|
||||
do j = 1, n
|
||||
do k=1,n
|
||||
C(j,i) = C(j,i) + U(i,k) * D(k) * Vt(k,j)
|
||||
enddo
|
||||
do i = 1, n_svd
|
||||
Vt(i,j) = D(i) * Vt(i,j)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
!$OMP END PARALLEL
|
||||
|
||||
call dgemm("N", "N", m, n, n_svd, 1.d0, U, m, Vt, n, 0.d0, C, LDC)
|
||||
|
||||
! C = 0.d0
|
||||
! do i=1,m
|
||||
! do j=1,n
|
||||
! do k=1,n
|
||||
! C(j,i) = C(j,i) + U(i,k) * D(k) * Vt(k,j)
|
||||
! enddo
|
||||
! enddo
|
||||
! enddo
|
||||
|
||||
deallocate(U,D,Vt,work,A_tmp)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user