From cc334b34b736af8a9ec2aa31a714f8a5d201956f Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Fri, 26 Jan 2024 19:50:18 +0100 Subject: [PATCH] opt in 1e-Jast & fixed bug in pseudo_inv --- .../local/non_h_ints_mu/jast_1e_utils.irp.f | 99 ++++++++----------- .../local/non_h_ints_mu/test_non_h_ints.irp.f | 37 ++++--- src/utils/linear_algebra.irp.f | 42 ++++---- 3 files changed, 85 insertions(+), 93 deletions(-) diff --git a/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f b/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f index 90fcb5bb..79f780b1 100644 --- a/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f +++ b/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f @@ -127,8 +127,8 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit) integer :: info, n_svd, LWORK double precision :: g double precision :: t0, t1 - double precision :: cutoff_svd - double precision, allocatable :: A(:,:,:,:), b(:,:) + double precision :: cutoff_svd, D1_inv + double precision, allocatable :: A(:,:,:,:), b(:) double precision, allocatable :: Pa(:,:), Pb(:,:), Pt(:,:) double precision, allocatable :: u1e_tmp(:), tmp(:,:,:) double precision, allocatable :: U(:,:), D(:), Vt(:,:), work(:) @@ -140,7 +140,7 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit) PROVIDE mo_coef - cutoff_svd = 5d-8 + cutoff_svd = 1d-10 call wall_time(t0) print*, ' PROVIDING the representation of 1e-Jastrow in AOs x AOs ... ' @@ -175,31 +175,7 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit) ! --- --- --- ! get A - !!$OMP PARALLEL & - !!$OMP DEFAULT (NONE) & - !!$OMP PRIVATE (i, j, k, l, ij, kl, ipoint) & - !!$OMP SHARED (n_points_final_grid, ao_num, & - !!$OMP final_weight_at_r_vector, aos_in_r_array_transp, A) - !!$OMP DO COLLAPSE(2) - !do k = 1, ao_num - ! do l = 1, ao_num - ! kl = (k-1)*ao_num + l - ! do i = 1, ao_num - ! do j = 1, ao_num - ! ij = (i-1)*ao_num + j - ! A(ij,kl) = 0.d0 - ! do ipoint = 1, n_points_final_grid - ! A(ij,kl) += final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) & - ! * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,l) - ! enddo - ! enddo - ! enddo - ! enddo - !enddo - !!$OMP END DO - !!$OMP END PARALLEL - - allocate(tmp(ao_num,ao_num,n_points_final_grid)) + allocate(tmp(n_points_final_grid,ao_num,ao_num)) allocate(A(ao_num,ao_num,ao_num,ao_num)) !$OMP PARALLEL & @@ -210,47 +186,41 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit) do j = 1, ao_num do i = 1, ao_num do ipoint = 1, n_points_final_grid - tmp(i,j,ipoint) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) + tmp(ipoint,i,j) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) enddo enddo enddo !$OMP END DO !$OMP END PARALLEL - call dgemm( "N", "T", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , tmp(1,1,1), ao_num*ao_num, tmp(1,1,1), ao_num*ao_num & + call dgemm( "T", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , tmp(1,1,1), n_points_final_grid, tmp(1,1,1), n_points_final_grid & , 0.d0, A(1,1,1,1), ao_num*ao_num) - deallocate(tmp) - - ! --- --- --- ! get b - allocate(b(ao_num,ao_num)) + allocate(b(ao_num*ao_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, u1e_tmp, b) - !$OMP DO COLLAPSE(2) - do i = 1, ao_num - do j = 1, ao_num - b(j,i) = 0.d0 - do ipoint = 1, n_points_final_grid - b(j,i) = b(j,i) + final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) * u1e_tmp(ipoint) - enddo - enddo + do ipoint = 1, n_points_final_grid + u1e_tmp(ipoint) = dsqrt(final_weight_at_r_vector(ipoint)) * u1e_tmp(ipoint) enddo - !$OMP END DO - !$OMP END PARALLEL + + call dgemv("T", n_points_final_grid, ao_num*ao_num, 1.d0, tmp(1,1,1), n_points_final_grid, u1e_tmp(1), 1, 0.d0, b(1), 1) + !call dgemm( "T", "N", ao_num*ao_num, 1, n_points_final_grid, 1.d0 & + ! , tmp(1,1,1), n_points_final_grid, u1e_tmp(1), n_points_final_grid & + ! , 0.d0, b(1), ao_num*ao_num) deallocate(u1e_tmp) + deallocate(tmp) ! --- --- --- ! solve Ax = b - !call get_pseudo_inverse(A, ao_num*ao_num, ao_num*ao_num, ao_num*ao_num, A_inv, ao_num*ao_num, cutoff_svd) +! double precision, allocatable :: A_inv(:,:,:,:) +! allocate(A_inv(ao_num,ao_num,ao_num,ao_num)) +! call get_pseudo_inverse(A(1,1,1,1), ao_num*ao_num, ao_num*ao_num, ao_num*ao_num, A_inv(1,1,1,1), ao_num*ao_num, cutoff_svd) +! A = A_inv allocate(D(ao_num*ao_num), U(ao_num*ao_num,ao_num*ao_num), Vt(ao_num*ao_num,ao_num*ao_num)) @@ -275,15 +245,21 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit) deallocate(work) - n_svd = 0 - do ij = 1, ao_num*ao_num - if(D(ij)/D(1) > cutoff_svd) then - D(ij) = 1.d0 / D(ij) - n_svd = n_svd + 1 - else - D(ij) = 0.d0 - endif - enddo + if(D(1) .lt. 1d-14) then + print*, ' largest singular value is very small:', D(1) + n_svd = 1 + else + n_svd = 0 + D1_inv = 1.d0 / D(1) + do ij = 1, ao_num*ao_num + if(D(ij)*D1_inv > cutoff_svd) then + D(ij) = 1.d0 / D(ij) + n_svd = n_svd + 1 + else + D(ij) = 0.d0 + endif + enddo + endif print*, ' n_svd = ', n_svd !$OMP PARALLEL & @@ -310,7 +286,10 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit) ! --- ! coef_fit = A_inv x b - call dgemv("N", ao_num*ao_num, ao_num*ao_num, 1.d0, A(1,1,1,1), ao_num*ao_num, b(1,1), 1, 0.d0, coef_fit(1,1), 1) + call dgemv("N", ao_num*ao_num, ao_num*ao_num, 1.d0, A(1,1,1,1), ao_num*ao_num, b(1), 1, 0.d0, coef_fit(1,1), 1) + !call dgemm( "N", "N", ao_num*ao_num, 1, ao_num*ao_num, 1.d0 & + ! , A(1,1,1,1), ao_num*ao_num, b(1), ao_num*ao_num & + ! , 0.d0, coef_fit(1,1), ao_num*ao_num) deallocate(A, b) diff --git a/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f b/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f index 2b96591b..c3fde334 100644 --- a/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f +++ b/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f @@ -1232,8 +1232,8 @@ subroutine test_fit_coef_inv() integer :: n_svd, info, lwork, mn double precision :: t1, t2 double precision :: accu, norm, diff - double precision :: cutoff_svd - double precision, allocatable :: A1(:,:), A1_inv(:,:) + double precision :: cutoff_svd, D1_inv + double precision, allocatable :: A1(:,:), A1_inv(:,:), A1_tmp(:,:) double precision, allocatable :: A2(:,:,:,:), tmp(:,:,:), A2_inv(:,:,:,:) double precision, allocatable :: U(:,:), D(:), Vt(:,:), work(:), A2_tmp(:,:,:,:) @@ -1285,7 +1285,7 @@ subroutine test_fit_coef_inv() call wall_time(t1) - allocate(tmp(ao_num,ao_num,n_points_final_grid)) + allocate(tmp(n_points_final_grid,ao_num,ao_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, j, ipoint) & @@ -1294,7 +1294,7 @@ subroutine test_fit_coef_inv() do j = 1, ao_num do i = 1, ao_num do ipoint = 1, n_points_final_grid - tmp(i,j,ipoint) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) + tmp(ipoint,i,j) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) enddo enddo enddo @@ -1303,8 +1303,8 @@ subroutine test_fit_coef_inv() allocate(A2(ao_num,ao_num,ao_num,ao_num)) - call dgemm( "N", "T", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , tmp(1,1,1), ao_num*ao_num, tmp(1,1,1), ao_num*ao_num & + call dgemm( "T", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , tmp(1,1,1), n_points_final_grid, tmp(1,1,1), n_points_final_grid & , 0.d0, A2(1,1,1,1), ao_num*ao_num) deallocate(tmp) @@ -1312,6 +1312,8 @@ subroutine test_fit_coef_inv() call wall_time(t2) print*, ' WALL TIME FOR A2 (min) =', (t2-t1)/60.d0 + allocate(A1_tmp(ao_num*ao_num,ao_num*ao_num)) + A1_tmp = A1 allocate(A2_tmp(ao_num,ao_num,ao_num,ao_num)) A2_tmp = A2 @@ -1322,7 +1324,8 @@ subroutine test_fit_coef_inv() allocate(work(1)) lwork = -1 - call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A2_tmp(1,1,1,1), ao_num*ao_num & + call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A1_tmp(1,1), ao_num*ao_num & + !call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A2_tmp(1,1,1,1), ao_num*ao_num & , D(1), U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num, work, lwork, info) if(info /= 0) then print *, info, ': SVD failed' @@ -1333,7 +1336,8 @@ subroutine test_fit_coef_inv() deallocate(work) allocate(work(lwork)) - call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A2_tmp(1,1,1,1), ao_num*ao_num & + call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A1_tmp(1,1), ao_num*ao_num & + !call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A2_tmp(1,1,1,1), ao_num*ao_num & , D(1), U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num, work, lwork, info) if(info /= 0) then print *, info, ':: SVD failed' @@ -1343,9 +1347,10 @@ subroutine test_fit_coef_inv() deallocate(A2_tmp) deallocate(work) - n_svd = 0 + n_svd = 0 + D1_inv = 1.d0 / D(1) do ij = 1, ao_num*ao_num - if(D(ij)/D(1) > cutoff_svd) then + if(D(ij)*D1_inv > cutoff_svd) then D(ij) = 1.d0 / D(ij) n_svd = n_svd + 1 else @@ -1416,12 +1421,12 @@ subroutine test_fit_coef_inv() ij = (i-1)*ao_num + j diff = dabs(A2_inv(j,i,l,k) - A1_inv(ij,kl)) - !if(diff .gt. cutoff_svd) then - ! print *, ' problem in A2_inv on:', i, i, l, k - ! print *, ' A1_inv :', A1_inv(ij,kl) - ! print *, ' A2_inv :', A2_inv(j,i,l,k) - ! stop - !endif + if(diff .gt. cutoff_svd) then + print *, ' problem in A2_inv on:', i, i, l, k + print *, ' A1_inv :', A1_inv(ij,kl) + print *, ' A2_inv :', A2_inv(j,i,l,k) + stop + endif accu += diff norm += dabs(A1_inv(ij,kl)) diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index a67a219c..c897140e 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1335,6 +1335,7 @@ subroutine get_pseudo_inverse(A, LDA, m, n, C, LDC, cutoff) integer :: info, lwork integer :: i, j, k, n_svd + double precision :: D1_inv double precision, allocatable :: U(:,:), D(:), Vt(:,:), work(:), A_tmp(:,:) allocate (D(n),U(m,n),Vt(n,n),work(1),A_tmp(m,n)) @@ -1358,15 +1359,22 @@ subroutine get_pseudo_inverse(A, LDA, m, n, C, LDC, cutoff) stop 1 endif - n_svd = 0 - do i = 1, n - if(D(i)/D(1) > cutoff) then - D(i) = 1.d0 / D(i) - n_svd = n_svd + 1 - else - D(i) = 0.d0 - endif - enddo + if(D(1) .lt. 1d-14) then + print*, ' largest singular value is very small:', D(1) + n_svd = 1 + else + n_svd = 0 + D1_inv = 1.d0 / D(1) + do i = 1, n + if(D(i)*D1_inv > cutoff) then + D(i) = 1.d0 / D(i) + n_svd = n_svd + 1 + else + D(i) = 0.d0 + endif + enddo + endif + print*, ' n_svd = ', n_svd !$OMP PARALLEL & @@ -1384,14 +1392,14 @@ subroutine get_pseudo_inverse(A, LDA, m, n, C, LDC, cutoff) call dgemm("N", "N", m, n, n_svd, 1.d0, U, m, Vt, n, 0.d0, C, LDC) - !C = 0.d0 - !do i=1,m - ! do j=1,n - ! do k=1,n - ! C(j,i) = C(j,i) + U(i,k) * D(k) * Vt(k,j) - ! enddo - ! enddo - !enddo +! C = 0.d0 +! do i=1,m +! do j=1,n +! do k=1,n +! C(j,i) = C(j,i) + U(i,k) * D(k) * Vt(k,j) +! enddo +! enddo +! enddo deallocate(U,D,Vt,work,A_tmp)