diff --git a/config/ifort_2021_debug.cfg b/config/ifort_2021_debug.cfg new file mode 100644 index 00000000..d70b1465 --- /dev/null +++ b/config/ifort_2021_debug.cfg @@ -0,0 +1,66 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! +# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -msse4.2 -O2 -ip -ftz -g + + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -msse4.2 -O2 -ip -ftz + + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exceptions +# -C : Checks uninitialized variables, array subscripts, etc... 
+# -g : Extra debugging information +# -msse4.2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -msse4.2 -check all -debug all -fpe-all=0 -implicitnone + + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/external/ezfio b/external/ezfio index 0520b5e2..ed1df9f3 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit 0520b5e2cf70e2451c37ce5b7f2f64f6d2e5e956 +Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..33ca5e10 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 6a78e9ff..f4746144 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -1,46 +1,3 @@ -BEGIN_PROVIDER [ integer, mini_basis_size, (128) ] - implicit none - BEGIN_DOC - ! Size of the minimal basis set per element - END_DOC - - mini_basis_size(1:2) = 1 - mini_basis_size(3:4) = 2 - mini_basis_size(5:10) = 5 - mini_basis_size(11:12) = 6 - mini_basis_size(13:18) = 9 - mini_basis_size(19:20) = 13 - mini_basis_size(21:36) = 18 - mini_basis_size(37:38) = 22 - mini_basis_size(39:54) = 27 - mini_basis_size(55:) = 36 -END_PROVIDER - - BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] - implicit none - BEGIN_DOC - ! Number of Cholesky vectors in AO basis - END_DOC - - cholesky_ao_num_guess = ao_num*ao_num !sum(mini_basis_size(int(nucl_charge(:)))) -END_PROVIDER - - BEGIN_PROVIDER [ integer, cholesky_ao_num ] -&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, cholesky_ao_num_guess) ] - use mmap_module - implicit none - BEGIN_DOC - ! Cholesky vectors in AO basis: (ik|a): - ! 
= (ik|jl) = sum_a (ik|a).(a|jl) - END_DOC - - cholesky_ao_num = cholesky_ao_num_guess - - call direct_cholesky(cholesky_ao, ao_num*ao_num, cholesky_ao_num, ao_cholesky_threshold) - print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' - -END_PROVIDER - BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ] implicit none BEGIN_DOC @@ -57,41 +14,74 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, END_PROVIDER -subroutine direct_cholesky(L, ndim, rank, tau) +BEGIN_PROVIDER [ integer, cholesky_ao_num ] +&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, 1) ] implicit none BEGIN_DOC -! Cholesky-decomposed AOs. -! -! https://www.diva-portal.org/smash/get/diva2:396223/FULLTEXT01.pdf : -! Page 32, section 13.5 + ! Cholesky vectors in AO basis: (ik|a): + ! = (ik|jl) = sum_a (ik|a).(a|jl) + ! + ! Last dimension of cholesky_ao is cholesky_ao_num END_DOC - integer :: ndim - integer, intent(out) :: rank - double precision, intent(out) :: L(ndim, ndim) - double precision, intent(in) :: tau - double precision, parameter :: s = 1.d-2 + integer :: rank, ndim + double precision :: tau + double precision, pointer :: L(:,:), L_old(:,:) + + + double precision, parameter :: s = 1.d-1 double precision, parameter :: dscale = 1.d0 - double precision, allocatable :: D(:), Delta(:,:) - integer, allocatable :: Lset(:), Dset(:), addr(:,:) + double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) + integer, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) + integer, allocatable :: Lset_rev(:), Dset_rev(:) - integer :: i,j,k,m,p,q, qj, dj + integer :: i,j,k,m,p,q, qj, dj, p2, q2 integer :: N, np, nq double precision :: Dmax, Dmin, Qmax, f double precision, external :: get_ao_two_e_integral + logical, external :: ao_two_e_integral_zero - allocate( D(ndim), Lset(ndim), Dset(ndim) ) - allocate( addr(2,ndim) ) + integer :: block_size, 
iblock, ierr + + integer(omp_lock_kind), allocatable :: lock(:) + + PROVIDE ao_two_e_integrals_in_map + deallocate(cholesky_ao) + + ndim = ao_num*ao_num + tau = ao_cholesky_threshold + + + allocate(L(ndim,1)) + + print *, '' + print *, 'Cholesky decomposition of AO integrals' + print *, '======================================' + print *, '' + print *, '============ =============' + print *, ' Rank Threshold' + print *, '============ =============' + + + rank = 0 + + allocate( D(ndim), Lset(ndim), LDmap(ndim), DLmap(ndim), Dset(ndim) ) + allocate( Lset_rev(ndim), Dset_rev(ndim), lock(ndim) ) + allocate( addr(3,ndim) ) + do k=1,ndim + call omp_init_lock(lock(k)) + enddo ! 1. k=0 - do i=1,ao_num - do j=1,ao_num + do j=1,ao_num + do i=1,ao_num k = k+1 addr(1,k) = i addr(2,k) = j + addr(3,k) = (i-1)*ao_num + j enddo enddo @@ -107,10 +97,12 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! 2. np=0 + Lset_rev = 0 do p=1,ndim if ( dscale*dscale*Dmax*D(p) > tau*tau ) then np = np+1 Lset(np) = p + Lset_rev(p) = np endif enddo @@ -126,53 +118,148 @@ subroutine direct_cholesky(L, ndim, rank, tau) i = i+1 ! b. - Dmin = max(s*Dmax, tau) + Dmin = max(s*Dmax,tau) ! c. nq=0 - do q=1,np - if ( D(Lset(q)) > Dmin ) then + LDmap = 0 + DLmap = 0 + do p=1,np + if ( D(Lset(p)) > Dmin ) then nq = nq+1 - Dset(nq) = Lset(q) + Dset(nq) = Lset(p) + Dset_rev(Dset(nq)) = nq + LDmap(p) = nq + DLmap(nq) = p endif enddo ! d., e. 
- allocate(Delta(np,nq)) - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(m,k) - do m=1,nq - do k=1,np - Delta(k,m) = get_ao_two_e_integral( & - addr(1,Lset(k)), & - addr(1,Dset(m)), & - addr(2,Lset(k)), & - addr(2,Dset(m)), & - ao_integrals_map) - enddo + block_size = max(N,24) - do p=1,N - f = L(Dset(m),p) - do k=1,np - Delta(k,m) = Delta(k,m) - L(Lset(k),p) * f - enddo + L_old => L + allocate(L(ndim,rank+nq), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' + stop -1 + endif + + !$OMP PARALLEL DO PRIVATE(k) + do k=1,rank + L(:,k) = L_old(:,k) + enddo + !$OMP END PARALLEL DO + + deallocate(L_old) + + allocate(Delta(np,nq), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Delta(np,nq))' + stop -1 + endif + + allocate(Ltmp_p(np,block_size), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' + stop -1 + endif + + allocate(Ltmp_q(nq,block_size), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' + stop -1 + endif + + Delta(:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) + + !$OMP DO + do k=1,N + do p=1,np + Ltmp_p(p,k) = L(Lset(p),k) + enddo + do q=1,nq + Ltmp_q(q,k) = L(Dset(q),k) enddo enddo - !$OMP END PARALLEL DO + !$OMP END DO - ! f. + !$OMP DO SCHEDULE(dynamic,8) + do m=1,nq + + call omp_set_lock(lock(m)) + do k=1,np + ! 
Apply only to (k,m) pairs where k is not in Dset + if (LDmap(k) /= 0) cycle + q = Lset_rev(addr(3,Lset(k))) + if ((0 < q).and.(q < k)) cycle + if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)), & + addr(2,Lset(k)), addr(2,Dset(m)) ) ) then + Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), & + addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + if (q /= 0) Delta(q,m) = Delta(k,m) + endif + enddo + + j = Dset_rev(addr(3,Dset(m))) + if ((0 < j).and.(j < m)) then + call omp_unset_lock(lock(m)) + cycle + endif + + if ((j /= m).and.(j /= 0)) then + call omp_set_lock(lock(j)) + endif + do k=1,nq + ! Apply only to (k,m) pairs both in Dset + p = DLmap(k) + q = Lset_rev(addr(3,Dset(k))) + if ((0 < q).and.(q < p)) cycle + if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & + addr(2,Dset(k)), addr(2,Dset(m)) ) ) then + Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & + addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) + if (q /= 0) Delta(q,m) = Delta(p,m) + if (j /= 0) Delta(p,j) = Delta(p,m) + if (q*j /= 0) Delta(q,j) = Delta(p,m) + endif + enddo + call omp_unset_lock(lock(m)) + if ((j /= m).and.(j /= 0)) then + call omp_unset_lock(lock(j)) + endif + enddo + !$OMP END DO + + !$OMP END PARALLEL + + if (N>0) then + call dgemm('N','T', np, nq, N, -1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + endif + + ! f. Qmax = D(Dset(1)) do q=1,nq Qmax = max(Qmax, D(Dset(q))) enddo ! g. - j = 0 - do while ( (j <= nq).and.(Qmax > Dmin) ) + iblock = 0 + do j=1,nq + + if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit ! i. - j = j+1 rank = N+j + if (iblock == block_size) then + call dgemm('N','T',np,nq,block_size,-1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + iblock = 0 + endif + ! ii. do dj=1,nq qj = Dset(dj) @@ -181,32 +268,51 @@ subroutine direct_cholesky(L, ndim, rank, tau) endif enddo - ! iii. 
- f = 1.d0/dsqrt(Qmax) + L(1:ndim, rank) = 0.d0 + + iblock = iblock+1 do p=1,np - L(Lset(p), rank) = Delta(p,dj) * f + Ltmp_p(p,iblock) = Delta(p,dj) enddo ! iv. - do m=1, nq - f = L(Dset(m),rank) - do k=1, np - Delta(k,m) = Delta(k,m) - L(Lset(k),rank) * f - enddo - enddo + if (iblock > 1) then + call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0, & + Ltmp_p(1,iblock), 1) + endif - do k=1, np - D(Lset(k)) = D(Lset(k)) - L(Lset(k),rank) * L(Lset(k),rank) + ! iii. + f = 1.d0/dsqrt(Qmax) + + !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) + !$OMP DO + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f + L(Lset(p), rank) = Ltmp_p(p,iblock) + D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,iblock) * Ltmp_p(p,iblock) enddo + !$OMP END DO + + !$OMP DO + do q=1,nq + Ltmp_q(q,iblock) = L(Dset(q), rank) + enddo + !$OMP END DO + + !$OMP END PARALLEL Qmax = D(Dset(1)) - do q=1,np - Qmax = max(Qmax, D(Lset(q))) + do q=1,nq + Qmax = max(Qmax, D(Dset(q))) enddo enddo - deallocate(Delta) + print '(I10, 4X, ES12.3)', rank, Qmax + + deallocate(Delta, stat=ierr) + deallocate(Ltmp_p, stat=ierr) + deallocate(Ltmp_q, stat=ierr) ! i. N = N+j @@ -218,13 +324,30 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo np=0 + Lset_rev = 0 do p=1,ndim if ( dscale*dscale*Dmax*D(p) > tau*tau ) then np = np+1 Lset(np) = p + Lset_rev(p) = np endif enddo enddo -end + do k=1,ndim + call omp_destroy_lock(lock(k)) + enddo + + allocate(cholesky_ao(ao_num,ao_num,rank)) + call dcopy(ndim*rank, L, 1, cholesky_ao, 1) + deallocate(L) + cholesky_ao_num = rank + + print *, '============ =============' + print *, '' + print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' + print *, '' + +END_PROVIDER + diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 40c57188..1d77180e 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -112,7 +112,7 @@ subroutine run_ccsd_space_orb ! 
Energy call ccsd_energy_space(nO,nV,tau,t1,energy) - write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' nb_iter = nb_iter + 1 if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then @@ -132,7 +132,7 @@ subroutine run_ccsd_space_orb print*,'' write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' - write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + write(*,'(A15,ES10.2,A3)')' Conv = ', max_r print*,'' if (write_amplitudes) then @@ -1549,19 +1549,26 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) ! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,1, & + cc_list_vir,cc_list_vir,cc_list_vir,(/ cc_list_vir(gam) /), B1) + + !$omp parallel & !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) & !$omp private(a,b,beta) & !$omp default(none) - !$omp do - do beta = 1, nV - do b = 1, nV - do a = 1, nV - B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) - enddo - enddo - enddo - !$omp end do nowait + +! !$omp do +! do beta = 1, nV +! do b = 1, nV +! do a = 1, nV +! B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) +! enddo +! enddo +! enddo +! 
!$omp end do nowait + do i = 1, nO !$omp do do b = 1, nV @@ -1569,7 +1576,7 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam) enddo enddo - !$omp end do nowait + !$omp end do enddo !$omp end parallel diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index a267cc45..09d6a0fe 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -241,7 +241,7 @@ subroutine run_ccsd_spin_orb call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) call wall_time(tfi) - write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', & + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', & uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' if (cc_dev) then print*,'Total:',tfi-tbi,'s' @@ -266,7 +266,7 @@ subroutine run_ccsd_spin_orb print*,'' write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' - write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + write(*,'(A15,ES10.2,A3)')' Conv = ', max_r print*,'' if (write_amplitudes) then diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 31fe67ce..13fa4f1a 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -94,6 +94,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ enddo !$OMP END DO nowait + !$OMP BARRIER !$OMP END PARALLEL double precision, external :: ccsd_t_task_aba @@ -209,9 +210,9 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ Pabc(:) = 1.d0/Pabc(:) print '(A)', '' - print '(A)', ' +----------------------+--------------+----------+' - print '(A)', ' | E(CCSD(T)) | Error | % |' - print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' ======================= ============== ==========' + print '(A)', ' E(CCSD(T)) 
Error % ' + print '(A)', ' ======================= ============== ==========' call wall_time(t00) @@ -256,7 +257,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ if (imin >= bounds(2,isample)) then cycle endif - ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc) + ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc)+1 if (sampled(ieta) == -1_8) then sampled(ieta) = 0_8 @@ -280,9 +281,10 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ call wall_time(t01) if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then - t00 = t01 !$OMP TASKWAIT + call wall_time(t01) + t00 = t01 double precision :: ET, ET2 double precision :: energy_stoch, energy_det @@ -322,17 +324,20 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ energy = energy_det + energy_stoch - print '('' | '',F20.8, '' | '', E12.4,'' | '', F8.2,'' |'')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) + print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) endif !$OMP END MASTER if (imin >= Nabc) exit enddo !$OMP END PARALLEL - print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' ======================= ============== ========== ' print '(A)', '' - deallocate(X_vovv,X_ooov,T_voov,T_oovv) + deallocate(X_vovv) + deallocate(X_ooov) + deallocate(T_voov) + deallocate(T_oovv) end diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index 7909007a..3b048c14 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -591,7 +591,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_ time-time0 ! 
Old print - !print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,1pE16.6,1pE16.6)', c, & + !print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,ES16.6,ES16.6)', c, & ! pt2_data % pt2(pt2_stoch_istate) +E, & ! pt2_data_err % pt2(pt2_stoch_istate), & ! pt2_data % variance(pt2_stoch_istate), & diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f index 73608720..0dc939cb 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f @@ -331,7 +331,7 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sz !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f index e59d21d1..24f4fa10 100644 --- a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f +++ b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f @@ -405,7 +405,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! 
Check convergence diff --git a/src/dav_general_mat/dav_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_dressed_ext_rout.irp.f index c045aa1a..cedaaf0a 100644 --- a/src/dav_general_mat/dav_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_dressed_ext_rout.irp.f @@ -398,7 +398,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_ext_rout.irp.f b/src/dav_general_mat/dav_ext_rout.irp.f index 2621e3a9..deb7e3a9 100644 --- a/src/dav_general_mat/dav_ext_rout.irp.f +++ b/src/dav_general_mat/dav_ext_rout.irp.f @@ -316,7 +316,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f index cd9124e6..9940bf1e 100644 --- a/src/dav_general_mat/dav_general.irp.f +++ b/src/dav_general_mat/dav_general.irp.f @@ -327,7 +327,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! 
Check convergence diff --git a/src/davidson/diagonalization_h_dressed.irp.f b/src/davidson/diagonalization_h_dressed.irp.f index 26853df9..b7179c18 100644 --- a/src/davidson/diagonalization_h_dressed.irp.f +++ b/src/davidson/diagonalization_h_dressed.irp.f @@ -457,7 +457,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index 0c3c6f92..fa8aff80 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -477,7 +477,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 45258c1c..7b559925 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -611,7 +611,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:3,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:3,1:N_st) endif ! 
Check convergence diff --git a/src/davidson/diagonalization_nonsym_h_dressed.irp.f b/src/davidson/diagonalization_nonsym_h_dressed.irp.f index 3ff060a6..96ca84ab 100644 --- a/src/davidson/diagonalization_nonsym_h_dressed.irp.f +++ b/src/davidson/diagonalization_nonsym_h_dressed.irp.f @@ -436,7 +436,7 @@ subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, !don't print continue else - write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, E11.3))') iter-1, to_print(1:2,1:N_st) + write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/determinants/dipole_moments.irp.f b/src/determinants/dipole_moments.irp.f index 06fca0cd..e445c56b 100644 --- a/src/determinants/dipole_moments.irp.f +++ b/src/determinants/dipole_moments.irp.f @@ -66,9 +66,9 @@ END_PROVIDER write(*,'(i16)',advance='no') i end do write(*,*) '' - write(*,'(A17,100(1pE16.8))') 'x_dipole_moment = ',x_dipole_moment - write(*,'(A17,100(1pE16.8))') 'y_dipole_moment = ',y_dipole_moment - write(*,'(A17,100(1pE16.8))') 'z_dipole_moment = ',z_dipole_moment + write(*,'(A17,100(ES16.8))') 'x_dipole_moment = ',x_dipole_moment + write(*,'(A17,100(ES16.8))') 'y_dipole_moment = ',y_dipole_moment + write(*,'(A17,100(ES16.8))') 'z_dipole_moment = ',z_dipole_moment !print*, 'x_dipole_moment = ',x_dipole_moment !print*, 'y_dipole_moment = ',y_dipole_moment !print*, 'z_dipole_moment = ',z_dipole_moment diff --git a/src/ezfio_files/NEED b/src/ezfio_files/NEED index d06d604c..1766924f 100644 --- a/src/ezfio_files/NEED +++ b/src/ezfio_files/NEED @@ -1,2 +1,3 @@ mpi zmq +utils diff --git a/src/ezfio_files/ezfio.irp.f b/src/ezfio_files/ezfio.irp.f index 4f53b173..7e414a04 100644 --- a/src/ezfio_files/ezfio.irp.f +++ b/src/ezfio_files/ezfio.irp.f @@ -5,7 +5,9 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ] ! variable if it is set, or as the 1st argument of the command line. 
END_DOC - PROVIDE mpi_initialized + PROVIDE mpi_initialized output_wall_time_0 + + integer :: i ! Get the QPACKAGE_INPUT environment variable call getenv('QPACKAGE_INPUT',ezfio_filename) @@ -44,11 +46,14 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ] END_PROVIDER BEGIN_PROVIDER [ character*(1024), ezfio_work_dir ] + use c_functions implicit none BEGIN_DOC ! EZFIO/work/ END_DOC - call ezfio_set_work_empty(.False.) + logical :: b + b = mkl_serv_intel_cpu_true() /= 1 + call ezfio_set_work_empty(b) ezfio_work_dir = trim(ezfio_filename)//'/work/' END_PROVIDER diff --git a/src/mo_optimization/first_gradient_opt.irp.f b/src/mo_optimization/first_gradient_opt.irp.f index d6918a00..f08b9d1f 100644 --- a/src/mo_optimization/first_gradient_opt.irp.f +++ b/src/mo_optimization/first_gradient_opt.irp.f @@ -111,7 +111,7 @@ subroutine first_gradient_opt(n,v_grad) if (debug) then print*,'Matrix containing the gradient :' do i = 1, mo_num - write(*,'(100(E12.5))') A(i,1:mo_num) + write(*,'(100(ES12.5))') A(i,1:mo_num) enddo endif diff --git a/src/tc_bi_ortho/print_tc_dump.irp.f b/src/tc_bi_ortho/print_tc_dump.irp.f index 868de444..37dfe051 100644 --- a/src/tc_bi_ortho/print_tc_dump.irp.f +++ b/src/tc_bi_ortho/print_tc_dump.irp.f @@ -62,7 +62,7 @@ subroutine KMat_tilde_dump() do j = 1, mo_num do i = 1, mo_num ! TCHint convention - write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_two_e_chemist(j,i,l,k), i, j, k, l + write(33, '(ES15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_two_e_chemist(j,i,l,k), i, j, k, l enddo enddo enddo @@ -71,7 +71,7 @@ subroutine KMat_tilde_dump() do j = 1, mo_num do i = 1, mo_num ! 
TCHint convention - write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_one_e(i,j), i, j, 0, 0 + write(33, '(ES15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_one_e(i,j), i, j, 0, 0 enddo enddo @@ -128,7 +128,7 @@ subroutine ERI_dump() do k = 1, mo_num do j = 1, mo_num do i = 1, mo_num - write(33, '(4(I4, 2X), 4X, E15.7)') i, j, k, l, a1(i,j,k,l) + write(33, '(4(I4, 2X), 4X, ES15.7)') i, j, k, l, a1(i,j,k,l) enddo enddo enddo @@ -167,8 +167,8 @@ subroutine LMat_tilde_dump() !write(33, '(6(I4, 2X), 4X, E15.7)') i, j, k, l, m, n, integral ! TCHint convention if(dabs(integral).gt.1d-10) then - write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, i, j, k, l, m, n - !write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, l, m, n, i, j, k + write(33, '(ES15.7, 4X, 6(I4, 2X))') -integral/3.d0, i, j, k, l, m, n + !write(33, '(ES15.7, 4X, 6(I4, 2X))') -integral/3.d0, l, m, n, i, j, k endif enddo enddo diff --git a/src/tc_scf/molden_lr_mos.irp.f b/src/tc_scf/molden_lr_mos.irp.f index b86009ee..98c7b230 100644 --- a/src/tc_scf/molden_lr_mos.irp.f +++ b/src/tc_scf/molden_lr_mos.irp.f @@ -72,7 +72,7 @@ subroutine molden_lr write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -170,7 +170,7 @@ subroutine molden_lr write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_r_coef(iorder(j),i) enddo write (i_unit_output,*) 'Sym= 1' @@ -178,7 +178,7 @@ subroutine molden_lr write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, 
mo_l_coef(iorder(j),i) enddo enddo close(i_unit_output) @@ -235,7 +235,7 @@ subroutine molden_l() write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -333,7 +333,7 @@ subroutine molden_l() write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_l_coef(iorder(j),i) enddo enddo close(i_unit_output) @@ -390,7 +390,7 @@ subroutine molden_r() write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -488,7 +488,7 @@ subroutine molden_r() write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_r_coef(iorder(j),i) enddo enddo close(i_unit_output) diff --git a/src/tools/molden.irp.f b/src/tools/molden.irp.f index 830a141e..e5902a6f 100644 --- a/src/tools/molden.irp.f +++ b/src/tools/molden.irp.f @@ -44,7 +44,7 @@ program molden write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -142,7 +142,7 @@ program molden write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num 
- write(i_unit_output, '(I6,2X,E20.10)') j, mo_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_coef(iorder(j),i) enddo enddo close(i_unit_output) diff --git a/src/tools/print_ci_vectors.irp.f b/src/tools/print_ci_vectors.irp.f index 97dfdc0b..d5f86213 100644 --- a/src/tools/print_ci_vectors.irp.f +++ b/src/tools/print_ci_vectors.irp.f @@ -28,7 +28,7 @@ subroutine routine do i = 1, N_det print *, 'Determinant ', i call debug_det(psi_det(1,1,i),N_int) - print '(4E20.12,X)', (psi_coef(i,k), k=1,N_states) + print '(4ES20.12,X)', (psi_coef(i,k), k=1,N_states) print *, '' print *, '' enddo diff --git a/src/utils/c_functions.f90 b/src/utils/c_functions.f90 index 65d4ad62..a9c8900b 100644 --- a/src/utils/c_functions.f90 +++ b/src/utils/c_functions.f90 @@ -57,6 +57,12 @@ module c_functions end subroutine sscanf_sd_c end interface + interface + integer(kind=c_int) function mkl_serv_intel_cpu_true() bind(C) + use iso_c_binding + end function + end interface + contains integer function atoi(a) @@ -131,4 +137,3 @@ subroutine usleep(us) call usleep_c(u) end subroutine usleep - diff --git a/src/utils/fast_mkl.c b/src/utils/fast_mkl.c new file mode 100644 index 00000000..aa1f82f1 --- /dev/null +++ b/src/utils/fast_mkl.c @@ -0,0 +1,5 @@ +int mkl_serv_intel_cpu_true() { + return 1; +} + + diff --git a/src/utils/format_w_error.irp.f b/src/utils/format_w_error.irp.f index 7f7458b6..c253456e 100644 --- a/src/utils/format_w_error.irp.f +++ b/src/utils/format_w_error.irp.f @@ -39,7 +39,7 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err write(str_size,'(I3)') size_nb ! Error - write(str_exp,'(1pE20.0)') error + write(str_exp,'(ES20.0)') error str_error = trim(adjustl(str_exp)) ! 
Number of digit: Y (FX.Y) from the exponent diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index dafcf7af..2db614b4 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -48,32 +48,56 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k if (do_ao_cholesky) then - double precision, allocatable :: buffer(:,:,:) - !$OMP PARALLEL & - !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) & - !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)& - !$OMP DEFAULT(NONE) - allocate(buffer(mo_num,mo_num,mo_num)) + double precision, allocatable :: buffer(:,:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) + allocate(v1(cholesky_ao_num,n1,n3), v2(cholesky_ao_num,n2,n4)) + allocate(buffer(n1,n3,n2,n4)) + + !$OMP PARALLEL PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k) !$OMP DO - do i4 = 1, n4 + do i3=1,n3 + idx3 = list3(i3) + do i1=1,n1 + idx1 = list1(i1) + do k=1,cholesky_ao_num + v1(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO + do i4=1,n4 idx4 = list4(i4) - call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) - do i2 = 1, n2 + do i2=1,n2 idx2 = list2(i2) - do i3 = 1, n3 - idx3 = list3(i3) + do k=1,cholesky_ao_num + v2(k,i2,i4) = cholesky_mo_transp(k,idx2,idx4) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP BARRIER + !$OMP END PARALLEL + + call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & + v1, cholesky_ao_num, & + v2, cholesky_ao_num, 0.d0, buffer, n1*n3) + + deallocate(v1,v2) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 do i1 = 1, n1 - idx1 = list1(i1) - v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2) + v(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) enddo enddo 
enddo enddo - !$OMP END DO - deallocate(buffer) - !$OMP END PARALLEL + !$OMP END PARALLEL DO else double precision :: get_two_e_integral @@ -112,6 +136,7 @@ BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] if (do_ao_cholesky) then integer :: i1,i2,i3,i4 double precision, allocatable :: buffer(:,:,:) + call set_multiple_levels_omp(.False.) !$OMP PARALLEL & !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& diff --git a/src/utils_trust_region/rotation_matrix_iterative.irp.f b/src/utils_trust_region/rotation_matrix_iterative.irp.f index f268df04..db3d5c99 100644 --- a/src/utils_trust_region/rotation_matrix_iterative.irp.f +++ b/src/utils_trust_region/rotation_matrix_iterative.irp.f @@ -73,7 +73,7 @@ subroutine rotation_matrix_iterative(m,X,R) !print*,'R' !do i = 1, m - ! write(*,'(10(E12.5))') R(i,:) + ! write(*,'(10(ES12.5))') R(i,:) !enddo do i = 1, m @@ -82,7 +82,7 @@ subroutine rotation_matrix_iterative(m,X,R) !print*,'RRT' !do i = 1, m - ! write(*,'(10(E12.5))') RRT(i,:) + ! write(*,'(10(ES12.5))') RRT(i,:) !enddo max_elem = 0d0 diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f index b7dcf875..e98bbfb7 100644 --- a/src/utils_trust_region/trust_region_optimal_lambda.irp.f +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -336,7 +336,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 endif - !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + !write(*,'(a,ES12.5,a,ES12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 ! 
Newton's step y = -(1d0/DABS(d_2))*d_1 @@ -345,7 +345,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) if (DABS(y) > alpha) then y = alpha * (y/DABS(y)) ! preservation of the sign of y endif - !write(*,'(a,E12.5)') ' Step length: ', y + !write(*,'(a,ES12.5)') ' Step length: ', y ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 @@ -414,7 +414,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) else alpha = 0.25d0 * alpha endif - !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + !write(*,'(a,ES12.5)') ' New trust length alpha: ', alpha ! cancellaion of the step if rho < 0.1 if (rho_2 < thresh_rho_2) then !0.1d0) then