mirror of
https://github.com/QuantumPackage/qp2.git
synced 2024-11-15 18:43:51 +01:00
Merge branch 'dev-stable' of https://github.com/QuantumPackage/qp2 into dev-stable
This commit is contained in:
commit
9eba86fea0
@ -10,7 +10,8 @@
|
||||
- Added many types of integrals
|
||||
- Accelerated four-index transformation
|
||||
- Added transcorrelated SCF
|
||||
- Added transcorrelated CIPSI
|
||||
- Added bi-orthonormal transcorrelated CIPSI
|
||||
- Added Cholesky decomposition of AO integrals
|
||||
- Added CCSD and CCSD(T)
|
||||
- Added MO localization
|
||||
- Changed coupling parameters for ROHF
|
||||
@ -20,7 +21,7 @@
|
||||
- Removed cryptokit dependency in OCaml
|
||||
- Using now standard convention in RDM
|
||||
- Added molecular properties
|
||||
- [ ] Added GTOs with complex exponent
|
||||
- Added GTOs with complex exponent
|
||||
|
||||
*** TODO: take from dev
|
||||
- Updated version of f77-zmq
|
||||
|
66
config/ifort_2021_debug.cfg
Normal file
66
config/ifort_2021_debug.cfg
Normal file
@ -0,0 +1,66 @@
|
||||
# Common flags
|
||||
##############
|
||||
#
|
||||
# -mkl=[parallel|sequential] : Use the MKL library
|
||||
# --ninja : Allow the utilisation of ninja. It is mandatory !
|
||||
# --align=32 : Align all provided arrays on a 32-byte boundary
|
||||
#
|
||||
[COMMON]
|
||||
FC : ifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL
|
||||
|
||||
# Global options
|
||||
################
|
||||
#
|
||||
# 1 : Activate
|
||||
# 0 : Deactivate
|
||||
#
|
||||
[OPTION]
|
||||
MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
|
||||
CACHE : 0 ; Enable cache_compile.py
|
||||
OPENMP : 1 ; Append OpenMP flags
|
||||
|
||||
# Optimization flags
|
||||
####################
|
||||
#
|
||||
# -xHost : Compile a binary optimized for the current architecture
|
||||
# -O2 : O3 not better than O2.
|
||||
# -ip : Inter-procedural optimizations
|
||||
# -ftz : Flushes denormal results to zero
|
||||
#
|
||||
[OPT]
|
||||
FC : -traceback
|
||||
FCFLAGS : -msse4.2 -O2 -ip -ftz -g
|
||||
|
||||
|
||||
# Profiling flags
|
||||
#################
|
||||
#
|
||||
[PROFILE]
|
||||
FC : -p -g
|
||||
FCFLAGS : -msse4.2 -O2 -ip -ftz
|
||||
|
||||
|
||||
# Debugging flags
|
||||
#################
|
||||
#
|
||||
# -traceback : Activate backtrace on runtime
|
||||
# -fpe0 : All floating point exaceptions
|
||||
# -C : Checks uninitialized variables, array subscripts, etc...
|
||||
# -g : Extra debugging information
|
||||
# -msse4.2 : Valgrind needs a very simple x86 executable
|
||||
#
|
||||
[DEBUG]
|
||||
FC : -g -traceback
|
||||
FCFLAGS : -msse4.2 -check all -debug all -fpe-all=0 -implicitnone
|
||||
|
||||
|
||||
# OpenMP flags
|
||||
#################
|
||||
#
|
||||
[OPENMP]
|
||||
FC : -qopenmp
|
||||
IRPF90_FLAGS : --openmp
|
||||
|
13
etc/qp.rc
13
etc/qp.rc
@ -188,7 +188,18 @@ _qp_Complete()
|
||||
;;
|
||||
esac;;
|
||||
set_file)
|
||||
COMPREPLY=( $(compgen -W "$(for i in */ $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) )
|
||||
# Array to store directory names
|
||||
dirs=""
|
||||
|
||||
# Find directories containing "ezfio/.version" file recursively
|
||||
for i in $(find . -name ezfio | sed 's/ezfio$/.version/')
|
||||
do
|
||||
dir_name=${i%/.version} # Remove the ".version" suffix
|
||||
dir_name=${dir_name#./} # Remove the leading "./"
|
||||
dirs+="./$dir_name "
|
||||
done
|
||||
|
||||
COMPREPLY=( $(compgen -W "$dirs" -- ${cur} ) )
|
||||
return 0
|
||||
;;
|
||||
plugins)
|
||||
|
@ -4,6 +4,12 @@ doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ]
|
||||
interface: ezfio,provider,ocaml
|
||||
default: None
|
||||
|
||||
[io_ao_cholesky]
|
||||
type: Disk_access
|
||||
doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ]
|
||||
interface: ezfio,provider,ocaml
|
||||
default: None
|
||||
|
||||
[ao_integrals_threshold]
|
||||
type: Threshold
|
||||
doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero
|
||||
|
@ -1,121 +1,3 @@
|
||||
BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ]
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Number of Cholesky vectors in AO basis
|
||||
END_DOC
|
||||
|
||||
cholesky_ao_num_guess = ao_num*ao_num
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, cholesky_ao_num_guess) ]
|
||||
use mmap_module
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Cholesky vectors in AO basis: (ik|a):
|
||||
! <ij|kl> = (ik|jl) = sum_a (ik|a).(a|jl)
|
||||
END_DOC
|
||||
|
||||
type(c_ptr) :: ptr
|
||||
integer :: fd, i,j,k,l,m,rank
|
||||
double precision, pointer :: ao_integrals(:,:,:,:)
|
||||
double precision, external :: ao_two_e_integral
|
||||
|
||||
! Store AO integrals in a memory mapped file
|
||||
call mmap(trim(ezfio_work_dir)//'ao_integrals', &
|
||||
(/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), &
|
||||
8, fd, .False., ptr)
|
||||
call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/))
|
||||
|
||||
print*, 'Providing the AO integrals (Cholesky)'
|
||||
call wall_time(wall_1)
|
||||
call cpu_time(cpu_1)
|
||||
|
||||
ao_integrals = 0.d0
|
||||
|
||||
double precision :: integral, cpu_1, cpu_2, wall_1, wall_2
|
||||
logical, external :: ao_two_e_integral_zero
|
||||
double precision, external :: get_ao_two_e_integral
|
||||
|
||||
if (read_ao_two_e_integrals) then
|
||||
PROVIDE ao_two_e_integrals_in_map
|
||||
|
||||
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2)
|
||||
do m=0,9
|
||||
do l=1+m,ao_num,10
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
do j=1,ao_num
|
||||
do k=1,ao_num
|
||||
do i=1,ao_num
|
||||
if (ao_two_e_integral_zero(i,j,k,l)) cycle
|
||||
integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map)
|
||||
ao_integrals(i,k,j,l) = integral
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
enddo
|
||||
!$OMP MASTER
|
||||
call wall_time(wall_2)
|
||||
print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1
|
||||
!$OMP END MASTER
|
||||
enddo
|
||||
!$OMP END PARALLEL
|
||||
|
||||
else
|
||||
|
||||
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2)
|
||||
do m=0,9
|
||||
do l=1+m,ao_num,10
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
do j=1,l
|
||||
do k=1,ao_num
|
||||
do i=1,min(k,j)
|
||||
if (ao_two_e_integral_zero(i,j,k,l)) cycle
|
||||
integral = ao_two_e_integral(i,k,j,l)
|
||||
ao_integrals(i,k,j,l) = integral
|
||||
ao_integrals(k,i,j,l) = integral
|
||||
ao_integrals(i,k,l,j) = integral
|
||||
ao_integrals(k,i,l,j) = integral
|
||||
ao_integrals(j,l,i,k) = integral
|
||||
ao_integrals(j,l,k,i) = integral
|
||||
ao_integrals(l,j,i,k) = integral
|
||||
ao_integrals(l,j,k,i) = integral
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
enddo
|
||||
!$OMP MASTER
|
||||
call wall_time(wall_2)
|
||||
print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1
|
||||
!$OMP END MASTER
|
||||
enddo
|
||||
!$OMP END PARALLEL
|
||||
|
||||
call wall_time(wall_2)
|
||||
call cpu_time(cpu_2)
|
||||
print*, 'AO integrals provided:'
|
||||
print*, ' cpu time :',cpu_2 - cpu_1, 's'
|
||||
print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )'
|
||||
|
||||
endif
|
||||
|
||||
! Call Lapack
|
||||
cholesky_ao_num = cholesky_ao_num_guess
|
||||
call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao)
|
||||
print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)'
|
||||
|
||||
! Remove mmap
|
||||
double precision, external :: getUnitAndOpen
|
||||
call munmap( &
|
||||
(/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), &
|
||||
8, fd, ptr)
|
||||
open(unit=99,file=trim(ezfio_work_dir)//'ao_integrals')
|
||||
close(99, status='delete')
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ]
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
@ -131,3 +13,397 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num,
|
||||
enddo
|
||||
END_PROVIDER
|
||||
|
||||
|
||||
BEGIN_PROVIDER [ integer, cholesky_ao_num ]
|
||||
&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, 1) ]
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Cholesky vectors in AO basis: (ik|a):
|
||||
! <ij|kl> = (ik|jl) = sum_a (ik|a).(a|jl)
|
||||
!
|
||||
! Last dimension of cholesky_ao is cholesky_ao_num
|
||||
END_DOC
|
||||
|
||||
integer :: rank, ndim
|
||||
double precision :: tau
|
||||
double precision, pointer :: L(:,:), L_old(:,:)
|
||||
|
||||
|
||||
double precision :: s
|
||||
double precision, parameter :: dscale = 1.d0
|
||||
|
||||
double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:)
|
||||
integer, allocatable :: Lset(:), Dset(:), addr(:,:)
|
||||
logical, allocatable :: computed(:)
|
||||
|
||||
integer :: i,j,k,m,p,q, qj, dj, p2, q2
|
||||
integer :: N, np, nq
|
||||
|
||||
double precision :: Dmax, Dmin, Qmax, f
|
||||
double precision, external :: get_ao_two_e_integral
|
||||
logical, external :: ao_two_e_integral_zero
|
||||
|
||||
double precision, external :: ao_two_e_integral
|
||||
integer :: block_size, iblock, ierr
|
||||
|
||||
double precision :: mem
|
||||
double precision, external :: memory_of_double, memory_of_int
|
||||
|
||||
integer, external :: getUnitAndOpen
|
||||
integer :: iunit
|
||||
|
||||
ndim = ao_num*ao_num
|
||||
deallocate(cholesky_ao)
|
||||
|
||||
if (read_ao_cholesky) then
|
||||
print *, 'Reading Cholesky vectors from disk...'
|
||||
iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R')
|
||||
read(iunit) rank
|
||||
allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr)
|
||||
read(iunit) cholesky_ao
|
||||
close(iunit)
|
||||
cholesky_ao_num = rank
|
||||
|
||||
else
|
||||
|
||||
PROVIDE nucl_coord
|
||||
|
||||
if (do_direct_integrals) then
|
||||
if (ao_two_e_integral(1,1,1,1) < huge(1.d0)) then
|
||||
! Trigger providers inside ao_two_e_integral
|
||||
continue
|
||||
endif
|
||||
else
|
||||
PROVIDE ao_two_e_integrals_in_map
|
||||
endif
|
||||
|
||||
tau = ao_cholesky_threshold
|
||||
|
||||
mem = 6.d0 * memory_of_double(ndim) + 6.d0 * memory_of_int(ndim)
|
||||
call check_mem(mem, irp_here)
|
||||
|
||||
call print_memory_usage()
|
||||
|
||||
allocate(L(ndim,1))
|
||||
|
||||
print *, ''
|
||||
print *, 'Cholesky decomposition of AO integrals'
|
||||
print *, '======================================'
|
||||
print *, ''
|
||||
print *, '============ ============='
|
||||
print *, ' Rank Threshold'
|
||||
print *, '============ ============='
|
||||
|
||||
|
||||
rank = 0
|
||||
|
||||
allocate( D(ndim), Lset(ndim), Dset(ndim) )
|
||||
allocate( addr(3,ndim) )
|
||||
|
||||
! 1.
|
||||
k=0
|
||||
do j=1,ao_num
|
||||
do i=1,ao_num
|
||||
k = k+1
|
||||
addr(1,k) = i
|
||||
addr(2,k) = j
|
||||
addr(3,k) = (i-1)*ao_num + j
|
||||
enddo
|
||||
enddo
|
||||
|
||||
if (do_direct_integrals) then
|
||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided)
|
||||
do i=1,ndim
|
||||
D(i) = ao_two_e_integral(addr(1,i), addr(2,i), &
|
||||
addr(1,i), addr(2,i))
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
else
|
||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided)
|
||||
do i=1,ndim
|
||||
D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), &
|
||||
addr(2,i), addr(2,i), &
|
||||
ao_integrals_map)
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
endif
|
||||
|
||||
Dmax = maxval(D)
|
||||
|
||||
! 2.
|
||||
np=0
|
||||
do p=1,ndim
|
||||
if ( dscale*dscale*Dmax*D(p) > tau*tau ) then
|
||||
np = np+1
|
||||
Lset(np) = p
|
||||
endif
|
||||
enddo
|
||||
|
||||
! 3.
|
||||
N = 0
|
||||
|
||||
! 4.
|
||||
i = 0
|
||||
|
||||
! 5.
|
||||
do while ( (Dmax > tau).and.(rank < ndim) )
|
||||
! a.
|
||||
i = i+1
|
||||
|
||||
s = 0.01d0
|
||||
|
||||
! Inrease s until the arrays fit in memory
|
||||
do while (.True.)
|
||||
|
||||
! b.
|
||||
Dmin = max(s*Dmax,tau)
|
||||
|
||||
! c.
|
||||
nq=0
|
||||
do p=1,np
|
||||
if ( D(Lset(p)) > Dmin ) then
|
||||
nq = nq+1
|
||||
Dset(nq) = Lset(p)
|
||||
endif
|
||||
enddo
|
||||
|
||||
call total_memory(mem)
|
||||
mem = mem &
|
||||
+ np*memory_of_double(nq) &! Delta(np,nq)
|
||||
+ (rank+nq)* memory_of_double(ndim) &! L(ndim,rank+nq)
|
||||
+ (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size)
|
||||
|
||||
if (mem > qp_max_mem) then
|
||||
s = s*2.d0
|
||||
else
|
||||
exit
|
||||
endif
|
||||
|
||||
if ((s > 1.d0).or.(nq == 0)) then
|
||||
call print_memory_usage()
|
||||
print *, 'Not enough memory. Reduce cholesky threshold'
|
||||
stop -1
|
||||
endif
|
||||
|
||||
enddo
|
||||
|
||||
! d., e.
|
||||
block_size = max(N,24)
|
||||
|
||||
L_old => L
|
||||
allocate(L(ndim,rank+nq), stat=ierr)
|
||||
if (ierr /= 0) then
|
||||
call print_memory_usage()
|
||||
print *, irp_here, ': allocation failed : (L(ndim,rank+nq))'
|
||||
stop -1
|
||||
endif
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(k)
|
||||
do k=1,rank
|
||||
L(:,k) = L_old(:,k)
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
deallocate(L_old)
|
||||
|
||||
allocate(Delta(np,nq), stat=ierr)
|
||||
if (ierr /= 0) then
|
||||
call print_memory_usage()
|
||||
print *, irp_here, ': allocation failed : (Delta(np,nq))'
|
||||
stop -1
|
||||
endif
|
||||
|
||||
allocate(Ltmp_p(np,block_size), stat=ierr)
|
||||
if (ierr /= 0) then
|
||||
call print_memory_usage()
|
||||
print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))'
|
||||
stop -1
|
||||
endif
|
||||
|
||||
allocate(Ltmp_q(nq,block_size), stat=ierr)
|
||||
if (ierr /= 0) then
|
||||
call print_memory_usage()
|
||||
print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))'
|
||||
stop -1
|
||||
endif
|
||||
|
||||
|
||||
allocate(computed(nq))
|
||||
|
||||
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j)
|
||||
|
||||
!$OMP DO
|
||||
do q=1,nq
|
||||
Delta(:,q) = 0.d0
|
||||
computed(q) = .False.
|
||||
enddo
|
||||
!$OMP ENDDO NOWAIT
|
||||
|
||||
!$OMP DO
|
||||
do k=1,N
|
||||
do p=1,np
|
||||
Ltmp_p(p,k) = L(Lset(p),k)
|
||||
enddo
|
||||
do q=1,nq
|
||||
Ltmp_q(q,k) = L(Dset(q),k)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
|
||||
!$OMP BARRIER
|
||||
!$OMP END PARALLEL
|
||||
|
||||
if (N>0) then
|
||||
call dgemm('N','T', np, nq, N, -1.d0, &
|
||||
Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np)
|
||||
endif
|
||||
|
||||
! f.
|
||||
Qmax = D(Dset(1))
|
||||
do q=1,nq
|
||||
Qmax = max(Qmax, D(Dset(q)))
|
||||
enddo
|
||||
|
||||
! g.
|
||||
|
||||
iblock = 0
|
||||
do j=1,nq
|
||||
|
||||
if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit
|
||||
! i.
|
||||
rank = N+j
|
||||
|
||||
if (iblock == block_size) then
|
||||
call dgemm('N','T',np,nq,block_size,-1.d0, &
|
||||
Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np)
|
||||
iblock = 0
|
||||
endif
|
||||
|
||||
! ii.
|
||||
do dj=1,nq
|
||||
qj = Dset(dj)
|
||||
if (D(qj) == Qmax) then
|
||||
exit
|
||||
endif
|
||||
enddo
|
||||
|
||||
L(1:ndim, rank) = 0.d0
|
||||
|
||||
if (.not.computed(dj)) then
|
||||
m = dj
|
||||
!$OMP PARALLEL DO PRIVATE(k) SCHEDULE(guided)
|
||||
do k=np,1,-1
|
||||
if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)),&
|
||||
addr(2,Lset(k)), addr(2,Dset(m)) ) ) then
|
||||
if (do_direct_integrals) then
|
||||
Delta(k,m) = Delta(k,m) + &
|
||||
ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)),&
|
||||
addr(1,Dset(m)), addr(2,Dset(m)))
|
||||
else
|
||||
Delta(k,m) = Delta(k,m) + &
|
||||
get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)),&
|
||||
addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map)
|
||||
endif
|
||||
endif
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
computed(dj) = .True.
|
||||
endif
|
||||
|
||||
iblock = iblock+1
|
||||
do p=1,np
|
||||
Ltmp_p(p,iblock) = Delta(p,dj)
|
||||
enddo
|
||||
|
||||
! iv.
|
||||
if (iblock > 1) then
|
||||
call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0,&
|
||||
Ltmp_p(1,iblock), 1)
|
||||
endif
|
||||
|
||||
! iii.
|
||||
f = 1.d0/dsqrt(Qmax)
|
||||
|
||||
!$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared)
|
||||
!$OMP DO
|
||||
do p=1,np
|
||||
Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f
|
||||
L(Lset(p), rank) = Ltmp_p(p,iblock)
|
||||
D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,iblock) * Ltmp_p(p,iblock)
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP DO
|
||||
do q=1,nq
|
||||
Ltmp_q(q,iblock) = L(Dset(q), rank)
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP END PARALLEL
|
||||
|
||||
Qmax = D(Dset(1))
|
||||
do q=1,nq
|
||||
Qmax = max(Qmax, D(Dset(q)))
|
||||
enddo
|
||||
|
||||
enddo
|
||||
|
||||
print '(I10, 4X, ES12.3)', rank, Qmax
|
||||
|
||||
deallocate(computed)
|
||||
deallocate(Delta)
|
||||
deallocate(Ltmp_p)
|
||||
deallocate(Ltmp_q)
|
||||
|
||||
! i.
|
||||
N = rank
|
||||
|
||||
! j.
|
||||
Dmax = D(Lset(1))
|
||||
do p=1,np
|
||||
Dmax = max(Dmax, D(Lset(p)))
|
||||
enddo
|
||||
|
||||
np=0
|
||||
do p=1,ndim
|
||||
if ( dscale*dscale*Dmax*D(p) > tau*tau ) then
|
||||
np = np+1
|
||||
Lset(np) = p
|
||||
endif
|
||||
enddo
|
||||
|
||||
enddo
|
||||
|
||||
allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr)
|
||||
if (ierr /= 0) then
|
||||
call print_memory_usage()
|
||||
print *, irp_here, ': Allocation failed'
|
||||
stop -1
|
||||
endif
|
||||
!$OMP PARALLEL DO PRIVATE(k)
|
||||
do k=1,rank
|
||||
call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1)
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
deallocate(L)
|
||||
cholesky_ao_num = rank
|
||||
|
||||
print *, '============ ============='
|
||||
print *, ''
|
||||
|
||||
if (write_ao_cholesky) then
|
||||
print *, 'Writing Cholesky vectors to disk...'
|
||||
iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'W')
|
||||
write(iunit) rank
|
||||
write(iunit) cholesky_ao
|
||||
close(iunit)
|
||||
call ezfio_set_ao_two_e_ints_io_ao_cholesky('Read')
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)'
|
||||
print *, ''
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
|
@ -460,7 +460,7 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num)
|
||||
!$OMP PARALLEL DO PRIVATE(i,k) &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP SHARED (ao_num,ao_two_e_integral_schwartz) &
|
||||
!$OMP SCHEDULE(dynamic)
|
||||
!$OMP SCHEDULE(guided)
|
||||
do i=1,ao_num
|
||||
do k=1,i
|
||||
ao_two_e_integral_schwartz(i,k) = dsqrt(ao_two_e_integral(i,i,k,k))
|
||||
@ -951,7 +951,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
|
||||
double precision :: X(0:max_dim)
|
||||
double precision :: Y(0:max_dim)
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y
|
||||
integer :: nx, ix,iy,ny
|
||||
integer :: nx, ix,iy,ny,ib
|
||||
|
||||
ASSERT (a>2)
|
||||
!DIR$ LOOP COUNT(8)
|
||||
@ -974,8 +974,43 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
|
||||
enddo
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_10,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_10,d,nd)
|
||||
! call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd)
|
||||
if (nx >= 0) then
|
||||
select case (nx)
|
||||
case (0)
|
||||
d(0) = d(0) + B_10(0) * X(0)
|
||||
d(1) = d(1) + B_10(1) * X(0)
|
||||
d(2) = d(2) + B_10(2) * X(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + B_10(0) * X(0)
|
||||
d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0)
|
||||
d(2) = d(2) + B_10(1) * X(1) + B_10(2) * X(0)
|
||||
d(3) = d(3) + B_10(2) * X(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + B_10(0) * X(0)
|
||||
d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0)
|
||||
d(2) = d(2) + B_10(0) * X(2) + B_10(1) * X(1) + B_10(2) * X(0)
|
||||
d(3) = d(3) + B_10(1) * X(2) + B_10(2) * X(1)
|
||||
d(4) = d(4) + B_10(2) * X(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + B_10(0) * X(0)
|
||||
d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0)
|
||||
do ib=2,nx
|
||||
d(ib) = d(ib) + B_10(0) * X(ib) + B_10(1) * X(ib-1) + B_10(2) * X(ib-2)
|
||||
enddo
|
||||
d(nx+1) = d(nx+1) + B_10(1) * X(nx) + B_10(2) * X(nx-1)
|
||||
d(nx+2) = d(nx+2) + B_10(2) * X(nx)
|
||||
|
||||
end select
|
||||
|
||||
do nd = nx+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
endif
|
||||
|
||||
nx = nd
|
||||
!DIR$ LOOP COUNT(8)
|
||||
@ -996,9 +1031,47 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
|
||||
X(ix) *= c
|
||||
enddo
|
||||
endif
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_00,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_00,d,nd)
|
||||
! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd)
|
||||
if(nx >= 0) then
|
||||
|
||||
select case (nx)
|
||||
case (0)
|
||||
d(0) = d(0) + B_00(0) * X(0)
|
||||
d(1) = d(1) + B_00(1) * X(0)
|
||||
d(2) = d(2) + B_00(2) * X(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + B_00(0) * X(0)
|
||||
d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0)
|
||||
d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0)
|
||||
d(3) = d(3) + B_00(2) * X(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + B_00(0) * X(0)
|
||||
d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0)
|
||||
d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0)
|
||||
d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1)
|
||||
d(4) = d(4) + B_00(2) * X(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + B_00(0) * X(0)
|
||||
d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0)
|
||||
do ib=2,nx
|
||||
d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2)
|
||||
enddo
|
||||
d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1)
|
||||
d(nx+2) = d(nx+2) + B_00(2) * X(nx)
|
||||
|
||||
end select
|
||||
|
||||
do nd = nx+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
ny=0
|
||||
@ -1016,8 +1089,45 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt
|
||||
endif
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,C_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,C_00,d,nd)
|
||||
! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd)
|
||||
if(ny >= 0) then
|
||||
|
||||
select case (ny)
|
||||
case (0)
|
||||
d(0) = d(0) + C_00(0) * Y(0)
|
||||
d(1) = d(1) + C_00(1) * Y(0)
|
||||
d(2) = d(2) + C_00(2) * Y(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + C_00(0) * Y(0)
|
||||
d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0)
|
||||
d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0)
|
||||
d(3) = d(3) + C_00(2) * Y(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + C_00(0) * Y(0)
|
||||
d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0)
|
||||
d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0)
|
||||
d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1)
|
||||
d(4) = d(4) + C_00(2) * Y(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + C_00(0) * Y(0)
|
||||
d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0)
|
||||
do ib=2,ny
|
||||
d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2)
|
||||
enddo
|
||||
d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1)
|
||||
d(ny+2) = d(ny+2) + C_00(2) * Y(ny)
|
||||
|
||||
end select
|
||||
|
||||
do nd = ny+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
endif
|
||||
|
||||
end
|
||||
|
||||
recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
@ -1034,7 +1144,7 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
double precision :: X(0:max_dim)
|
||||
double precision :: Y(0:max_dim)
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y
|
||||
integer :: nx, ix,iy,ny
|
||||
integer :: nx, ix,iy,ny,ib
|
||||
|
||||
if( (c<0).or.(nd<0) )then
|
||||
nd = -1
|
||||
@ -1056,8 +1166,44 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
endif
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_00,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_00,d,nd)
|
||||
! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd)
|
||||
if(nx >= 0) then
|
||||
|
||||
select case (nx)
|
||||
case (0)
|
||||
d(0) = d(0) + B_00(0) * X(0)
|
||||
d(1) = d(1) + B_00(1) * X(0)
|
||||
d(2) = d(2) + B_00(2) * X(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + B_00(0) * X(0)
|
||||
d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0)
|
||||
d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0)
|
||||
d(3) = d(3) + B_00(2) * X(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + B_00(0) * X(0)
|
||||
d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0)
|
||||
d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0)
|
||||
d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1)
|
||||
d(4) = d(4) + B_00(2) * X(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + B_00(0) * X(0)
|
||||
d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0)
|
||||
do ib=2,nx
|
||||
d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2)
|
||||
enddo
|
||||
d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1)
|
||||
d(nx+2) = d(nx+2) + B_00(2) * X(nx)
|
||||
|
||||
end select
|
||||
|
||||
do nd = nx+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
endif
|
||||
|
||||
ny=0
|
||||
|
||||
@ -1068,8 +1214,44 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,C_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,C_00,d,nd)
|
||||
! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd)
|
||||
if(ny >= 0) then
|
||||
|
||||
select case (ny)
|
||||
case (0)
|
||||
d(0) = d(0) + C_00(0) * Y(0)
|
||||
d(1) = d(1) + C_00(1) * Y(0)
|
||||
d(2) = d(2) + C_00(2) * Y(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + C_00(0) * Y(0)
|
||||
d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0)
|
||||
d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0)
|
||||
d(3) = d(3) + C_00(2) * Y(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + C_00(0) * Y(0)
|
||||
d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0)
|
||||
d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0)
|
||||
d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1)
|
||||
d(4) = d(4) + C_00(2) * Y(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + C_00(0) * Y(0)
|
||||
d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0)
|
||||
do ib=2,ny
|
||||
d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2)
|
||||
enddo
|
||||
d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1)
|
||||
d(ny+2) = d(ny+2) + C_00(2) * Y(ny)
|
||||
|
||||
end select
|
||||
|
||||
do nd = ny+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
endif
|
||||
|
||||
end
|
||||
|
||||
@ -1087,7 +1269,7 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
double precision :: X(0:max_dim)
|
||||
double precision :: Y(0:max_dim)
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y
|
||||
integer :: nx, ix,iy,ny
|
||||
integer :: nx, ix,iy,ny,ib
|
||||
|
||||
!DIR$ LOOP COUNT(8)
|
||||
do ix=0,n_pt_in
|
||||
@ -1097,8 +1279,44 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_10,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_10,d,nd)
|
||||
! call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd)
|
||||
if(nx >= 0) then
|
||||
|
||||
select case (nx)
|
||||
case (0)
|
||||
d(0) = d(0) + B_10(0) * X(0)
|
||||
d(1) = d(1) + B_10(1) * X(0)
|
||||
d(2) = d(2) + B_10(2) * X(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + B_10(0) * X(0)
|
||||
d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0)
|
||||
d(2) = d(2) + B_10(1) * X(1) + B_10(2) * X(0)
|
||||
d(3) = d(3) + B_10(2) * X(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + B_10(0) * X(0)
|
||||
d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0)
|
||||
d(2) = d(2) + B_10(0) * X(2) + B_10(1) * X(1) + B_10(2) * X(0)
|
||||
d(3) = d(3) + B_10(1) * X(2) + B_10(2) * X(1)
|
||||
d(4) = d(4) + B_10(2) * X(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + B_10(0) * X(0)
|
||||
d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0)
|
||||
do ib=2,nx
|
||||
d(ib) = d(ib) + B_10(0) * X(ib) + B_10(1) * X(ib-1) + B_10(2) * X(ib-2)
|
||||
enddo
|
||||
d(nx+1) = d(nx+1) + B_10(1) * X(nx) + B_10(2) * X(nx-1)
|
||||
d(nx+2) = d(nx+2) + B_10(2) * X(nx)
|
||||
|
||||
end select
|
||||
|
||||
do nd = nx+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
endif
|
||||
|
||||
nx = nd
|
||||
!DIR$ LOOP COUNT(8)
|
||||
@ -1117,8 +1335,44 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
endif
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_00,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_00,d,nd)
|
||||
! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd)
|
||||
if(nx >= 0) then
|
||||
|
||||
select case (nx)
|
||||
case (0)
|
||||
d(0) = d(0) + B_00(0) * X(0)
|
||||
d(1) = d(1) + B_00(1) * X(0)
|
||||
d(2) = d(2) + B_00(2) * X(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + B_00(0) * X(0)
|
||||
d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0)
|
||||
d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0)
|
||||
d(3) = d(3) + B_00(2) * X(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + B_00(0) * X(0)
|
||||
d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0)
|
||||
d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0)
|
||||
d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1)
|
||||
d(4) = d(4) + B_00(2) * X(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + B_00(0) * X(0)
|
||||
d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0)
|
||||
do ib=2,nx
|
||||
d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2)
|
||||
enddo
|
||||
d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1)
|
||||
d(nx+2) = d(nx+2) + B_00(2) * X(nx)
|
||||
|
||||
end select
|
||||
|
||||
do nd = nx+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
endif
|
||||
|
||||
ny=0
|
||||
!DIR$ LOOP COUNT(8)
|
||||
@ -1129,8 +1383,45 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
|
||||
call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,C_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,C_00,d,nd)
|
||||
! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd)
|
||||
if(ny >= 0) then
|
||||
|
||||
select case (ny)
|
||||
case (0)
|
||||
d(0) = d(0) + C_00(0) * Y(0)
|
||||
d(1) = d(1) + C_00(1) * Y(0)
|
||||
d(2) = d(2) + C_00(2) * Y(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + C_00(0) * Y(0)
|
||||
d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0)
|
||||
d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0)
|
||||
d(3) = d(3) + C_00(2) * Y(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + C_00(0) * Y(0)
|
||||
d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0)
|
||||
d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0)
|
||||
d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1)
|
||||
d(4) = d(4) + C_00(2) * Y(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + C_00(0) * Y(0)
|
||||
d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0)
|
||||
do ib=2,ny
|
||||
d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2)
|
||||
enddo
|
||||
d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1)
|
||||
d(ny+2) = d(ny+2) + C_00(2) * Y(ny)
|
||||
|
||||
end select
|
||||
|
||||
do nd = ny+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
endif
|
||||
|
||||
end
|
||||
|
||||
recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
|
||||
@ -1147,7 +1438,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
|
||||
integer :: nx, ix,ny
|
||||
double precision :: X(0:max_dim),Y(0:max_dim)
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y
|
||||
integer :: i
|
||||
integer :: i, ib
|
||||
|
||||
select case (c)
|
||||
case (0)
|
||||
@ -1178,8 +1469,45 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
|
||||
Y(2) = D_00(2)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,D_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,D_00,d,nd)
|
||||
! call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd)
|
||||
if(ny >= 0) then
|
||||
|
||||
select case (ny)
|
||||
case (0)
|
||||
d(0) = d(0) + D_00(0) * Y(0)
|
||||
d(1) = d(1) + D_00(1) * Y(0)
|
||||
d(2) = d(2) + D_00(2) * Y(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + D_00(0) * Y(0)
|
||||
d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0)
|
||||
d(2) = d(2) + D_00(1) * Y(1) + D_00(2) * Y(0)
|
||||
d(3) = d(3) + D_00(2) * Y(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + D_00(0) * Y(0)
|
||||
d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0)
|
||||
d(2) = d(2) + D_00(0) * Y(2) + D_00(1) * Y(1) + D_00(2) * Y(0)
|
||||
d(3) = d(3) + D_00(1) * Y(2) + D_00(2) * Y(1)
|
||||
d(4) = d(4) + D_00(2) * Y(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + D_00(0) * Y(0)
|
||||
d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0)
|
||||
do ib=2,ny
|
||||
d(ib) = d(ib) + D_00(0) * Y(ib) + D_00(1) * Y(ib-1) + D_00(2) * Y(ib-2)
|
||||
enddo
|
||||
d(ny+1) = d(ny+1) + D_00(1) * Y(ny) + D_00(2) * Y(ny-1)
|
||||
d(ny+2) = d(ny+2) + D_00(2) * Y(ny)
|
||||
|
||||
end select
|
||||
|
||||
do nd = ny+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
endif
|
||||
|
||||
|
||||
return
|
||||
|
||||
@ -1198,8 +1526,44 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
|
||||
enddo
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(X,nx,B_01,2,d,nd)
|
||||
call multiply_poly_c2(X,nx,B_01,d,nd)
|
||||
! call multiply_poly_c2_inline_2e(X,nx,B_01,d,nd)
|
||||
if(nx >= 0) then
|
||||
|
||||
select case (nx)
|
||||
case (0)
|
||||
d(0) = d(0) + B_01(0) * X(0)
|
||||
d(1) = d(1) + B_01(1) * X(0)
|
||||
d(2) = d(2) + B_01(2) * X(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + B_01(0) * X(0)
|
||||
d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0)
|
||||
d(2) = d(2) + B_01(1) * X(1) + B_01(2) * X(0)
|
||||
d(3) = d(3) + B_01(2) * X(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + B_01(0) * X(0)
|
||||
d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0)
|
||||
d(2) = d(2) + B_01(0) * X(2) + B_01(1) * X(1) + B_01(2) * X(0)
|
||||
d(3) = d(3) + B_01(1) * X(2) + B_01(2) * X(1)
|
||||
d(4) = d(4) + B_01(2) * X(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + B_01(0) * X(0)
|
||||
d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0)
|
||||
do ib=2,nx
|
||||
d(ib) = d(ib) + B_01(0) * X(ib) + B_01(1) * X(ib-1) + B_01(2) * X(ib-2)
|
||||
enddo
|
||||
d(nx+1) = d(nx+1) + B_01(1) * X(nx) + B_01(2) * X(nx-1)
|
||||
d(nx+2) = d(nx+2) + B_01(2) * X(nx)
|
||||
|
||||
end select
|
||||
|
||||
do nd = nx+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
endif
|
||||
|
||||
ny = 0
|
||||
!DIR$ LOOP COUNT(6)
|
||||
@ -1209,8 +1573,45 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim)
|
||||
call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim)
|
||||
|
||||
! !DIR$ FORCEINLINE
|
||||
! call multiply_poly(Y,ny,D_00,2,d,nd)
|
||||
call multiply_poly_c2(Y,ny,D_00,d,nd)
|
||||
! call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd)
|
||||
|
||||
if(ny >= 0) then
|
||||
|
||||
select case (ny)
|
||||
case (0)
|
||||
d(0) = d(0) + D_00(0) * Y(0)
|
||||
d(1) = d(1) + D_00(1) * Y(0)
|
||||
d(2) = d(2) + D_00(2) * Y(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + D_00(0) * Y(0)
|
||||
d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0)
|
||||
d(2) = d(2) + D_00(1) * Y(1) + D_00(2) * Y(0)
|
||||
d(3) = d(3) + D_00(2) * Y(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + D_00(0) * Y(0)
|
||||
d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0)
|
||||
d(2) = d(2) + D_00(0) * Y(2) + D_00(1) * Y(1) + D_00(2) * Y(0)
|
||||
d(3) = d(3) + D_00(1) * Y(2) + D_00(2) * Y(1)
|
||||
d(4) = d(4) + D_00(2) * Y(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + D_00(0) * Y(0)
|
||||
d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0)
|
||||
do ib=2,ny
|
||||
d(ib) = d(ib) + D_00(0) * Y(ib) + D_00(1) * Y(ib-1) + D_00(2) * Y(ib-2)
|
||||
enddo
|
||||
d(ny+1) = d(ny+1) + D_00(1) * Y(ny) + D_00(2) * Y(ny-1)
|
||||
d(ny+2) = d(ny+2) + D_00(2) * Y(ny)
|
||||
|
||||
end select
|
||||
|
||||
do nd = ny+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
endif
|
||||
|
||||
end select
|
||||
end
|
||||
@ -1232,7 +1633,8 @@ subroutine compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value)
|
||||
logical, external :: ao_two_e_integral_zero
|
||||
|
||||
integer :: i,k
|
||||
double precision :: ao_two_e_integral,cpu_1,cpu_2, wall_1, wall_2
|
||||
double precision, external :: ao_two_e_integral
|
||||
double precision :: cpu_1,cpu_2, wall_1, wall_2
|
||||
double precision :: integral, wall_0
|
||||
double precision :: thr
|
||||
integer :: kk, m, j1, i1
|
||||
@ -1299,3 +1701,56 @@ subroutine multiply_poly_local(b,nb,c,nc,d,nd)
|
||||
end
|
||||
|
||||
|
||||
!DIR$ FORCEINLINE
|
||||
subroutine multiply_poly_c2_inline_2e(b,nb,c,d,nd)
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Multiply two polynomials
|
||||
! D(t) += B(t)*C(t)
|
||||
END_DOC
|
||||
|
||||
integer, intent(in) :: nb
|
||||
integer, intent(out) :: nd
|
||||
double precision, intent(in) :: b(0:nb), c(0:2)
|
||||
double precision, intent(inout) :: d(0:nb+2)
|
||||
|
||||
integer :: ndtmp
|
||||
integer :: ib, ic, id, k
|
||||
if(nb < 0) return !False if nb>=0
|
||||
|
||||
select case (nb)
|
||||
case (0)
|
||||
d(0) = d(0) + c(0) * b(0)
|
||||
d(1) = d(1) + c(1) * b(0)
|
||||
d(2) = d(2) + c(2) * b(0)
|
||||
|
||||
case (1)
|
||||
d(0) = d(0) + c(0) * b(0)
|
||||
d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
|
||||
d(2) = d(2) + c(1) * b(1) + c(2) * b(0)
|
||||
d(3) = d(3) + c(2) * b(1)
|
||||
|
||||
case (2)
|
||||
d(0) = d(0) + c(0) * b(0)
|
||||
d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
|
||||
d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0)
|
||||
d(3) = d(3) + c(1) * b(2) + c(2) * b(1)
|
||||
d(4) = d(4) + c(2) * b(2)
|
||||
|
||||
case default
|
||||
d(0) = d(0) + c(0) * b(0)
|
||||
d(1) = d(1) + c(0) * b(1) + c(1) * b(0)
|
||||
do ib=2,nb
|
||||
d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + c(2) * b(ib-2)
|
||||
enddo
|
||||
d(nb+1) = d(nb+1) + c(1) * b(nb) + c(2) * b(nb-1)
|
||||
d(nb+2) = d(nb+2) + c(2) * b(nb)
|
||||
|
||||
end select
|
||||
|
||||
do nd = nb+2,0,-1
|
||||
if (d(nd) /= 0.d0) exit
|
||||
enddo
|
||||
|
||||
end
|
||||
|
||||
|
@ -9,7 +9,7 @@ subroutine run_ccsd_space_orb
|
||||
double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb
|
||||
logical :: not_converged
|
||||
|
||||
double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:)
|
||||
double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:)
|
||||
double precision, allocatable :: t1(:,:), r1(:,:)
|
||||
double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:)
|
||||
|
||||
@ -18,7 +18,12 @@ subroutine run_ccsd_space_orb
|
||||
integer(bit_kind) :: det(N_int,2)
|
||||
integer :: nO, nV, nOa, nVa
|
||||
|
||||
! PROVIDE mo_two_e_integrals_in_map
|
||||
if (do_ao_cholesky) then
|
||||
PROVIDE cholesky_mo_transp
|
||||
FREE cholesky_ao
|
||||
else
|
||||
PROVIDE mo_two_e_integrals_in_map
|
||||
endif
|
||||
|
||||
det = psi_det(:,:,cc_ref)
|
||||
print*,'Reference determinant:'
|
||||
@ -46,13 +51,39 @@ subroutine run_ccsd_space_orb
|
||||
|
||||
allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV))
|
||||
allocate(tau(nO,nO,nV,nV))
|
||||
allocate(tau_x(nO,nO,nV,nV))
|
||||
allocate(t1(nO,nV), r1(nO,nV))
|
||||
allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO))
|
||||
|
||||
if (cc_update_method == 'diis') then
|
||||
allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth))
|
||||
all_err = 0d0
|
||||
all_t = 0d0
|
||||
double precision :: rss, diis_mem, extra_mem
|
||||
double precision, external :: memory_of_double
|
||||
diis_mem = 2.d0*memory_of_double(nO*nV)*(1.d0+nO*nV)
|
||||
call resident_memory(rss)
|
||||
do while (cc_diis_depth > 1)
|
||||
if (rss + diis_mem * cc_diis_depth > qp_max_mem) then
|
||||
cc_diis_depth = cc_diis_depth - 1
|
||||
else
|
||||
exit
|
||||
endif
|
||||
end do
|
||||
if (cc_diis_depth <= 1) then
|
||||
print *, 'Not enough memory for DIIS'
|
||||
stop -1
|
||||
endif
|
||||
print *, 'DIIS size ', cc_diis_depth
|
||||
|
||||
allocate(all_err(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth), all_t(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth))
|
||||
!$OMP PARALLEL PRIVATE(i,j) DEFAULT(SHARED)
|
||||
do j=1,cc_diis_depth
|
||||
!$OMP DO
|
||||
do i=1, size(all_err,1)
|
||||
all_err(i,j) = 0d0
|
||||
all_t(i,j) = 0d0
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
enddo
|
||||
!$OMP END PARALLEL
|
||||
endif
|
||||
|
||||
if (elec_alpha_num /= elec_beta_num) then
|
||||
@ -67,10 +98,11 @@ subroutine run_ccsd_space_orb
|
||||
call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1)
|
||||
call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2)
|
||||
call update_tau_space(nO,nV,t1,t2,tau)
|
||||
call update_tau_x_space(nO,nV,tau,tau_x)
|
||||
!print*,'hf_energy', hf_energy
|
||||
call det_energy(det,uncorr_energy)
|
||||
print*,'Det energy', uncorr_energy
|
||||
call ccsd_energy_space(nO,nV,tau,t1,energy)
|
||||
call ccsd_energy_space_x(nO,nV,tau_x,t1,energy)
|
||||
print*,'Guess energy', uncorr_energy+energy, energy
|
||||
|
||||
nb_iter = 0
|
||||
@ -85,13 +117,23 @@ subroutine run_ccsd_space_orb
|
||||
|
||||
do while (not_converged)
|
||||
|
||||
call compute_H_oo(nO,nV,t1,t2,tau,H_oo)
|
||||
call compute_H_vv(nO,nV,t1,t2,tau,H_vv)
|
||||
call compute_H_vo(nO,nV,t1,t2,H_vo)
|
||||
|
||||
! Residue
|
||||
call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
|
||||
call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
if (do_ao_cholesky) then
|
||||
! if (.False.) then
|
||||
call compute_H_oo_chol(nO,nV,tau_x,H_oo)
|
||||
call compute_H_vv_chol(nO,nV,tau_x,H_vv)
|
||||
call compute_H_vo_chol(nO,nV,t1,H_vo)
|
||||
|
||||
call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
|
||||
call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
else
|
||||
call compute_H_oo(nO,nV,t1,t2,tau,H_oo)
|
||||
call compute_H_vv(nO,nV,t1,t2,tau,H_vv)
|
||||
call compute_H_vo(nO,nV,t1,t2,H_vo)
|
||||
|
||||
call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
|
||||
call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
endif
|
||||
max_r = max(max_r1,max_r2)
|
||||
|
||||
! Update
|
||||
@ -109,10 +151,11 @@ subroutine run_ccsd_space_orb
|
||||
endif
|
||||
|
||||
call update_tau_space(nO,nV,t1,t2,tau)
|
||||
call update_tau_x_space(nO,nV,tau,tau_x)
|
||||
|
||||
! Energy
|
||||
call ccsd_energy_space(nO,nV,tau,t1,energy)
|
||||
write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'
|
||||
call ccsd_energy_space_x(nO,nV,tau_x,t1,energy)
|
||||
write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'
|
||||
|
||||
nb_iter = nb_iter + 1
|
||||
if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then
|
||||
@ -132,7 +175,7 @@ subroutine run_ccsd_space_orb
|
||||
print*,''
|
||||
write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha'
|
||||
write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha'
|
||||
write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r
|
||||
write(*,'(A15,ES10.2,A3)')' Conv = ', max_r
|
||||
print*,''
|
||||
|
||||
if (write_amplitudes) then
|
||||
@ -239,6 +282,51 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy)
|
||||
|
||||
end
|
||||
|
||||
subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy)
|
||||
|
||||
implicit none
|
||||
|
||||
integer, intent(in) :: nO, nV
|
||||
double precision, intent(in) :: tau_x(nO,nO,nV,nV)
|
||||
double precision, intent(in) :: t1(nO,nV)
|
||||
double precision, intent(out) :: energy
|
||||
|
||||
! internal
|
||||
integer :: i,j,a,b
|
||||
double precision :: e
|
||||
|
||||
energy = 0d0
|
||||
!$omp parallel &
|
||||
!$omp shared(nO,nV,energy,tau_x,t1,&
|
||||
!$omp cc_space_f_vo,cc_space_v_oovv) &
|
||||
!$omp private(i,j,a,b,e) &
|
||||
!$omp default(none)
|
||||
e = 0d0
|
||||
!$omp do
|
||||
do a = 1, nV
|
||||
do i = 1, nO
|
||||
e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a)
|
||||
enddo
|
||||
enddo
|
||||
!$omp end do nowait
|
||||
!$omp do
|
||||
do b = 1, nV
|
||||
do a = 1, nV
|
||||
do j = 1, nO
|
||||
do i = 1, nO
|
||||
e = e + tau_x(i,j,a,b) * cc_space_v_oovv(i,j,a,b)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$omp end do nowait
|
||||
!$omp critical
|
||||
energy = energy + e
|
||||
!$omp end critical
|
||||
!$omp end parallel
|
||||
|
||||
end
|
||||
|
||||
! Tau
|
||||
|
||||
subroutine update_tau_space(nO,nV,t1,t2,tau)
|
||||
@ -274,6 +362,39 @@ subroutine update_tau_space(nO,nV,t1,t2,tau)
|
||||
|
||||
end
|
||||
|
||||
subroutine update_tau_x_space(nO,nV,tau,tau_x)
|
||||
|
||||
implicit none
|
||||
|
||||
! in
|
||||
integer, intent(in) :: nO, nV
|
||||
double precision, intent(in) :: tau(nO,nO,nV,nV)
|
||||
|
||||
! out
|
||||
double precision, intent(out) :: tau_x(nO,nO,nV,nV)
|
||||
|
||||
! internal
|
||||
integer :: i,j,a,b
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP SHARED(nO,nV,tau,tau_x) &
|
||||
!$OMP PRIVATE(i,j,a,b) &
|
||||
!$OMP DEFAULT(NONE)
|
||||
!$OMP DO
|
||||
do b = 1, nV
|
||||
do a = 1, nV
|
||||
do j = 1, nO
|
||||
do i = 1, nO
|
||||
tau_x(i,j,a,b) = 2.d0*tau(i,j,a,b) - tau(i,j,b,a)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
!$OMP END PARALLEL
|
||||
|
||||
end
|
||||
|
||||
! R1
|
||||
|
||||
subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
|
||||
@ -449,25 +570,16 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
|
||||
! enddo
|
||||
! enddo
|
||||
!enddo
|
||||
|
||||
integer :: iblock, block_size, nVmax
|
||||
double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:)
|
||||
allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO))
|
||||
block_size = 8
|
||||
allocate(W_vvov(nV,nV,nO,block_size), T_vvoo(nV,nV,nO,nO))
|
||||
|
||||
!$omp parallel &
|
||||
!$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) &
|
||||
!$omp private(b,beta,i,a) &
|
||||
!$omp default(none)
|
||||
!$omp do
|
||||
do beta = 1, nV
|
||||
do i = 1, nO
|
||||
do b = 1, nV
|
||||
do a = 1, nV
|
||||
W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$omp end do nowait
|
||||
|
||||
!$omp do
|
||||
do u = 1, nO
|
||||
do i = 1, nO
|
||||
@ -481,10 +593,30 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
|
||||
!$omp end do nowait
|
||||
!$omp end parallel
|
||||
|
||||
call dgemm('T','N',nO,nV,nO*nV*nV, &
|
||||
1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), &
|
||||
W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), &
|
||||
1d0, r1 , size(r1,1))
|
||||
do iblock = 1, nV, block_size
|
||||
nVmax = min(block_size,nV-iblock+1)
|
||||
!$omp parallel &
|
||||
!$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau,nVmax,iblock) &
|
||||
!$omp private(b,i,a,beta) &
|
||||
!$omp default(none)
|
||||
!$omp do collapse(2)
|
||||
do beta = iblock, iblock + nVmax - 1
|
||||
do i = 1, nO
|
||||
do b = 1, nV
|
||||
do a = 1, nV
|
||||
W_vvov(a,b,i,beta-iblock+1) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$omp end do nowait
|
||||
!$omp end parallel
|
||||
|
||||
call dgemm('T','N',nO,nVmax,nO*nV*nV, &
|
||||
1d0, T_vvoo, nV*nV*nO, &
|
||||
W_vvov, nO*nV*nV, &
|
||||
1d0, r1(1,iblock), nO)
|
||||
enddo
|
||||
|
||||
deallocate(W_vvov,T_vvoo)
|
||||
|
||||
@ -839,6 +971,10 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
|
||||
! allocate(B1(nV,nV,nV,nV))
|
||||
! call compute_B1(nO,nV,t1,t2,B1)
|
||||
! call dgemm('N','N',nO*nO,nV*nV,nV*nV, &
|
||||
! 1d0, tau, size(tau,1) * size(tau,2), &
|
||||
! B1 , size(B1_gam,1) * size(B1_gam,2), &
|
||||
! 1d0, r2, size(r2,1) * size(r2,2))
|
||||
allocate(B1_gam(nV,nV,nV))
|
||||
do gam=1,nV
|
||||
call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam)
|
||||
@ -1323,7 +1459,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
!enddo
|
||||
|
||||
!$omp parallel &
|
||||
!$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) &
|
||||
!$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) &
|
||||
!$omp private(u,v,gam,beta,i,a) &
|
||||
!$omp default(none)
|
||||
!$omp do
|
||||
@ -1343,7 +1479,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
do v = 1, nO
|
||||
do a = 1, nV
|
||||
do i = 1, nO
|
||||
Z_ovov(i,a,v,beta) = t2(i,v,beta,a)
|
||||
Y_ovov(i,a,v,beta) = t2(i,v,beta,a)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
@ -1547,21 +1683,29 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam)
|
||||
! enddo
|
||||
|
||||
double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:)
|
||||
|
||||
allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV))
|
||||
! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam)
|
||||
|
||||
call gen_v_space(cc_nVa,cc_nVa,cc_nVa,1, &
|
||||
cc_list_vir,cc_list_vir,cc_list_vir,cc_list_vir(gam), B1)
|
||||
|
||||
|
||||
!$omp parallel &
|
||||
!$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) &
|
||||
!$omp private(a,b,beta) &
|
||||
!$omp default(none)
|
||||
!$omp do
|
||||
do beta = 1, nV
|
||||
do b = 1, nV
|
||||
do a = 1, nV
|
||||
B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$omp end do nowait
|
||||
|
||||
! !$omp do
|
||||
! do beta = 1, nV
|
||||
! do b = 1, nV
|
||||
! do a = 1, nV
|
||||
! B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam)
|
||||
! enddo
|
||||
! enddo
|
||||
! enddo
|
||||
! !$omp end do nowait
|
||||
|
||||
do i = 1, nO
|
||||
!$omp do
|
||||
do b = 1, nV
|
||||
@ -1569,7 +1713,7 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam)
|
||||
X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam)
|
||||
enddo
|
||||
enddo
|
||||
!$omp end do nowait
|
||||
!$omp end do
|
||||
enddo
|
||||
!$omp end parallel
|
||||
|
||||
|
1467
src/ccsd/ccsd_space_orb_sub_chol.irp.f
Normal file
1467
src/ccsd/ccsd_space_orb_sub_chol.irp.f
Normal file
File diff suppressed because it is too large
Load Diff
@ -241,7 +241,7 @@ subroutine run_ccsd_spin_orb
|
||||
call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy)
|
||||
call wall_time(tfi)
|
||||
|
||||
write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', &
|
||||
write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', &
|
||||
uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'
|
||||
if (cc_dev) then
|
||||
print*,'Total:',tfi-tbi,'s'
|
||||
@ -266,7 +266,7 @@ subroutine run_ccsd_spin_orb
|
||||
print*,''
|
||||
write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha'
|
||||
write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha'
|
||||
write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r
|
||||
write(*,'(A15,ES10.2,A3)')' Conv = ', max_r
|
||||
print*,''
|
||||
|
||||
if (write_amplitudes) then
|
||||
|
@ -101,7 +101,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy)
|
||||
|
||||
!$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED)
|
||||
e = 0d0
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do a = 1, nV
|
||||
do b = a+1, nV
|
||||
do c = b+1, nV
|
||||
|
@ -94,6 +94,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ
|
||||
enddo
|
||||
!$OMP END DO nowait
|
||||
|
||||
!$OMP BARRIER
|
||||
!$OMP END PARALLEL
|
||||
|
||||
double precision, external :: ccsd_t_task_aba
|
||||
@ -209,9 +210,9 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ
|
||||
Pabc(:) = 1.d0/Pabc(:)
|
||||
|
||||
print '(A)', ''
|
||||
print '(A)', ' +----------------------+--------------+----------+'
|
||||
print '(A)', ' | E(CCSD(T)) | Error | % |'
|
||||
print '(A)', ' +----------------------+--------------+----------+'
|
||||
print '(A)', ' ======================= ============== =========='
|
||||
print '(A)', ' E(CCSD(T)) Error % '
|
||||
print '(A)', ' ======================= ============== =========='
|
||||
|
||||
|
||||
call wall_time(t00)
|
||||
@ -256,7 +257,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ
|
||||
if (imin >= bounds(2,isample)) then
|
||||
cycle
|
||||
endif
|
||||
ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc)
|
||||
ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc)+1
|
||||
|
||||
if (sampled(ieta) == -1_8) then
|
||||
sampled(ieta) = 0_8
|
||||
@ -280,9 +281,10 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ
|
||||
|
||||
call wall_time(t01)
|
||||
if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then
|
||||
t00 = t01
|
||||
|
||||
!$OMP TASKWAIT
|
||||
call wall_time(t01)
|
||||
t00 = t01
|
||||
|
||||
double precision :: ET, ET2
|
||||
double precision :: energy_stoch, energy_det
|
||||
@ -322,17 +324,20 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ
|
||||
|
||||
energy = energy_det + energy_stoch
|
||||
|
||||
print '('' | '',F20.8, '' | '', E12.4,'' | '', F8.2,'' |'')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc)
|
||||
print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc)
|
||||
endif
|
||||
!$OMP END MASTER
|
||||
if (imin >= Nabc) exit
|
||||
enddo
|
||||
|
||||
!$OMP END PARALLEL
|
||||
print '(A)', ' +----------------------+--------------+----------+'
|
||||
print '(A)', ' ======================= ============== ========== '
|
||||
print '(A)', ''
|
||||
|
||||
deallocate(X_vovv,X_ooov,T_voov,T_oovv)
|
||||
deallocate(X_vovv)
|
||||
deallocate(X_ooov)
|
||||
deallocate(T_voov)
|
||||
deallocate(T_oovv)
|
||||
end
|
||||
|
||||
|
||||
|
@ -591,7 +591,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
|
||||
time-time0
|
||||
|
||||
! Old print
|
||||
!print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,1pE16.6,1pE16.6)', c, &
|
||||
!print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,ES16.6,ES16.6)', c, &
|
||||
! pt2_data % pt2(pt2_stoch_istate) +E, &
|
||||
! pt2_data_err % pt2(pt2_stoch_istate), &
|
||||
! pt2_data % variance(pt2_stoch_istate), &
|
||||
|
@ -331,7 +331,7 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sz
|
||||
!don't print
|
||||
continue
|
||||
else
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
endif
|
||||
|
||||
! Check convergence
|
||||
|
@ -405,7 +405,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies
|
||||
!don't print
|
||||
continue
|
||||
else
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
endif
|
||||
|
||||
! Check convergence
|
||||
|
@ -398,7 +398,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di
|
||||
!don't print
|
||||
continue
|
||||
else
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
endif
|
||||
|
||||
! Check convergence
|
||||
|
@ -316,7 +316,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co
|
||||
!don't print
|
||||
continue
|
||||
else
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
endif
|
||||
|
||||
! Check convergence
|
||||
|
@ -327,7 +327,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
|
||||
!don't print
|
||||
continue
|
||||
else
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
endif
|
||||
|
||||
! Check convergence
|
||||
|
@ -457,7 +457,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia
|
||||
!don't print
|
||||
continue
|
||||
else
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
endif
|
||||
|
||||
! Check convergence
|
||||
|
@ -477,7 +477,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
|
||||
!don't print
|
||||
continue
|
||||
else
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
endif
|
||||
|
||||
! Check convergence
|
||||
|
@ -611,7 +611,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
|
||||
!don't print
|
||||
continue
|
||||
else
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:3,1:N_st)
|
||||
write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:3,1:N_st)
|
||||
endif
|
||||
|
||||
! Check convergence
|
||||
|
@ -436,7 +436,7 @@ subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze,
|
||||
!don't print
|
||||
continue
|
||||
else
|
||||
write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, E11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, ES11.3))') iter-1, to_print(1:2,1:N_st)
|
||||
endif
|
||||
|
||||
! Check convergence
|
||||
|
@ -13,7 +13,9 @@ BEGIN_PROVIDER [ integer, nthreads_davidson ]
|
||||
character*(32) :: env
|
||||
call getenv('QP_NTHREADS_DAVIDSON',env)
|
||||
if (trim(env) /= '') then
|
||||
call lock_io
|
||||
read(env,*) nthreads_davidson
|
||||
call unlock_io
|
||||
call write_int(6,nthreads_davidson,'Target number of threads for <Psi|H|Psi>')
|
||||
endif
|
||||
END_PROVIDER
|
||||
|
@ -117,7 +117,7 @@ END_PROVIDER
|
||||
!$OMP N_det_alpha_unique,N_det_beta_unique,irp_here)
|
||||
allocate(tmp_a(mo_num,mo_num,N_states), tmp_b(mo_num,mo_num,N_states) )
|
||||
tmp_a = 0.d0
|
||||
!$OMP DO SCHEDULE(dynamic,64)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do k_a=1,N_det
|
||||
krow = psi_bilinear_matrix_rows(k_a)
|
||||
ASSERT (krow <= N_det_alpha_unique)
|
||||
@ -173,7 +173,7 @@ END_PROVIDER
|
||||
deallocate(tmp_a)
|
||||
|
||||
tmp_b = 0.d0
|
||||
!$OMP DO SCHEDULE(dynamic,64)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do k_b=1,N_det
|
||||
krow = psi_bilinear_matrix_transp_rows(k_b)
|
||||
ASSERT (krow <= N_det_alpha_unique)
|
||||
|
@ -66,9 +66,9 @@ END_PROVIDER
|
||||
write(*,'(i16)',advance='no') i
|
||||
end do
|
||||
write(*,*) ''
|
||||
write(*,'(A17,100(1pE16.8))') 'x_dipole_moment = ',x_dipole_moment
|
||||
write(*,'(A17,100(1pE16.8))') 'y_dipole_moment = ',y_dipole_moment
|
||||
write(*,'(A17,100(1pE16.8))') 'z_dipole_moment = ',z_dipole_moment
|
||||
write(*,'(A17,100(ES16.8))') 'x_dipole_moment = ',x_dipole_moment
|
||||
write(*,'(A17,100(ES16.8))') 'y_dipole_moment = ',y_dipole_moment
|
||||
write(*,'(A17,100(ES16.8))') 'z_dipole_moment = ',z_dipole_moment
|
||||
!print*, 'x_dipole_moment = ',x_dipole_moment
|
||||
!print*, 'y_dipole_moment = ',y_dipole_moment
|
||||
!print*, 'z_dipole_moment = ',z_dipole_moment
|
||||
|
@ -250,7 +250,7 @@ subroutine remove_duplicates_in_psi_det(found_duplicates)
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP DO schedule(dynamic,1024)
|
||||
!$OMP DO schedule(guided,64)
|
||||
do i=1,N_det-1
|
||||
if (duplicate(i)) then
|
||||
cycle
|
||||
|
@ -317,7 +317,7 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst
|
||||
!$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates)&
|
||||
!$OMP REDUCTION(+:accu)
|
||||
allocate(idx(0:n))
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
do i = n,1,-1 ! Better OMP scheduling
|
||||
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),N_int,s2_tmp)
|
||||
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj)
|
||||
|
@ -1,2 +1,3 @@
|
||||
mpi
|
||||
zmq
|
||||
utils
|
||||
|
@ -5,7 +5,9 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ]
|
||||
! variable if it is set, or as the 1st argument of the command line.
|
||||
END_DOC
|
||||
|
||||
PROVIDE mpi_initialized
|
||||
PROVIDE mpi_initialized output_wall_time_0
|
||||
|
||||
integer :: i
|
||||
|
||||
! Get the QPACKAGE_INPUT environment variable
|
||||
call getenv('QPACKAGE_INPUT',ezfio_filename)
|
||||
@ -44,11 +46,14 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ]
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [ character*(1024), ezfio_work_dir ]
|
||||
use c_functions
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! EZFIO/work/
|
||||
END_DOC
|
||||
call ezfio_set_work_empty(.False.)
|
||||
logical :: b
|
||||
b = mkl_serv_intel_cpu_true() /= 1
|
||||
call ezfio_set_work_empty(b)
|
||||
ezfio_work_dir = trim(ezfio_filename)//'/work/'
|
||||
END_PROVIDER
|
||||
|
||||
|
@ -190,47 +190,75 @@ END_PROVIDER
|
||||
|
||||
deallocate(X)
|
||||
|
||||
ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol
|
||||
if (elec_alpha_num > elec_beta_num) then
|
||||
ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol
|
||||
endif
|
||||
|
||||
|
||||
allocate(X2(ao_num,ao_num,cholesky_ao_num,2))
|
||||
double precision :: rss
|
||||
double precision :: memory_of_double
|
||||
|
||||
integer :: iblock
|
||||
integer, parameter :: block_size = 32
|
||||
|
||||
rss = memory_of_double(ao_num*ao_num)
|
||||
call check_mem(2.d0*block_size*rss, irp_here)
|
||||
allocate(X2(ao_num,ao_num,block_size,2))
|
||||
allocate(X3(ao_num,block_size,ao_num,2))
|
||||
|
||||
! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j)
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, &
|
||||
SCF_density_matrix_ao_alpha, ao_num, &
|
||||
cholesky_ao, ao_num, 0.d0, &
|
||||
X2(1,1,1,1), ao_num)
|
||||
do iblock=1,cholesky_ao_num,block_size
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, &
|
||||
SCF_density_matrix_ao_beta, ao_num, &
|
||||
cholesky_ao, ao_num, 0.d0, &
|
||||
X2(1,1,1,2), ao_num)
|
||||
call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, &
|
||||
SCF_density_matrix_ao_alpha, ao_num, &
|
||||
cholesky_ao(1,1,iblock), ao_num, 0.d0, &
|
||||
X2(1,1,1,1), ao_num)
|
||||
|
||||
allocate(X3(ao_num,cholesky_ao_num,ao_num,2))
|
||||
if (elec_alpha_num > elec_beta_num) then
|
||||
call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, &
|
||||
SCF_density_matrix_ao_beta, ao_num, &
|
||||
cholesky_ao(1,1,iblock), ao_num, 0.d0, &
|
||||
X2(1,1,1,2), ao_num)
|
||||
|
||||
do s=1,ao_num
|
||||
do j=1,min(cholesky_ao_num-iblock+1,block_size)
|
||||
do m=1,ao_num
|
||||
X3(m,j,s,1) = X2(m,s,j,1)
|
||||
X3(m,j,s,2) = X2(m,s,j,2)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
else
|
||||
|
||||
do s=1,ao_num
|
||||
do j=1,min(cholesky_ao_num-iblock+1,block_size)
|
||||
do m=1,ao_num
|
||||
X3(m,j,s,1) = X2(m,s,j,1)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
endif
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, &
|
||||
cholesky_ao(1,1,iblock), ao_num, &
|
||||
X3(1,1,1,1), ao_num*block_size, 1.d0, &
|
||||
ao_two_e_integral_alpha_chol, ao_num)
|
||||
|
||||
if (elec_alpha_num > elec_beta_num) then
|
||||
call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, &
|
||||
cholesky_ao(1,1,iblock), ao_num, &
|
||||
X3(1,1,1,2), ao_num*block_size, 1.d0, &
|
||||
ao_two_e_integral_beta_chol, ao_num)
|
||||
endif
|
||||
|
||||
do s=1,ao_num
|
||||
do j=1,cholesky_ao_num
|
||||
do m=1,ao_num
|
||||
X3(m,j,s,1) = X2(m,s,j,1)
|
||||
X3(m,j,s,2) = X2(m,s,j,2)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
deallocate(X2)
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, &
|
||||
cholesky_ao, ao_num, &
|
||||
X3(1,1,1,1), ao_num*cholesky_ao_num, 1.d0, &
|
||||
ao_two_e_integral_alpha_chol, ao_num)
|
||||
|
||||
call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, &
|
||||
cholesky_ao, ao_num, &
|
||||
X3(1,1,1,2), ao_num*cholesky_ao_num, 1.d0, &
|
||||
ao_two_e_integral_beta_chol, ao_num)
|
||||
|
||||
deallocate(X3)
|
||||
if (elec_alpha_num == elec_beta_num) then
|
||||
ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol
|
||||
endif
|
||||
deallocate(X2,X3)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
|
@ -111,7 +111,7 @@ subroutine first_gradient_opt(n,v_grad)
|
||||
if (debug) then
|
||||
print*,'Matrix containing the gradient :'
|
||||
do i = 1, mo_num
|
||||
write(*,'(100(E12.5))') A(i,1:mo_num)
|
||||
write(*,'(100(ES12.5))') A(i,1:mo_num)
|
||||
enddo
|
||||
endif
|
||||
|
||||
|
@ -1,49 +1,51 @@
|
||||
BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num) ]
|
||||
BEGIN_PROVIDER [ integer, cholesky_mo_num ]
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Cholesky vectors in MO basis
|
||||
! Number of Cholesky vectors in MO basis
|
||||
END_DOC
|
||||
|
||||
integer :: k
|
||||
|
||||
call set_multiple_levels_omp(.False.)
|
||||
print *, 'AO->MO Transformation of Cholesky vectors'
|
||||
!$OMP PARALLEL DO PRIVATE(k)
|
||||
do k=1,cholesky_ao_num
|
||||
call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num)
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
print *, ''
|
||||
|
||||
cholesky_mo_num = cholesky_ao_num
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, mo_num) ]
|
||||
BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ]
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Cholesky vectors in MO basis
|
||||
END_DOC
|
||||
|
||||
integer :: i,j,k
|
||||
double precision, allocatable :: buffer(:,:)
|
||||
|
||||
print *, 'AO->MO Transformation of Cholesky vectors .'
|
||||
integer :: k, i, j
|
||||
|
||||
call set_multiple_levels_omp(.False.)
|
||||
!$OMP PARALLEL PRIVATE(i,j,k,buffer)
|
||||
allocate(buffer(mo_num,mo_num))
|
||||
!$OMP DO SCHEDULE(static)
|
||||
do k=1,cholesky_ao_num
|
||||
call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num)
|
||||
!$OMP PARALLEL DO PRIVATE(k)
|
||||
do k=1,cholesky_mo_num
|
||||
do j=1,mo_num
|
||||
do i=1,mo_num
|
||||
cholesky_mo_transp(k,i,j) = buffer(i,j)
|
||||
cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
deallocate(buffer)
|
||||
!$OMP END PARALLEL
|
||||
print *, ''
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, mo_num) ]
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Cholesky vectors in MO basis
|
||||
END_DOC
|
||||
|
||||
double precision, allocatable :: X(:,:,:)
|
||||
integer :: ierr
|
||||
print *, 'AO->MO Transformation of Cholesky vectors'
|
||||
|
||||
allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr)
|
||||
if (ierr /= 0) then
|
||||
print *, irp_here, ': Allocation failed'
|
||||
endif
|
||||
call dgemm('T','N', ao_num*cholesky_mo_num, mo_num, ao_num, 1.d0, &
|
||||
cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_mo_num)
|
||||
call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, &
|
||||
X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num)
|
||||
deallocate(X)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
|
@ -13,14 +13,14 @@
|
||||
if (do_ao_cholesky) then
|
||||
|
||||
double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:)
|
||||
allocate(buffer_jj(cholesky_ao_num,mo_num), buffer(mo_num,mo_num,mo_num))
|
||||
allocate(buffer_jj(cholesky_mo_num,mo_num), buffer(mo_num,mo_num,mo_num))
|
||||
do j=1,mo_num
|
||||
buffer_jj(:,j) = cholesky_mo_transp(:,j,j)
|
||||
enddo
|
||||
|
||||
call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_ao_num, 1.d0, &
|
||||
cholesky_mo_transp, cholesky_ao_num, &
|
||||
buffer_jj, cholesky_ao_num, 0.d0, &
|
||||
call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_mo_num, 1.d0, &
|
||||
cholesky_mo_transp, cholesky_mo_num, &
|
||||
buffer_jj, cholesky_mo_num, 0.d0, &
|
||||
buffer, mo_num*mo_num)
|
||||
|
||||
do k = 1, mo_num
|
||||
@ -36,9 +36,9 @@
|
||||
|
||||
do j = 1, mo_num
|
||||
|
||||
call dgemm('T','N',mo_num,mo_num,cholesky_ao_num, 1.d0, &
|
||||
cholesky_mo_transp(1,1,j), cholesky_ao_num, &
|
||||
cholesky_mo_transp(1,1,j), cholesky_ao_num, 0.d0, &
|
||||
call dgemm('T','N',mo_num,mo_num,cholesky_mo_num, 1.d0, &
|
||||
cholesky_mo_transp(1,1,j), cholesky_mo_num, &
|
||||
cholesky_mo_transp(1,1,j), cholesky_mo_num, 0.d0, &
|
||||
buffer_jj, mo_num)
|
||||
|
||||
do k=1,mo_num
|
||||
|
@ -37,7 +37,9 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
|
||||
call map_load_from_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map)
|
||||
print*, 'MO integrals provided'
|
||||
return
|
||||
else
|
||||
endif
|
||||
|
||||
if (.not. do_direct_integrals) then
|
||||
PROVIDE ao_two_e_integrals_in_map
|
||||
endif
|
||||
|
||||
@ -90,6 +92,10 @@ subroutine four_idx_dgemm
|
||||
double precision, allocatable :: a1(:,:,:,:)
|
||||
double precision, allocatable :: a2(:,:,:,:)
|
||||
|
||||
if (ao_num > 1289) then
|
||||
print *, irp_here, ': Integer overflow in ao_num**3'
|
||||
endif
|
||||
|
||||
allocate (a1(ao_num,ao_num,ao_num,ao_num))
|
||||
|
||||
print *, 'Getting AOs'
|
||||
@ -103,6 +109,7 @@ subroutine four_idx_dgemm
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
|
||||
print *, '1st transformation'
|
||||
! 1st transformation
|
||||
allocate (a2(ao_num,ao_num,ao_num,mo_num))
|
||||
@ -166,11 +173,9 @@ subroutine four_idx_dgemm
|
||||
|
||||
deallocate (a1)
|
||||
|
||||
call map_sort(mo_integrals_map)
|
||||
call map_unique(mo_integrals_map)
|
||||
|
||||
integer*8 :: get_mo_map_size, mo_map_size
|
||||
mo_map_size = get_mo_map_size()
|
||||
|
||||
end subroutine
|
||||
|
||||
subroutine add_integrals_to_map(mask_ijkl)
|
||||
@ -250,7 +255,7 @@ subroutine add_integrals_to_map(mask_ijkl)
|
||||
|
||||
call wall_time(wall_1)
|
||||
|
||||
size_buffer = min(ao_num*ao_num*ao_num,8000000)
|
||||
size_buffer = min(ao_num*ao_num,8000000)
|
||||
print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
|
||||
ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
|
||||
|
||||
@ -443,11 +448,6 @@ subroutine add_integrals_to_map(mask_ijkl)
|
||||
!$OMP END PARALLEL
|
||||
call map_merge(mo_integrals_map)
|
||||
|
||||
call wall_time(wall_2)
|
||||
call cpu_time(cpu_2)
|
||||
integer*8 :: get_mo_map_size, mo_map_size
|
||||
mo_map_size = get_mo_map_size()
|
||||
|
||||
deallocate(list_ijkl)
|
||||
|
||||
|
||||
@ -465,51 +465,53 @@ subroutine add_integrals_to_map_cholesky
|
||||
integer :: size_buffer, n_integrals
|
||||
size_buffer = min(mo_num*mo_num*mo_num,16000000)
|
||||
|
||||
double precision, allocatable :: Vtmp(:,:,:,:)
|
||||
double precision, allocatable :: Vtmp(:,:,:)
|
||||
integer(key_kind) , allocatable :: buffer_i(:)
|
||||
real(integral_kind), allocatable :: buffer_value(:)
|
||||
|
||||
if (.True.) then
|
||||
! In-memory transformation
|
||||
call set_multiple_levels_omp(.False.)
|
||||
|
||||
allocate (Vtmp(mo_num,mo_num,mo_num,mo_num))
|
||||
!$OMP PARALLEL DEFAULT(SHARED) &
|
||||
!$OMP PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i, Vtmp)
|
||||
allocate (buffer_i(size_buffer), buffer_value(size_buffer))
|
||||
allocate (Vtmp(mo_num,mo_num,mo_num))
|
||||
n_integrals = 0
|
||||
|
||||
call dgemm('N','T',mo_num*mo_num,mo_num*mo_num,cholesky_ao_num,1.d0, &
|
||||
cholesky_mo, mo_num*mo_num, &
|
||||
cholesky_mo, mo_num*mo_num, 0.d0, &
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
do l=1,mo_num
|
||||
call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_mo_num,1.d0, &
|
||||
cholesky_mo_transp, cholesky_mo_num, &
|
||||
cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, &
|
||||
Vtmp, mo_num*mo_num)
|
||||
|
||||
!$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i)
|
||||
allocate (buffer_i(size_buffer), buffer_value(size_buffer))
|
||||
n_integrals = 0
|
||||
!$OMP DO
|
||||
do l=1,mo_num
|
||||
do k=1,l
|
||||
do j=1,mo_num
|
||||
do i=1,j
|
||||
if (abs(Vtmp(i,j,k,l)) > mo_integrals_threshold) then
|
||||
n_integrals += 1
|
||||
buffer_value(n_integrals) = Vtmp(i,j,k,l)
|
||||
!DIR$ FORCEINLINE
|
||||
call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals))
|
||||
if (n_integrals == size_buffer) then
|
||||
call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
|
||||
n_integrals = 0
|
||||
endif
|
||||
do k=1,l
|
||||
do j=1,mo_num
|
||||
do i=1,j
|
||||
if (dabs(Vtmp(i,j,k)) > mo_integrals_threshold) then
|
||||
n_integrals = n_integrals + 1
|
||||
buffer_value(n_integrals) = Vtmp(i,j,k)
|
||||
!DIR$ FORCEINLINE
|
||||
call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals))
|
||||
if (n_integrals == size_buffer) then
|
||||
call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
|
||||
n_integrals = 0
|
||||
endif
|
||||
enddo
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
|
||||
if (n_integrals > 0) then
|
||||
call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
|
||||
deallocate(buffer_i, buffer_value)
|
||||
!$OMP END PARALLEL
|
||||
|
||||
deallocate(Vtmp)
|
||||
call map_unique(mo_integrals_map)
|
||||
|
||||
endif
|
||||
deallocate(buffer_i, buffer_value, Vtmp)
|
||||
!$OMP BARRIER
|
||||
!$OMP END PARALLEL
|
||||
|
||||
call map_sort(mo_integrals_map)
|
||||
call map_unique(mo_integrals_map)
|
||||
|
||||
end
|
||||
|
||||
@ -580,6 +582,9 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
|
||||
return
|
||||
endif
|
||||
|
||||
if (ao_num > 1289) then
|
||||
print *, irp_here, ': Integer overflow in ao_num**3'
|
||||
endif
|
||||
size_buffer = min(ao_num*ao_num*ao_num,16000000)
|
||||
print*, 'Providing the molecular integrals '
|
||||
print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
|
||||
@ -855,6 +860,9 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
|
||||
call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int )
|
||||
call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int )
|
||||
|
||||
if (ao_num > 1289) then
|
||||
print *, irp_here, ': Integer overflow in ao_num**3'
|
||||
endif
|
||||
size_buffer = min(ao_num*ao_num*ao_num,16000000)
|
||||
print*, 'Providing the molecular integrals '
|
||||
print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
|
||||
@ -1350,16 +1358,29 @@ END_PROVIDER
|
||||
! mo_two_e_integrals_jj_anti(i,j) = J_ij - K_ij
|
||||
END_DOC
|
||||
|
||||
integer :: i,j
|
||||
integer :: i,j,k
|
||||
double precision :: get_two_e_integral
|
||||
|
||||
|
||||
if (do_ao_cholesky) then
|
||||
double precision, allocatable :: buffer(:,:)
|
||||
allocate (buffer(cholesky_mo_num,mo_num))
|
||||
do k=1,cholesky_mo_num
|
||||
do i=1,mo_num
|
||||
buffer(k,i) = cholesky_mo_transp(k,i,i)
|
||||
enddo
|
||||
enddo
|
||||
call dgemm('T','N',mo_num,mo_num,cholesky_mo_num,1.d0, &
|
||||
buffer, cholesky_mo_num, buffer, cholesky_mo_num, 0.d0, mo_two_e_integrals_jj, mo_num)
|
||||
deallocate(buffer)
|
||||
|
||||
do j=1,mo_num
|
||||
do i=1,mo_num
|
||||
!TODO: use dgemm
|
||||
mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j))
|
||||
mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i))
|
||||
mo_two_e_integrals_jj_exchange(i,j) = 0.d0
|
||||
do k=1,cholesky_mo_num
|
||||
mo_two_e_integrals_jj_exchange(i,j) = mo_two_e_integrals_jj_exchange(i,j) + &
|
||||
cholesky_mo_transp(k,i,j)*cholesky_mo_transp(k,j,i)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
|
@ -62,7 +62,7 @@ subroutine KMat_tilde_dump()
|
||||
do j = 1, mo_num
|
||||
do i = 1, mo_num
|
||||
! TCHint convention
|
||||
write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_two_e_chemist(j,i,l,k), i, j, k, l
|
||||
write(33, '(ES15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_two_e_chemist(j,i,l,k), i, j, k, l
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
@ -71,7 +71,7 @@ subroutine KMat_tilde_dump()
|
||||
do j = 1, mo_num
|
||||
do i = 1, mo_num
|
||||
! TCHint convention
|
||||
write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_one_e(i,j), i, j, 0, 0
|
||||
write(33, '(ES15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_one_e(i,j), i, j, 0, 0
|
||||
enddo
|
||||
enddo
|
||||
|
||||
@ -128,7 +128,7 @@ subroutine ERI_dump()
|
||||
do k = 1, mo_num
|
||||
do j = 1, mo_num
|
||||
do i = 1, mo_num
|
||||
write(33, '(4(I4, 2X), 4X, E15.7)') i, j, k, l, a1(i,j,k,l)
|
||||
write(33, '(4(I4, 2X), 4X, ES15.7)') i, j, k, l, a1(i,j,k,l)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
@ -167,8 +167,8 @@ subroutine LMat_tilde_dump()
|
||||
!write(33, '(6(I4, 2X), 4X, E15.7)') i, j, k, l, m, n, integral
|
||||
! TCHint convention
|
||||
if(dabs(integral).gt.1d-10) then
|
||||
write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, i, j, k, l, m, n
|
||||
!write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, l, m, n, i, j, k
|
||||
write(33, '(ES15.7, 4X, 6(I4, 2X))') -integral/3.d0, i, j, k, l, m, n
|
||||
!write(33, '(ES15.7, 4X, 6(I4, 2X))') -integral/3.d0, l, m, n, i, j, k
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
|
@ -72,7 +72,7 @@ subroutine molden_lr
|
||||
write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00'
|
||||
do k = 1, ao_prim_num(i_ao)
|
||||
i_prim +=1
|
||||
write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k)
|
||||
write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k)
|
||||
enddo
|
||||
l = i_ao
|
||||
do while ( ao_l(l) == ao_l(i_ao) )
|
||||
@ -170,7 +170,7 @@ subroutine molden_lr
|
||||
write (i_unit_output,*) 'Spin= Alpha'
|
||||
write (i_unit_output,*) 'Occup=', mo_occ(i)
|
||||
do j=1,ao_num
|
||||
write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i)
|
||||
write(i_unit_output, '(I6,2X,ES20.10)') j, mo_r_coef(iorder(j),i)
|
||||
enddo
|
||||
|
||||
write (i_unit_output,*) 'Sym= 1'
|
||||
@ -178,7 +178,7 @@ subroutine molden_lr
|
||||
write (i_unit_output,*) 'Spin= Alpha'
|
||||
write (i_unit_output,*) 'Occup=', mo_occ(i)
|
||||
do j=1,ao_num
|
||||
write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i)
|
||||
write(i_unit_output, '(I6,2X,ES20.10)') j, mo_l_coef(iorder(j),i)
|
||||
enddo
|
||||
enddo
|
||||
close(i_unit_output)
|
||||
@ -235,7 +235,7 @@ subroutine molden_l()
|
||||
write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00'
|
||||
do k = 1, ao_prim_num(i_ao)
|
||||
i_prim +=1
|
||||
write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k)
|
||||
write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k)
|
||||
enddo
|
||||
l = i_ao
|
||||
do while ( ao_l(l) == ao_l(i_ao) )
|
||||
@ -333,7 +333,7 @@ subroutine molden_l()
|
||||
write (i_unit_output,*) 'Spin= Alpha'
|
||||
write (i_unit_output,*) 'Occup=', mo_occ(i)
|
||||
do j=1,ao_num
|
||||
write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i)
|
||||
write(i_unit_output, '(I6,2X,ES20.10)') j, mo_l_coef(iorder(j),i)
|
||||
enddo
|
||||
enddo
|
||||
close(i_unit_output)
|
||||
@ -390,7 +390,7 @@ subroutine molden_r()
|
||||
write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00'
|
||||
do k = 1, ao_prim_num(i_ao)
|
||||
i_prim +=1
|
||||
write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k)
|
||||
write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k)
|
||||
enddo
|
||||
l = i_ao
|
||||
do while ( ao_l(l) == ao_l(i_ao) )
|
||||
@ -488,7 +488,7 @@ subroutine molden_r()
|
||||
write (i_unit_output,*) 'Spin= Alpha'
|
||||
write (i_unit_output,*) 'Occup=', mo_occ(i)
|
||||
do j=1,ao_num
|
||||
write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i)
|
||||
write(i_unit_output, '(I6,2X,ES20.10)') j, mo_r_coef(iorder(j),i)
|
||||
enddo
|
||||
enddo
|
||||
close(i_unit_output)
|
||||
|
@ -44,7 +44,7 @@ program molden
|
||||
write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00'
|
||||
do k = 1, ao_prim_num(i_ao)
|
||||
i_prim +=1
|
||||
write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k)
|
||||
write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k)
|
||||
enddo
|
||||
l = i_ao
|
||||
do while ( ao_l(l) == ao_l(i_ao) )
|
||||
@ -142,7 +142,7 @@ program molden
|
||||
write (i_unit_output,*) 'Spin= Alpha'
|
||||
write (i_unit_output,*) 'Occup=', mo_occ(i)
|
||||
do j=1,ao_num
|
||||
write(i_unit_output, '(I6,2X,E20.10)') j, mo_coef(iorder(j),i)
|
||||
write(i_unit_output, '(I6,2X,ES20.10)') j, mo_coef(iorder(j),i)
|
||||
enddo
|
||||
enddo
|
||||
close(i_unit_output)
|
||||
|
@ -28,7 +28,7 @@ subroutine routine
|
||||
do i = 1, N_det
|
||||
print *, 'Determinant ', i
|
||||
call debug_det(psi_det(1,1,i),N_int)
|
||||
print '(4E20.12,X)', (psi_coef(i,k), k=1,N_states)
|
||||
print '(4ES20.12,X)', (psi_coef(i,k), k=1,N_states)
|
||||
print *, ''
|
||||
print *, ''
|
||||
enddo
|
||||
|
@ -57,6 +57,12 @@ module c_functions
|
||||
end subroutine sscanf_sd_c
|
||||
end interface
|
||||
|
||||
interface
|
||||
integer(kind=c_int) function mkl_serv_intel_cpu_true() bind(C)
|
||||
use iso_c_binding
|
||||
end function
|
||||
end interface
|
||||
|
||||
contains
|
||||
|
||||
integer function atoi(a)
|
||||
@ -131,4 +137,3 @@ subroutine usleep(us)
|
||||
call usleep_c(u)
|
||||
end subroutine usleep
|
||||
|
||||
|
||||
|
5
src/utils/fast_mkl.c
Normal file
5
src/utils/fast_mkl.c
Normal file
@ -0,0 +1,5 @@
|
||||
int mkl_serv_intel_cpu_true() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -39,7 +39,7 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err
|
||||
write(str_size,'(I3)') size_nb
|
||||
|
||||
! Error
|
||||
write(str_exp,'(1pE20.0)') error
|
||||
write(str_exp,'(ES20.0)') error
|
||||
str_error = trim(adjustl(str_exp))
|
||||
|
||||
! Number of digit: Y (FX.Y) from the exponent
|
||||
|
@ -9,7 +9,6 @@
|
||||
|
||||
void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only)
|
||||
{
|
||||
int i;
|
||||
int fd;
|
||||
int result;
|
||||
void* map;
|
||||
@ -22,11 +21,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only)
|
||||
perror("Error opening mmap file for reading");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
map = mmap(NULL, bytes, PROT_READ, MAP_SHARED | MAP_HUGETLB, fd, 0);
|
||||
if (map == MAP_FAILED) {
|
||||
/* try again without huge pages */
|
||||
map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0);
|
||||
}
|
||||
map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -53,16 +48,12 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only)
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_HUGETLB, fd, 0);
|
||||
if (map == MAP_FAILED) {
|
||||
/* try again without huge pages */
|
||||
map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
}
|
||||
map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
}
|
||||
|
||||
if (map == MAP_FAILED) {
|
||||
close(fd);
|
||||
printf("%s:\n", filename);
|
||||
printf("%s: %lu\n", filename, bytes);
|
||||
perror("Error mmapping the file");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
@ -1565,7 +1565,7 @@ subroutine nullify_small_elements(m,n,A,LDA,thresh)
|
||||
! Remove tiny elements
|
||||
do j=1,n
|
||||
do i=1,m
|
||||
if ( dabs(A(i,j) * amax) < thresh ) then
|
||||
if ( (dabs(A(i,j) * amax) < thresh).or.(dabs(A(i,j)) < 1.d-99) ) then
|
||||
A(i,j) = 0.d0
|
||||
endif
|
||||
enddo
|
||||
@ -1661,7 +1661,15 @@ subroutine restore_symmetry(m,n,A,LDA,thresh)
|
||||
! Update i
|
||||
i = i + 1
|
||||
enddo
|
||||
copy(i:) = 0.d0
|
||||
|
||||
! To nullify the remaining elements that are below the threshold
|
||||
if (i == sze) then
|
||||
if (-copy(i) <= thresh) then
|
||||
copy(i) = 0d0
|
||||
endif
|
||||
else
|
||||
copy(i:) = 0.d0
|
||||
endif
|
||||
|
||||
!$OMP PARALLEL if (sze>10000) &
|
||||
!$OMP SHARED(m,sze,copy_sign,copy,key,A,ii,jj) &
|
||||
|
@ -11,6 +11,10 @@ subroutine map_save_to_disk(filename,map)
|
||||
|
||||
integer*8 :: n_elements
|
||||
n_elements = int(map % n_elements,8)
|
||||
if (n_elements <= 0) then
|
||||
print *, 'Unable to write map to disk: n_elements = ', n_elements
|
||||
stop -1
|
||||
endif
|
||||
|
||||
|
||||
if (map % consolidated) then
|
||||
|
@ -4,8 +4,10 @@ BEGIN_PROVIDER [ integer, qp_max_mem ]
|
||||
! Maximum memory in Gb
|
||||
END_DOC
|
||||
character*(128) :: env
|
||||
integer, external :: get_total_available_memory
|
||||
|
||||
qp_max_mem = 2000
|
||||
qp_max_mem = get_total_available_memory()
|
||||
call write_int(6,qp_max_mem,'Total available memory (GB)')
|
||||
call getenv('QP_MAXMEM',env)
|
||||
if (trim(env) /= '') then
|
||||
call lock_io()
|
||||
@ -97,16 +99,15 @@ subroutine check_mem(rss_in,routine)
|
||||
END_DOC
|
||||
double precision, intent(in) :: rss_in
|
||||
character*(*) :: routine
|
||||
double precision :: rss
|
||||
!$OMP CRITICAL
|
||||
call resident_memory(rss)
|
||||
rss += rss_in
|
||||
if (int(rss)+1 > qp_max_mem) then
|
||||
double precision :: mem
|
||||
call total_memory(mem)
|
||||
mem += rss_in
|
||||
if (mem > qp_max_mem) then
|
||||
call print_memory_usage()
|
||||
print *, 'Not enough memory: aborting in ', routine
|
||||
print *, int(rss)+1, ' GB required'
|
||||
print *, mem, ' GB required'
|
||||
stop -1
|
||||
endif
|
||||
!$OMP END CRITICAL
|
||||
end
|
||||
|
||||
subroutine print_memory_usage()
|
||||
@ -122,3 +123,35 @@ subroutine print_memory_usage()
|
||||
'.. >>>>> [ RES MEM : ', rss , &
|
||||
' GB ] [ VIRT MEM : ', mem, ' GB ] <<<<< ..'
|
||||
end
|
||||
|
||||
integer function get_total_available_memory() result(res)
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Returns the total available memory on the current machine
|
||||
END_DOC
|
||||
|
||||
character(len=128) :: line
|
||||
integer :: status
|
||||
integer :: iunit
|
||||
integer*8, parameter :: KB = 1024
|
||||
integer*8, parameter :: GiB = 1024**3
|
||||
integer, external :: getUnitAndOpen
|
||||
|
||||
iunit = getUnitAndOpen('/proc/meminfo','r')
|
||||
|
||||
res = 512
|
||||
do
|
||||
read(iunit, '(A)', END=10) line
|
||||
if (line(1:10) == "MemTotal: ") then
|
||||
read(line(11:), *, ERR=20) res
|
||||
res = int((res*KB) / GiB,4)
|
||||
exit
|
||||
20 continue
|
||||
end if
|
||||
end do
|
||||
10 continue
|
||||
close(iunit)
|
||||
|
||||
end function get_total_available_memory
|
||||
|
||||
|
||||
|
@ -46,7 +46,13 @@ module mmap_module
|
||||
integer(c_size_t) :: length
|
||||
integer(c_int) :: fd_
|
||||
|
||||
length = PRODUCT( shape(:) ) * bytes
|
||||
integer :: i
|
||||
|
||||
length = int(bytes,8)
|
||||
do i=1,size(shape)
|
||||
length = length * shape(i)
|
||||
enddo
|
||||
|
||||
if (read_only) then
|
||||
map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1)
|
||||
else
|
||||
@ -66,7 +72,12 @@ module mmap_module
|
||||
integer(c_size_t) :: length
|
||||
integer(c_int) :: fd_
|
||||
|
||||
length = PRODUCT( shape(:) ) * bytes
|
||||
integer :: i
|
||||
|
||||
length = int(bytes,8)
|
||||
do i=1,size(shape)
|
||||
length = length * shape(i)
|
||||
enddo
|
||||
fd_ = fd
|
||||
call c_munmap_fortran( length, fd_, map)
|
||||
end subroutine
|
||||
@ -82,7 +93,12 @@ module mmap_module
|
||||
integer(c_size_t) :: length
|
||||
integer(c_int) :: fd_
|
||||
|
||||
length = PRODUCT( shape(:) ) * bytes
|
||||
integer :: i
|
||||
|
||||
length = int(bytes,8)
|
||||
do i=1,size(shape)
|
||||
length = length * shape(i)
|
||||
enddo
|
||||
fd_ = fd
|
||||
call c_msync_fortran( length, fd_, map)
|
||||
end subroutine
|
||||
|
@ -48,32 +48,31 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v)
|
||||
integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k
|
||||
|
||||
if (do_ao_cholesky) then
|
||||
double precision, allocatable :: buffer(:,:,:)
|
||||
!$OMP PARALLEL &
|
||||
!$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) &
|
||||
!$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)&
|
||||
!$OMP DEFAULT(NONE)
|
||||
allocate(buffer(mo_num,mo_num,mo_num))
|
||||
!$OMP DO
|
||||
double precision, allocatable :: buffer(:,:,:,:)
|
||||
double precision, allocatable :: v1(:,:,:), v2(:,:,:)
|
||||
allocate(v1(cholesky_mo_num,n1,n3), v2(cholesky_mo_num,n2,n4))
|
||||
allocate(buffer(n1,n3,n2,n4))
|
||||
|
||||
call gen_v_space_chol(n1,n3,list1,list3,v1,cholesky_mo_num)
|
||||
call gen_v_space_chol(n2,n4,list2,list4,v2,cholesky_mo_num)
|
||||
|
||||
call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, &
|
||||
v1, cholesky_mo_num, &
|
||||
v2, cholesky_mo_num, 0.d0, buffer, n1*n3)
|
||||
|
||||
deallocate(v1,v2)
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4)
|
||||
do i4 = 1, n4
|
||||
idx4 = list4(i4)
|
||||
call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, &
|
||||
cholesky_mo_transp, cholesky_ao_num, &
|
||||
cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num)
|
||||
do i2 = 1, n2
|
||||
idx2 = list2(i2)
|
||||
do i3 = 1, n3
|
||||
idx3 = list3(i3)
|
||||
do i3 = 1, n3
|
||||
do i2 = 1, n2
|
||||
do i1 = 1, n1
|
||||
idx1 = list1(i1)
|
||||
v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2)
|
||||
v(i1,i2,i3,i4) = buffer(i1,i3,i2,i4)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
deallocate(buffer)
|
||||
!$OMP END PARALLEL
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
else
|
||||
double precision :: get_two_e_integral
|
||||
@ -105,6 +104,30 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v)
|
||||
|
||||
end
|
||||
|
||||
subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv)
|
||||
|
||||
implicit none
|
||||
|
||||
integer, intent(in) :: n1,n3,ldv
|
||||
integer, intent(in) :: list1(n1),list3(n3)
|
||||
double precision, intent(out) :: v(ldv,n1,n3)
|
||||
|
||||
integer :: i1,i3,idx1,idx3,k
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i3,idx1,idx3,k)
|
||||
do i3=1,n3
|
||||
idx3 = list3(i3)
|
||||
do i1=1,n1
|
||||
idx1 = list1(i1)
|
||||
do k=1,cholesky_mo_num
|
||||
v(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
end
|
||||
|
||||
! full
|
||||
|
||||
BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)]
|
||||
@ -112,16 +135,17 @@ BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)]
|
||||
if (do_ao_cholesky) then
|
||||
integer :: i1,i2,i3,i4
|
||||
double precision, allocatable :: buffer(:,:,:)
|
||||
call set_multiple_levels_omp(.False.)
|
||||
!$OMP PARALLEL &
|
||||
!$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) &
|
||||
!$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_mo_num) &
|
||||
!$OMP PRIVATE(i1,i2,i3,i4,k,buffer)&
|
||||
!$OMP DEFAULT(NONE)
|
||||
allocate(buffer(mo_num,mo_num,mo_num))
|
||||
!$OMP DO
|
||||
do i4 = 1, mo_num
|
||||
call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, &
|
||||
cholesky_mo_transp, cholesky_ao_num, &
|
||||
cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num)
|
||||
call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_mo_num, 1.d0, &
|
||||
cholesky_mo_transp, cholesky_mo_num, &
|
||||
cholesky_mo_transp(1,1,i4), cholesky_mo_num, 0.d0, buffer, mo_num*mo_num)
|
||||
do i2 = 1, mo_num
|
||||
do i3 = 1, mo_num
|
||||
do i1 = 1, mo_num
|
||||
@ -166,7 +190,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_n
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo)
|
||||
if (do_ao_cholesky) then
|
||||
|
||||
integer :: i1, i2, i3, i4
|
||||
integer :: n1, n2, n3, n4
|
||||
|
||||
n1 = size(cc_space_v_oooo,1)
|
||||
n2 = size(cc_space_v_oooo,2)
|
||||
n3 = size(cc_space_v_oooo,3)
|
||||
n4 = size(cc_space_v_oooo,4)
|
||||
|
||||
double precision, allocatable :: buffer(:,:,:,:)
|
||||
allocate(buffer(n1,n3,n2,n4))
|
||||
|
||||
call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, &
|
||||
cc_space_v_oo_chol, cholesky_mo_num, &
|
||||
cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3)
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2)
|
||||
do i4 = 1, n4
|
||||
do i3 = 1, n3
|
||||
do i2 = 1, n2
|
||||
do i1 = 1, n1
|
||||
cc_space_v_oooo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
deallocate(buffer)
|
||||
|
||||
else
|
||||
call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo)
|
||||
endif
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
@ -176,7 +233,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_n
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo)
|
||||
if (do_ao_cholesky) then
|
||||
|
||||
integer :: i1, i2, i3, i4
|
||||
integer :: n1, n2, n3, n4
|
||||
|
||||
n1 = size(cc_space_v_vooo,1)
|
||||
n2 = size(cc_space_v_vooo,2)
|
||||
n3 = size(cc_space_v_vooo,3)
|
||||
n4 = size(cc_space_v_vooo,4)
|
||||
|
||||
double precision, allocatable :: buffer(:,:,:,:)
|
||||
allocate(buffer(n1,n3,n2,n4))
|
||||
|
||||
call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, &
|
||||
cc_space_v_vo_chol, cholesky_mo_num, &
|
||||
cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3)
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2)
|
||||
do i4 = 1, n4
|
||||
do i3 = 1, n3
|
||||
do i2 = 1, n2
|
||||
do i1 = 1, n1
|
||||
cc_space_v_vooo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
deallocate(buffer)
|
||||
|
||||
else
|
||||
call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo)
|
||||
endif
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
@ -186,7 +276,32 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_n
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo)
|
||||
|
||||
if (do_ao_cholesky) then
|
||||
|
||||
integer :: i1, i2, i3, i4
|
||||
integer :: n1, n2, n3, n4
|
||||
|
||||
n1 = size(cc_space_v_ovoo,1)
|
||||
n2 = size(cc_space_v_ovoo,2)
|
||||
n3 = size(cc_space_v_ovoo,3)
|
||||
n4 = size(cc_space_v_ovoo,4)
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2)
|
||||
do i4 = 1, n4
|
||||
do i3 = 1, n3
|
||||
do i2 = 1, n2
|
||||
do i1 = 1, n1
|
||||
cc_space_v_ovoo(i1,i2,i3,i4) = cc_space_v_vooo(i2,i1,i4,i3)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
else
|
||||
call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo)
|
||||
endif
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
@ -196,7 +311,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_n
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo)
|
||||
if (do_ao_cholesky) then
|
||||
|
||||
integer :: i1, i2, i3, i4
|
||||
integer :: n1, n2, n3, n4
|
||||
|
||||
n1 = size(cc_space_v_oovo,1)
|
||||
n2 = size(cc_space_v_oovo,2)
|
||||
n3 = size(cc_space_v_oovo,3)
|
||||
n4 = size(cc_space_v_oovo,4)
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2)
|
||||
do i4 = 1, n4
|
||||
do i3 = 1, n3
|
||||
do i2 = 1, n2
|
||||
do i1 = 1, n1
|
||||
cc_space_v_oovo(i1,i2,i3,i4) = cc_space_v_vooo(i3,i2,i1,i4)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
else
|
||||
call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo)
|
||||
endif
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
@ -206,7 +345,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_n
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov)
|
||||
if (do_ao_cholesky) then
|
||||
|
||||
integer :: i1, i2, i3, i4
|
||||
integer :: n1, n2, n3, n4
|
||||
|
||||
n1 = size(cc_space_v_oovo,1)
|
||||
n2 = size(cc_space_v_oovo,2)
|
||||
n3 = size(cc_space_v_oovo,3)
|
||||
n4 = size(cc_space_v_oovo,4)
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2)
|
||||
do i4 = 1, n4
|
||||
do i3 = 1, n3
|
||||
do i2 = 1, n2
|
||||
do i1 = 1, n1
|
||||
cc_space_v_ooov(i1,i2,i3,i4) = cc_space_v_ovoo(i1,i4,i3,i2)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
else
|
||||
call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov)
|
||||
endif
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
@ -216,7 +379,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo)
|
||||
if (do_ao_cholesky) then
|
||||
|
||||
integer :: i1, i2, i3, i4
|
||||
integer :: n1, n2, n3, n4
|
||||
|
||||
n1 = size(cc_space_v_vvoo,1)
|
||||
n2 = size(cc_space_v_vvoo,2)
|
||||
n3 = size(cc_space_v_vvoo,3)
|
||||
n4 = size(cc_space_v_vvoo,4)
|
||||
|
||||
double precision, allocatable :: buffer(:,:,:,:)
|
||||
allocate(buffer(n1,n3,n2,n4))
|
||||
|
||||
call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, &
|
||||
cc_space_v_vo_chol, cholesky_mo_num, &
|
||||
cc_space_v_vo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3)
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2)
|
||||
do i4 = 1, n4
|
||||
do i3 = 1, n3
|
||||
do i2 = 1, n2
|
||||
do i1 = 1, n1
|
||||
cc_space_v_vvoo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
deallocate(buffer)
|
||||
|
||||
else
|
||||
call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo)
|
||||
endif
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
@ -226,7 +422,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_n
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo)
|
||||
if (do_ao_cholesky) then
|
||||
|
||||
integer :: i1, i2, i3, i4
|
||||
integer :: n1, n2, n3, n4
|
||||
|
||||
n1 = size(cc_space_v_vovo,1)
|
||||
n2 = size(cc_space_v_vovo,2)
|
||||
n3 = size(cc_space_v_vovo,3)
|
||||
n4 = size(cc_space_v_vovo,4)
|
||||
|
||||
double precision, allocatable :: buffer(:,:,:,:)
|
||||
allocate(buffer(n1,n3,n2,n4))
|
||||
|
||||
call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, &
|
||||
cc_space_v_vv_chol, cholesky_mo_num, &
|
||||
cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3)
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2)
|
||||
do i4 = 1, n4
|
||||
do i3 = 1, n3
|
||||
do i2 = 1, n2
|
||||
do i1 = 1, n1
|
||||
cc_space_v_vovo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
deallocate(buffer)
|
||||
|
||||
else
|
||||
call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo)
|
||||
endif
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
@ -236,7 +465,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_voov, (cc_nVa, cc_nOa, cc_nOa, cc_n
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov)
|
||||
if (do_ao_cholesky) then
|
||||
|
||||
integer :: i1, i2, i3, i4
|
||||
integer :: n1, n2, n3, n4
|
||||
|
||||
n1 = size(cc_space_v_voov,1)
|
||||
n2 = size(cc_space_v_voov,2)
|
||||
n3 = size(cc_space_v_voov,3)
|
||||
n4 = size(cc_space_v_voov,4)
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2)
|
||||
do i4 = 1, n4
|
||||
do i3 = 1, n3
|
||||
do i2 = 1, n2
|
||||
do i1 = 1, n1
|
||||
cc_space_v_voov(i1,i2,i3,i4) = cc_space_v_vvoo(i1,i4,i3,i2)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
else
|
||||
call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov)
|
||||
endif
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
@ -246,7 +499,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_n
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo)
|
||||
if (do_ao_cholesky) then
|
||||
|
||||
integer :: i1, i2, i3, i4
|
||||
integer :: n1, n2, n3, n4
|
||||
|
||||
n1 = size(cc_space_v_ovvo,1)
|
||||
n2 = size(cc_space_v_ovvo,2)
|
||||
n3 = size(cc_space_v_ovvo,3)
|
||||
n4 = size(cc_space_v_ovvo,4)
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2)
|
||||
do i4 = 1, n4
|
||||
do i3 = 1, n3
|
||||
do i2 = 1, n2
|
||||
do i1 = 1, n1
|
||||
cc_space_v_ovvo(i1,i2,i3,i4) = cc_space_v_vvoo(i3,i2,i1,i4)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
else
|
||||
call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo)
|
||||
endif
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
@ -256,7 +533,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_n
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov)
|
||||
if (do_ao_cholesky) then
|
||||
|
||||
integer :: i1, i2, i3, i4
|
||||
integer :: n1, n2, n3, n4
|
||||
|
||||
n1 = size(cc_space_v_ovov,1)
|
||||
n2 = size(cc_space_v_ovov,2)
|
||||
n3 = size(cc_space_v_ovov,3)
|
||||
n4 = size(cc_space_v_ovov,4)
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2)
|
||||
do i4 = 1, n4
|
||||
do i3 = 1, n3
|
||||
do i2 = 1, n2
|
||||
do i1 = 1, n1
|
||||
cc_space_v_ovov(i1,i2,i3,i4) = cc_space_v_vovo(i2,i1,i4,i3)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
else
|
||||
call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov)
|
||||
endif
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
@ -266,7 +567,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv)
|
||||
if (do_ao_cholesky) then
|
||||
|
||||
integer :: i1, i2, i3, i4
|
||||
integer :: n1, n2, n3, n4
|
||||
|
||||
n1 = size(cc_space_v_oovv,1)
|
||||
n2 = size(cc_space_v_oovv,2)
|
||||
n3 = size(cc_space_v_oovv,3)
|
||||
n4 = size(cc_space_v_oovv,4)
|
||||
|
||||
!$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2)
|
||||
do i4 = 1, n4
|
||||
do i3 = 1, n3
|
||||
do i2 = 1, n2
|
||||
do i1 = 1, n1
|
||||
cc_space_v_oovv(i1,i2,i3,i4) = cc_space_v_vvoo(i3,i4,i1,i2)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
else
|
||||
call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv)
|
||||
endif
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
@ -320,6 +645,38 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_n
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [double precision, cc_space_v_vv_chol, (cholesky_mo_num, cc_nVa, cc_nVa)]
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space_chol(cc_nVa, cc_nVa, cc_list_vir, cc_list_vir, cc_space_v_vv_chol, cholesky_mo_num)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [double precision, cc_space_v_vo_chol, (cholesky_mo_num, cc_nVa, cc_nOa)]
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space_chol(cc_nVa, cc_nOa, cc_list_vir, cc_list_occ, cc_space_v_vo_chol, cholesky_mo_num)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [double precision, cc_space_v_ov_chol, (cholesky_mo_num, cc_nOa, cc_nVa)]
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space_chol(cc_nOa, cc_nVa, cc_list_occ, cc_list_vir, cc_space_v_ov_chol, cholesky_mo_num)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [double precision, cc_space_v_oo_chol, (cholesky_mo_num, cc_nOa, cc_nOa)]
|
||||
|
||||
implicit none
|
||||
|
||||
call gen_v_space_chol(cc_nOa, cc_nOa, cc_list_occ, cc_list_occ, cc_space_v_oo_chol, cholesky_mo_num)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
! ppqq
|
||||
|
||||
BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)]
|
||||
|
@ -73,7 +73,7 @@ subroutine rotation_matrix_iterative(m,X,R)
|
||||
|
||||
!print*,'R'
|
||||
!do i = 1, m
|
||||
! write(*,'(10(E12.5))') R(i,:)
|
||||
! write(*,'(10(ES12.5))') R(i,:)
|
||||
!enddo
|
||||
|
||||
do i = 1, m
|
||||
@ -82,7 +82,7 @@ subroutine rotation_matrix_iterative(m,X,R)
|
||||
|
||||
!print*,'RRT'
|
||||
!do i = 1, m
|
||||
! write(*,'(10(E12.5))') RRT(i,:)
|
||||
! write(*,'(10(ES12.5))') RRT(i,:)
|
||||
!enddo
|
||||
|
||||
max_elem = 0d0
|
||||
|
@ -336,7 +336,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
|
||||
d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
|
||||
d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
|
||||
endif
|
||||
!write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2
|
||||
!write(*,'(a,ES12.5,a,ES12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2
|
||||
|
||||
! Newton's step
|
||||
y = -(1d0/DABS(d_2))*d_1
|
||||
@ -345,7 +345,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
|
||||
if (DABS(y) > alpha) then
|
||||
y = alpha * (y/DABS(y)) ! preservation of the sign of y
|
||||
endif
|
||||
!write(*,'(a,E12.5)') ' Step length: ', y
|
||||
!write(*,'(a,ES12.5)') ' Step length: ', y
|
||||
|
||||
! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series
|
||||
model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2
|
||||
@ -414,7 +414,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
|
||||
else
|
||||
alpha = 0.25d0 * alpha
|
||||
endif
|
||||
!write(*,'(a,E12.5)') ' New trust length alpha: ', alpha
|
||||
!write(*,'(a,ES12.5)') ' New trust length alpha: ', alpha
|
||||
|
||||
! cancellaion of the step if rho < 0.1
|
||||
if (rho_2 < thresh_rho_2) then !0.1d0) then
|
||||
|
Loading…
Reference in New Issue
Block a user