mirror of
https://github.com/QuantumPackage/qp2.git
synced 2024-11-19 12:32:30 +01:00
Merge branch 'dev-stable' of https://github.com/AbdAmmar/qp2 into dev-stable
This commit is contained in:
commit
eb236e0112
18
configure
vendored
18
configure
vendored
@ -40,7 +40,7 @@ Usage:
|
|||||||
$(basename $0) -c <file>
|
$(basename $0) -c <file>
|
||||||
$(basename $0) -h
|
$(basename $0) -h
|
||||||
$(basename $0) -i <package>
|
$(basename $0) -i <package>
|
||||||
$(basename $0) -g [nvidia|none]
|
$(basename $0) -g [nvidia|intel|none]
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
-c <file> Define a COMPILATION configuration file,
|
-c <file> Define a COMPILATION configuration file,
|
||||||
@ -49,7 +49,7 @@ Options:
|
|||||||
-i <package> INSTALL <package>. Use at your OWN RISK:
|
-i <package> INSTALL <package>. Use at your OWN RISK:
|
||||||
no support will be provided for the installation of
|
no support will be provided for the installation of
|
||||||
dependencies.
|
dependencies.
|
||||||
-g [nvidia|none] Choose GPU acceleration (experimental)
|
-g [nvidia|intel|none] Choose GPU acceleration
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
./$(basename $0) -c config/gfortran.cfg
|
./$(basename $0) -c config/gfortran.cfg
|
||||||
@ -115,19 +115,23 @@ while getopts "d:c:i:g:h" c ; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
# Handle GPU acceleration
|
# Handle GPU acceleration
|
||||||
rm -f ${QP_ROOT}/src/gpu
|
rm -f ${QP_ROOT}/src/gpu_arch
|
||||||
case "$GPU" in
|
case "$GPU" in
|
||||||
amd) # Nvidia
|
amd) # AMD
|
||||||
echo "Activating AMD GPU acceleration"
|
echo "Activating AMD GPU acceleration"
|
||||||
ln -s ${QP_ROOT}/src/gpu_amd ${QP_ROOT}/src/gpu
|
ln -s ${QP_ROOT}/plugins/local/gpu_amd ${QP_ROOT}/src/gpu_arch
|
||||||
|
;;
|
||||||
|
intel) # Intel
|
||||||
|
echo "Activating Intel GPU acceleration (EXPERIMENTAL)"
|
||||||
|
ln -s ${QP_ROOT}/plugins/local/gpu_intel ${QP_ROOT}/src/gpu_arch
|
||||||
;;
|
;;
|
||||||
nvidia) # Nvidia
|
nvidia) # Nvidia
|
||||||
echo "Activating Nvidia GPU acceleration"
|
echo "Activating Nvidia GPU acceleration"
|
||||||
ln -s ${QP_ROOT}/src/gpu_nvidia ${QP_ROOT}/src/gpu
|
ln -s ${QP_ROOT}/plugins/local/gpu_nvidia ${QP_ROOT}/src/gpu_arch
|
||||||
;;
|
;;
|
||||||
*) # No Acceleration
|
*) # No Acceleration
|
||||||
echo "Disabling GPU acceleration"
|
echo "Disabling GPU acceleration"
|
||||||
ln -s ${QP_ROOT}/src/gpu_x86 ${QP_ROOT}/src/gpu
|
ln -s ${QP_ROOT}/plugins/local/gpu_x86 ${QP_ROOT}/src/gpu_arch
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
@ -1,4 +1,36 @@
|
|||||||
|
|
||||||
|
|
||||||
|
! ---
|
||||||
|
|
||||||
|
subroutine run_pouet

  BEGIN_DOC
  ! Debugging helper: prints, for every orbital index i in 1..mo_num,
  ! the diagonal element Fock_matrix_tc_mo_tot(i,i).
  END_DOC

  implicit none
  integer :: i

  ! Request the providers this routine was already requesting before printing.
  PROVIDE mo_l_coef mo_r_coef
  PROVIDE H_apply_buffer_allocated distributed_davidson

  print*, ' Diagonal elements of the Fock matrix '
  do i = 1, mo_num
    write(*,*) i, Fock_matrix_tc_mo_tot(i,i)
  enddo

end
|
||||||
! ---
|
! ---
|
||||||
|
|
||||||
subroutine run_stochastic_cipsi
|
subroutine run_stochastic_cipsi
|
||||||
|
@ -65,7 +65,15 @@ subroutine run_cipsi_tc()
|
|||||||
|
|
||||||
if (.not. is_zmq_slave) then
|
if (.not. is_zmq_slave) then
|
||||||
|
|
||||||
PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e
|
if(.True.)then! DO NOT REMOVE THE IF(.TRUE.) !!
|
||||||
|
! this has to be provided before mo_bi_ortho_tc_two_e to avoid twice the computation of ao_two_e_tc_tot
|
||||||
|
PROVIDE Fock_matrix_tc_mo_tot
|
||||||
|
! because Fock_matrix_tc_mo_tot depends on ao_two_e_tc_tot
|
||||||
|
! and that mo_bi_ortho_tc_two_e erase ao_two_e_tc_tot after being provided
|
||||||
|
endif
|
||||||
|
if(.True.)then ! DO NOT REMOVE THE IF(.TRUE.) !!
|
||||||
|
PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e
|
||||||
|
endif
|
||||||
|
|
||||||
if((elec_alpha_num+elec_beta_num) .ge. 3) then
|
if((elec_alpha_num+elec_beta_num) .ge. 3) then
|
||||||
if(three_body_h_tc) then
|
if(three_body_h_tc) then
|
||||||
@ -90,8 +98,16 @@ subroutine run_cipsi_tc()
|
|||||||
call json_close
|
call json_close
|
||||||
|
|
||||||
else
|
else
|
||||||
|
if(.True.)then! DO NOT REMOVE THE IF(.TRUE.) !!
|
||||||
|
! this has to be provided before mo_bi_ortho_tc_two_e to avoid twice the computation of ao_two_e_tc_tot
|
||||||
|
PROVIDE Fock_matrix_tc_mo_tot
|
||||||
|
! because Fock_matrix_tc_mo_tot depends on ao_two_e_tc_tot
|
||||||
|
! and that mo_bi_ortho_tc_two_e erase ao_two_e_tc_tot after being provided
|
||||||
|
endif
|
||||||
|
|
||||||
PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks
|
if(.True.)then! DO NOT REMOVE THE IF(.TRUE.) !!
|
||||||
|
PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks
|
||||||
|
endif
|
||||||
|
|
||||||
if((elec_alpha_num+elec_beta_num) .ge. 3) then
|
if((elec_alpha_num+elec_beta_num) .ge. 3) then
|
||||||
if(three_body_h_tc) then
|
if(three_body_h_tc) then
|
||||||
|
2
plugins/local/gpu_intel/LIB
Normal file
2
plugins/local/gpu_intel/LIB
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
-ltbb -lsycl -lmkl_sycl -lgpu -limf -lintlc -lstdc++
|
||||||
|
|
8
plugins/local/gpu_intel/README.rst
Normal file
8
plugins/local/gpu_intel/README.rst
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
=========
|
||||||
|
gpu_intel
|
||||||
|
=========
|
||||||
|
|
||||||
|
Intel implementation of GPU routines. Uses MKL and SYCL.
|
||||||
|
```bash
|
||||||
|
icpx -fsycl gpu.cxx -c -qmkl=sequential
|
||||||
|
```
|
177
plugins/local/gpu_intel/gpu.sycl
Normal file
177
plugins/local/gpu_intel/gpu.sycl
Normal file
@ -0,0 +1,177 @@
|
|||||||
|
#include <CL/sycl.hpp>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <oneapi/mkl/blas.hpp>
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
|
||||||
|
/* Generic functions */
|
||||||
|
|
||||||
|
int gpu_ndevices() {
  /* This backend always reports exactly one device. */
  const int device_count = 1;
  return device_count;
}
|
||||||
|
|
||||||
|
void gpu_set_device(int32_t igpu) {
  /* Intentionally a no-op: the requested device index is ignored. */
  (void) igpu;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Allocation functions */
|
||||||
|
|
||||||
|
void gpu_allocate(void** ptr, int64_t size) {
|
||||||
|
auto queue = sycl::queue(sycl::default_selector_v);
|
||||||
|
|
||||||
|
try {
|
||||||
|
*ptr = sycl::malloc_shared(size, queue);
|
||||||
|
assert(*ptr != nullptr);
|
||||||
|
} catch (const sycl::exception& e) {
|
||||||
|
std::cerr << "SYCL exception caught: " << e.what() << std::endl;
|
||||||
|
*ptr = nullptr; // If allocation fails, set pointer to nullptr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_deallocate(void** ptr) {
|
||||||
|
assert(*ptr != nullptr);
|
||||||
|
sycl::free(*ptr, sycl::queue(sycl::default_selector_v));
|
||||||
|
*ptr = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Upload data from host to device */
|
||||||
|
void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) {
|
||||||
|
sycl::queue queue(sycl::default_selector_v);
|
||||||
|
queue.memcpy(gpu_ptr, cpu_ptr, n).wait();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Download data from device to host */
|
||||||
|
void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) {
|
||||||
|
sycl::queue queue(sycl::default_selector_v);
|
||||||
|
queue.memcpy(cpu_ptr, gpu_ptr, n).wait();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy data from one GPU memory location to another */
|
||||||
|
void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) {
|
||||||
|
sycl::queue queue(sycl::default_selector_v);
|
||||||
|
queue.memcpy(gpu_ptr_dest, gpu_ptr_src, n).wait();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Queues */
|
||||||
|
|
||||||
|
/* SYCL queue as a replacement for CUDA stream */
|
||||||
|
void gpu_stream_create(sycl::queue** ptr) {
  /* Heap-allocate a default-selected queue and hand it back through *ptr;
   * it is released by gpu_stream_destroy. */
  sycl::queue* const stream = new sycl::queue(sycl::default_selector_v);
  *ptr = stream;
}
|
||||||
|
|
||||||
|
void gpu_stream_destroy(sycl::queue** ptr) {
  /* Delete the queue referenced by *ptr and reset *ptr to nullptr. */
  sycl::queue* const stream = *ptr;
  assert(stream != nullptr);
  delete stream;
  *ptr = nullptr;
}
|
||||||
|
|
||||||
|
void gpu_synchronize() {
|
||||||
|
sycl::queue queue(sycl::default_selector_v);
|
||||||
|
queue.wait_and_throw();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* BLAS functions */
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
sycl::queue* queue;
|
||||||
|
} blasHandle_t;
|
||||||
|
|
||||||
|
void gpu_set_stream(blasHandle_t* handle, sycl::queue* ptr) {
|
||||||
|
handle->queue = ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_blas_create(blasHandle_t** ptr) {
|
||||||
|
*ptr = (blasHandle_t*) malloc(sizeof(blasHandle_t));
|
||||||
|
assert(*ptr != nullptr);
|
||||||
|
(*ptr)->queue = new sycl::queue(sycl::default_selector_v);
|
||||||
|
assert((*ptr)->queue != nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_blas_destroy(blasHandle_t** ptr) {
|
||||||
|
assert(*ptr != nullptr);
|
||||||
|
delete (*ptr)->queue;
|
||||||
|
free(*ptr);
|
||||||
|
*ptr = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_ddot(blasHandle_t* handle, const int64_t n, const double* x, const int64_t incx,
              const double* y, const int64_t incy, double* result) {
  /* Dot product of x and y (n elements, strides incx / incy) through oneMKL,
   * submitted on the handle's queue; the value is written to *result. */

  // Argument sanity checks (active only when assertions are enabled)
  assert(handle != nullptr && handle->queue != nullptr);
  assert(n > 0 && incx > 0 && incy > 0);
  assert(x != nullptr && y != nullptr && result != nullptr);

  // NOTE(review): the value returned by dot() is discarded, so nothing here
  // waits for *result to be written — confirm the caller synchronizes.
  sycl::queue& q = *handle->queue;
  oneapi::mkl::blas::dot(q, n, x, incx, y, incy, result);
}
|
||||||
|
|
||||||
|
void gpu_dgemv(blasHandle_t* handle, const char* transa, const int64_t m, const int64_t n, const double* alpha,
               const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) {
  /* Column-major matrix-vector product through oneMKL gemv on the handle's
   * queue. *transa equal to 'T' or 't' selects the transposed operation;
   * any other character selects the non-transposed one. alpha and beta are
   * dereferenced here, on the host. */

  assert(handle != nullptr);
  assert(handle->queue != nullptr);

  // Sizes and strides must be strictly positive, pointers non-null
  assert(m > 0 && n > 0 && lda > 0 && incx > 0 && incy > 0);
  assert(a != nullptr && x != nullptr && y != nullptr && alpha != nullptr && beta != nullptr);

  const bool transposed = (*transa == 'T' || *transa == 't');
  const oneapi::mkl::transpose op =
      transposed ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans;

  sycl::queue& q = *handle->queue;
  oneapi::mkl::blas::column_major::gemv(q, op, m, n, *alpha, a, lda, x, incx, *beta, y, incy);
}
|
||||||
|
|
||||||
|
void gpu_dgemm(blasHandle_t* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha,
               const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) {
  /* Column-major matrix-matrix product through oneMKL gemm on the handle's
   * queue. For each of transa / transb, 'T' or 't' selects the transposed
   * operand; any other character selects the non-transposed one. alpha and
   * beta are dereferenced here, on the host. */

  assert(handle != nullptr && handle->queue != nullptr);
  assert(m > 0 && n > 0 && k > 0 && lda > 0 && ldb > 0 && ldc > 0);
  assert(a != nullptr && b != nullptr && c != nullptr && alpha != nullptr && beta != nullptr);

  const bool a_transposed = (*transa == 'T' || *transa == 't');
  const bool b_transposed = (*transb == 'T' || *transb == 't');

  const oneapi::mkl::transpose op_a =
      a_transposed ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans;
  const oneapi::mkl::transpose op_b =
      b_transposed ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans;

  sycl::queue& q = *handle->queue;
  oneapi::mkl::blas::column_major::gemm(q, op_a, op_b, m, n, k,
                                        *alpha, a, lda, b, ldb, *beta, c, ldc);
}
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_dgeam(blasHandle_t* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha,
               const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
  /* C = alpha * op(A) + beta * op(B), with C of size m x n.
   *
   * Storage is column-major, matching the column_major gemv/gemm wrappers of
   * this file: element (i,j) of C is c[i + j*ldc]. For each operand, 'T' or
   * 't' selects the transpose; any other character selects the matrix as is.
   * The kernel is submitted on the handle's queue and is not waited on here. */
  assert(handle != nullptr && handle->queue != nullptr);
  assert(m > 0 && n > 0 && lda > 0 && ldb > 0 && ldc > 0);
  assert(a != nullptr && b != nullptr && c != nullptr && alpha != nullptr && beta != nullptr);

  const bool transA = (*transa == 'T' || *transa == 't');
  const bool transB = (*transb == 'T' || *transb == 't');

  // Read the scalars on the host, as gpu_dgemv / gpu_dgemm do: the kernel
  // must not dereference alpha / beta itself.
  const double alpha_ = *alpha;
  const double beta_  = *beta;

  handle->queue->submit([&](sycl::handler& cgh) {
    // Range is (n, m) so that the row index i is the last dimension.
    cgh.parallel_for(sycl::range<2>(n, m), [=](sycl::id<2> idx) {
      // 64-bit indices: i + j*ld can exceed the range of int.
      const int64_t j = (int64_t) idx[0];
      const int64_t i = (int64_t) idx[1];

      // op(X)(i,j) is X(i,j) when not transposed, X(j,i) when transposed.
      const int64_t ai = transA ? (j + i * lda) : (i + j * lda);
      const int64_t bi = transB ? (j + i * ldb) : (i + j * ldb);
      const int64_t ci = i + j * ldc;

      c[ci] = alpha_ * a[ai] + beta_ * b[bi];
    });
  });
}
|
||||||
|
|
||||||
|
} // extern C
|
1
plugins/local/gpu_nvidia/LIB
Normal file
1
plugins/local/gpu_nvidia/LIB
Normal file
@ -0,0 +1 @@
|
|||||||
|
-lcudart -lcublas -lcublasLt
|
1
plugins/local/gpu_nvidia/NEED
Normal file
1
plugins/local/gpu_nvidia/NEED
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
5
plugins/local/gpu_nvidia/README.rst
Normal file
5
plugins/local/gpu_nvidia/README.rst
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
==========
|
||||||
|
gpu_nvidia
|
||||||
|
==========
|
||||||
|
|
||||||
|
Nvidia implementation of GPU routines. Uses CUDA and CUBLAS libraries.
|
326
plugins/local/gpu_nvidia/gpu.c
Normal file
326
plugins/local/gpu_nvidia/gpu.c
Normal file
@ -0,0 +1,326 @@
|
|||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#include <cublas_v2.h>
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
|
||||||
|
|
||||||
|
/* Generic functions */
|
||||||
|
|
||||||
|
int gpu_ndevices() {
|
||||||
|
int ngpus;
|
||||||
|
cudaGetDeviceCount(&ngpus);
|
||||||
|
return ngpus;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_set_device(int32_t igpu) {
  /* Forward the device index to cudaSetDevice; its status is not checked. */
  const int device = (int) igpu;
  cudaSetDevice(device);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Allocation functions */
|
||||||
|
|
||||||
|
void gpu_allocate(void** ptr, const int64_t size) {
|
||||||
|
size_t free, total;
|
||||||
|
cudaError_t rc = cudaMemGetInfo( &free, &total );
|
||||||
|
if (rc != cudaSuccess) {
|
||||||
|
free = INT64_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal);
|
||||||
|
// /* Use managed memory if it does not fit on the GPU */
|
||||||
|
// if (size < free && size < total/2) {
|
||||||
|
// rc= cudaMalloc(ptr, size);
|
||||||
|
// } else {
|
||||||
|
// rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal);
|
||||||
|
// }
|
||||||
|
assert (rc == cudaSuccess);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_deallocate(void** ptr) {
  /* Free the allocation referenced by *ptr with cudaFree, then reset *ptr. */
  void* const mem = *ptr;
  assert (mem != NULL);
  cudaFree(mem);
  *ptr = NULL;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Memory transfer functions */
|
||||||
|
|
||||||
|
void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) {
|
||||||
|
cudaMemcpy (gpu_ptr, cpu_ptr, n, cudaMemcpyHostToDevice);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) {
|
||||||
|
cudaMemcpy (cpu_ptr, gpu_ptr, n, cudaMemcpyDeviceToHost);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) {
|
||||||
|
cudaMemcpy (gpu_ptr_dest, gpu_ptr_src, n, cudaMemcpyDeviceToDevice);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Streams */
|
||||||
|
|
||||||
|
void gpu_stream_create(cudaStream_t* ptr) {
  /* Create a CUDA stream in *ptr; failure trips an assertion. */
  const cudaError_t status = cudaStreamCreate(ptr);
  assert (status == cudaSuccess);
}
|
||||||
|
|
||||||
|
void gpu_stream_destroy(cudaStream_t* ptr) {
  /* Destroy the stream stored in *ptr and reset *ptr to NULL. */
  assert (ptr != NULL);

  const cudaError_t status = cudaStreamDestroy(*ptr);
  assert (status == cudaSuccess);

  *ptr = NULL;
}
|
||||||
|
|
||||||
|
void gpu_set_stream(cublasHandle_t handle, cudaStream_t stream) {
  /* Attach `stream` to the cuBLAS handle; the returned status is ignored. */
  (void) cublasSetStream(handle, stream);
}
|
||||||
|
|
||||||
|
void gpu_synchronize() {
  /* Thin wrapper over cudaDeviceSynchronize; the returned status is ignored. */
  (void) cudaDeviceSynchronize();
}
|
||||||
|
|
||||||
|
|
||||||
|
/* BLAS functions */
|
||||||
|
|
||||||
|
void gpu_blas_create(cublasHandle_t* ptr) {
  /* Create a cuBLAS handle in *ptr; failure trips an assertion. */
  const cublasStatus_t status = cublasCreate(ptr);
  assert (status == CUBLAS_STATUS_SUCCESS);
}
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_blas_destroy(cublasHandle_t* ptr) {
  /* Destroy the cuBLAS handle stored in *ptr and reset *ptr to NULL, so the
   * caller's handle no longer refers to the destroyed object (as
   * gpu_stream_destroy does for streams). */
  assert (ptr != NULL);

  const cublasStatus_t rc = cublasDestroy(*ptr);
  assert (rc == CUBLAS_STATUS_SUCCESS);

  /* Clear the caller's handle, not the local copy of the pointer. */
  *ptr = NULL;
}
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_ddot(cublasHandle_t handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) {
  /* Double-precision dot product through cublasDdot; the value is written
   * to *result. Failure trips an assertion. */
  assert (handle != NULL);

  /* cublasDdot takes int arguments: narrow, then check nothing was lost */
  const int n_    = (int) n;
  const int incx_ = (int) incx;
  const int incy_ = (int) incy;

  assert ( (int64_t) n_    == n    );
  assert ( (int64_t) incx_ == incx );
  assert ( (int64_t) incy_ == incy );

  const cublasStatus_t status = cublasDdot(handle, n_, x, incx_, y, incy_, result);
  assert (status == CUBLAS_STATUS_SUCCESS);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_sdot(cublasHandle_t handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) {
  /* Single-precision dot product through cublasSdot. The value is computed
   * into a local and then copied to *result. Failure trips an assertion. */
  assert (handle != NULL);

  /* cublasSdot takes int arguments: narrow, then check nothing was lost */
  const int n_    = (int) n;
  const int incx_ = (int) incx;
  const int incy_ = (int) incy;

  assert ( (int64_t) n_    == n    );
  assert ( (int64_t) incx_ == incx );
  assert ( (int64_t) incy_ == incy );

  float dot_value = 0.;
  const cublasStatus_t status = cublasSdot(handle, n_, x, incx_, y, incy_, &dot_value);
  assert (status == CUBLAS_STATUS_SUCCESS);

  *result = dot_value;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_dgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const double* alpha,
               const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) {
  /* Double-precision matrix-vector product through cublasDgemv.
   * *transa equal to 'T' or 't' selects CUBLAS_OP_T; any other character
   * selects CUBLAS_OP_N. The cuBLAS status is asserted, as in gpu_ddot. */

  assert (handle != NULL);

  /* cublasDgemv takes int arguments: narrow, then check nothing was lost */
  const int m_    = (int) m;
  const int n_    = (int) n;
  const int lda_  = (int) lda;
  const int incx_ = (int) incx;
  const int incy_ = (int) incy;

  assert ( (int64_t) m_    == m    );
  assert ( (int64_t) n_    == n    );
  assert ( (int64_t) lda_  == lda  );
  assert ( (int64_t) incx_ == incx );
  assert ( (int64_t) incy_ == incy );

  const cublasOperation_t transa_ =
      (*transa == 'T' || *transa == 't') ? CUBLAS_OP_T : CUBLAS_OP_N;

  const cublasStatus_t rc = cublasDgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_);
  assert (rc == CUBLAS_STATUS_SUCCESS);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_sgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const float* alpha,
               const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy) {
  /* Single-precision matrix-vector product through cublasSgemv.
   * *transa equal to 'T' or 't' selects CUBLAS_OP_T; any other character
   * selects CUBLAS_OP_N. The cuBLAS status is asserted, as in gpu_sdot. */

  assert (handle != NULL);

  /* cublasSgemv takes int arguments: narrow, then check nothing was lost */
  const int m_    = (int) m;
  const int n_    = (int) n;
  const int lda_  = (int) lda;
  const int incx_ = (int) incx;
  const int incy_ = (int) incy;

  assert ( (int64_t) m_    == m    );
  assert ( (int64_t) n_    == n    );
  assert ( (int64_t) lda_  == lda  );
  assert ( (int64_t) incx_ == incx );
  assert ( (int64_t) incy_ == incy );

  const cublasOperation_t transa_ =
      (*transa == 'T' || *transa == 't') ? CUBLAS_OP_T : CUBLAS_OP_N;

  const cublasStatus_t rc = cublasSgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_);
  assert (rc == CUBLAS_STATUS_SUCCESS);
}
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_dgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha,
               const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) {
  /* Double-precision matrix-matrix product through cublasDgemm.
   * For each of transa / transb, 'T' or 't' selects CUBLAS_OP_T; any other
   * character selects CUBLAS_OP_N. The cuBLAS status is asserted, as in
   * gpu_ddot. */

  assert (handle != NULL);

  /* cublasDgemm takes int arguments: narrow, then check nothing was lost */
  const int m_   = (int) m;
  const int n_   = (int) n;
  const int k_   = (int) k;
  const int lda_ = (int) lda;
  const int ldb_ = (int) ldb;
  const int ldc_ = (int) ldc;

  assert ( (int64_t) m_   == m   );
  assert ( (int64_t) n_   == n   );
  assert ( (int64_t) k_   == k   );
  assert ( (int64_t) lda_ == lda );
  assert ( (int64_t) ldb_ == ldb );
  assert ( (int64_t) ldc_ == ldc );

  const cublasOperation_t transa_ =
      (*transa == 'T' || *transa == 't') ? CUBLAS_OP_T : CUBLAS_OP_N;
  const cublasOperation_t transb_ =
      (*transb == 'T' || *transb == 't') ? CUBLAS_OP_T : CUBLAS_OP_N;

  const cublasStatus_t rc = cublasDgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_);
  assert (rc == CUBLAS_STATUS_SUCCESS);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_sgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha,
               const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc) {
  /* Single-precision matrix-matrix product through cublasSgemm.
   * For each of transa / transb, 'T' or 't' selects CUBLAS_OP_T; any other
   * character selects CUBLAS_OP_N. The cuBLAS status is asserted, as in
   * gpu_sdot. */

  assert (handle != NULL);

  /* cublasSgemm takes int arguments: narrow, then check nothing was lost */
  const int m_   = (int) m;
  const int n_   = (int) n;
  const int k_   = (int) k;
  const int lda_ = (int) lda;
  const int ldb_ = (int) ldb;
  const int ldc_ = (int) ldc;

  assert ( (int64_t) m_   == m   );
  assert ( (int64_t) n_   == n   );
  assert ( (int64_t) k_   == k   );
  assert ( (int64_t) lda_ == lda );
  assert ( (int64_t) ldb_ == ldb );
  assert ( (int64_t) ldc_ == ldc );

  const cublasOperation_t transa_ =
      (*transa == 'T' || *transa == 't') ? CUBLAS_OP_T : CUBLAS_OP_N;
  const cublasOperation_t transb_ =
      (*transb == 'T' || *transb == 't') ? CUBLAS_OP_T : CUBLAS_OP_N;

  const cublasStatus_t rc = cublasSgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_);
  assert (rc == CUBLAS_STATUS_SUCCESS);
}
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_dgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha,
               const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
  /* Double-precision matrix addition/transposition through cublasDgeam.
   * For each of transa / transb, 'T' or 't' selects CUBLAS_OP_T; any other
   * character selects CUBLAS_OP_N. The cuBLAS status is asserted, as in
   * gpu_ddot. */
  assert (handle != NULL);

  /* cublasDgeam takes int arguments: narrow, then check nothing was lost */
  const int m_   = (int) m;
  const int n_   = (int) n;
  const int lda_ = (int) lda;
  const int ldb_ = (int) ldb;
  const int ldc_ = (int) ldc;

  assert ( (int64_t) m_   == m   );
  assert ( (int64_t) n_   == n   );
  assert ( (int64_t) lda_ == lda );
  assert ( (int64_t) ldb_ == ldb );
  assert ( (int64_t) ldc_ == ldc );

  const cublasOperation_t transa_ =
      (*transa == 'T' || *transa == 't') ? CUBLAS_OP_T : CUBLAS_OP_N;
  const cublasOperation_t transb_ =
      (*transb == 'T' || *transb == 't') ? CUBLAS_OP_T : CUBLAS_OP_N;

  const cublasStatus_t rc = cublasDgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_);
  assert (rc == CUBLAS_STATUS_SUCCESS);
}
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_sgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float* alpha,
               const float* a, const int64_t lda, const float* beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) {
  /* Single-precision matrix addition/transposition through cublasSgeam.
   * For each of transa / transb, 'T' or 't' selects CUBLAS_OP_T; any other
   * character selects CUBLAS_OP_N. The cuBLAS status is asserted, as in
   * gpu_sdot. */
  assert (handle != NULL);

  /* cublasSgeam takes int arguments: narrow, then check nothing was lost */
  const int m_   = (int) m;
  const int n_   = (int) n;
  const int lda_ = (int) lda;
  const int ldb_ = (int) ldb;
  const int ldc_ = (int) ldc;

  assert ( (int64_t) m_   == m   );
  assert ( (int64_t) n_   == n   );
  assert ( (int64_t) lda_ == lda );
  assert ( (int64_t) ldb_ == ldb );
  assert ( (int64_t) ldc_ == ldc );

  const cublasOperation_t transa_ =
      (*transa == 'T' || *transa == 't') ? CUBLAS_OP_T : CUBLAS_OP_N;
  const cublasOperation_t transb_ =
      (*transb == 'T' || *transb == 't') ? CUBLAS_OP_T : CUBLAS_OP_N;

  const cublasStatus_t rc = cublasSgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_);
  assert (rc == CUBLAS_STATUS_SUCCESS);
}
|
1
plugins/local/gpu_x86/NEED
Normal file
1
plugins/local/gpu_x86/NEED
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
@ -2,13 +2,13 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <stdbool.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
|
|
||||||
/* Generic functions */
|
/* Generic functions */
|
||||||
|
|
||||||
int gpu_ndevices() {
|
int gpu_ndevices() {
|
||||||
return 1;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gpu_set_device(int32_t i) {
|
void gpu_set_device(int32_t i) {
|
||||||
@ -25,7 +25,7 @@ void gpu_allocate(void** ptr, const int64_t n) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void gpu_free(void** ptr) {
|
void gpu_deallocate(void** ptr) {
|
||||||
free(*ptr);
|
free(*ptr);
|
||||||
*ptr = NULL;
|
*ptr = NULL;
|
||||||
}
|
}
|
||||||
@ -49,10 +49,11 @@ void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) {
|
|||||||
/* Streams */
|
/* Streams */
|
||||||
|
|
||||||
void gpu_stream_create(void** ptr) {
|
void gpu_stream_create(void** ptr) {
|
||||||
*ptr = (void*) 2;
|
*ptr = (void*) malloc(sizeof(char));
|
||||||
}
|
}
|
||||||
|
|
||||||
void gpu_stream_destroy(void** ptr) {
|
void gpu_stream_destroy(void** ptr) {
|
||||||
|
free(*ptr);
|
||||||
*ptr = NULL;
|
*ptr = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -68,18 +69,19 @@ void gpu_synchronize() {
|
|||||||
/* BLAS functions */
|
/* BLAS functions */
|
||||||
|
|
||||||
void gpu_blas_create(void** handle) {
|
void gpu_blas_create(void** handle) {
|
||||||
*handle = (void*) 1;
|
*handle = (void*) malloc(sizeof(char));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void gpu_blas_destroy(void** handle) {
|
void gpu_blas_destroy(void** handle) {
|
||||||
|
free(*handle);
|
||||||
*handle = NULL;
|
*handle = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
double ddot_(const int32_t* n, const double* x, const int32_t* incx, const double* y, const int32_t* incy);
|
double ddot_(const int32_t* n, const double* x, const int32_t* incx, const double* y, const int32_t* incy);
|
||||||
|
|
||||||
void gpu_ddot(const void* handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) {
|
void gpu_ddot(void* handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) {
|
||||||
assert (handle != NULL);
|
assert (handle != NULL);
|
||||||
|
|
||||||
/* Convert to int32_t */
|
/* Convert to int32_t */
|
||||||
@ -100,7 +102,7 @@ void gpu_ddot(const void* handle, const int64_t n, const double* x, const int64_
|
|||||||
|
|
||||||
float sdot_(const int32_t* n, const float* x, const int32_t* incx, const float* y, const int32_t* incy);
|
float sdot_(const int32_t* n, const float* x, const int32_t* incx, const float* y, const int32_t* incy);
|
||||||
|
|
||||||
void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) {
|
void gpu_sdot(void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) {
|
||||||
assert (handle != NULL);
|
assert (handle != NULL);
|
||||||
|
|
||||||
/* Convert to int32_t */
|
/* Convert to int32_t */
|
||||||
@ -122,8 +124,8 @@ void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t
|
|||||||
void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha,
|
void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha,
|
||||||
const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy);
|
const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy);
|
||||||
|
|
||||||
void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const double alpha,
|
void gpu_dgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const double* alpha,
|
||||||
const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) {
|
const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) {
|
||||||
|
|
||||||
assert (handle != NULL);
|
assert (handle != NULL);
|
||||||
|
|
||||||
@ -143,15 +145,15 @@ void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int
|
|||||||
assert ( (int64_t) incx_ == incx);
|
assert ( (int64_t) incx_ == incx);
|
||||||
assert ( (int64_t) incy_ == incy);
|
assert ( (int64_t) incy_ == incy);
|
||||||
|
|
||||||
dgemv_(&transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_);
|
dgemv_(transa, &m_, &n_, alpha, a, &lda_, x, &incx_, beta, y, &incy_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha,
|
void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha,
|
||||||
const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy);
|
const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy);
|
||||||
|
|
||||||
void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const float alpha,
|
void gpu_sgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const float* alpha,
|
||||||
const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) {
|
const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy) {
|
||||||
|
|
||||||
assert (handle != NULL);
|
assert (handle != NULL);
|
||||||
|
|
||||||
@ -171,15 +173,15 @@ void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int
|
|||||||
assert ( (int64_t) incx_ == incx);
|
assert ( (int64_t) incx_ == incx);
|
||||||
assert ( (int64_t) incy_ == incy);
|
assert ( (int64_t) incy_ == incy);
|
||||||
|
|
||||||
sgemv_(&transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_);
|
sgemv_(transa, &m_, &n_, alpha, a, &lda_, x, &incx_, beta, y, &incy_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha,
|
void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha,
|
||||||
const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc);
|
const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc);
|
||||||
|
|
||||||
void gpu_dgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha,
|
void gpu_dgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha,
|
||||||
const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) {
|
const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) {
|
||||||
|
|
||||||
assert (handle != NULL);
|
assert (handle != NULL);
|
||||||
|
|
||||||
@ -201,7 +203,7 @@ void gpu_dgemm(const void* handle, const char transa, const char transb, const i
|
|||||||
assert ( (int64_t) ldb_ == ldb);
|
assert ( (int64_t) ldb_ == ldb);
|
||||||
assert ( (int64_t) ldc_ == ldc);
|
assert ( (int64_t) ldc_ == ldc);
|
||||||
|
|
||||||
dgemm_(&transa, &transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_);
|
dgemm_(transa, transb, &m_, &n_, &k_, alpha, a, &lda_, b, &ldb_, beta, c, &ldc_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -209,8 +211,8 @@ void gpu_dgemm(const void* handle, const char transa, const char transb, const i
|
|||||||
void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha,
|
void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha,
|
||||||
const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc);
|
const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc);
|
||||||
|
|
||||||
void gpu_sgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha,
|
void gpu_sgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha,
|
||||||
const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) {
|
const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc) {
|
||||||
|
|
||||||
assert (handle != NULL);
|
assert (handle != NULL);
|
||||||
|
|
||||||
@ -232,136 +234,133 @@ void gpu_sgemm(const void* handle, const char transa, const char transb, const i
|
|||||||
assert ( (int64_t) ldb_ == ldb);
|
assert ( (int64_t) ldb_ == ldb);
|
||||||
assert ( (int64_t) ldc_ == ldc);
|
assert ( (int64_t) ldc_ == ldc);
|
||||||
|
|
||||||
sgemm_(&transa, &transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_);
|
sgemm_(transa, transb, &m_, &n_, &k_, alpha, a, &lda_, b, &ldb_, beta, c, &ldc_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void gpu_dgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha,
|
void gpu_dgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha,
|
||||||
const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
|
const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
|
||||||
if (handle == NULL) {
|
assert (handle != NULL);
|
||||||
perror("NULL handle");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( (transa == 'N' && transb == 'N') ||
|
if ( (*transa == 'N' && *transb == 'N') ||
|
||||||
(transa == 'n' && transb == 'N') ||
|
(*transa == 'n' && *transb == 'N') ||
|
||||||
(transa == 'N' && transb == 'n') ||
|
(*transa == 'N' && *transb == 'n') ||
|
||||||
(transa == 'n' && transb == 'n') ) {
|
(*transa == 'n' && *transb == 'n') ) {
|
||||||
|
|
||||||
if (alpha == 0.) {
|
if (*alpha == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = beta * b[j*ldb+i];
|
c[j*ldc+i] = *beta * b[j*ldb+i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (beta == 0.) {
|
} else if (*beta == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[j*lda+i];
|
c[j*ldc+i] = *alpha * a[j*lda+i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[j*lda+i] + beta * b[j*ldb+i];
|
c[j*ldc+i] = *alpha * a[j*lda+i] + *beta * b[j*ldb+i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if ( (transa == 'N' && transb == 'T') ||
|
} else if ( (*transa == 'N' && *transb == 'T') ||
|
||||||
(transa == 'n' && transb == 'T') ||
|
(*transa == 'n' && *transb == 'T') ||
|
||||||
(transa == 'N' && transb == 't') ||
|
(*transa == 'N' && *transb == 't') ||
|
||||||
(transa == 'n' && transb == 't') ) {
|
(*transa == 'n' && *transb == 't') ) {
|
||||||
|
|
||||||
if (alpha == 0.) {
|
if (*alpha == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = beta * b[i*ldb+j];
|
c[j*ldc+i] = *beta * b[i*ldb+j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (beta == 0.) {
|
} else if (*beta == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[j*lda+i];
|
c[j*ldc+i] = *alpha * a[j*lda+i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[j*lda+i] + beta * b[i*ldb+j];
|
c[j*ldc+i] = *alpha * a[j*lda+i] + *beta * b[i*ldb+j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if ( (transa == 'T' && transb == 'N') ||
|
} else if ( (*transa == 'T' && *transb == 'N') ||
|
||||||
(transa == 't' && transb == 'N') ||
|
(*transa == 't' && *transb == 'N') ||
|
||||||
(transa == 'T' && transb == 'n') ||
|
(*transa == 'T' && *transb == 'n') ||
|
||||||
(transa == 't' && transb == 'n') ) {
|
(*transa == 't' && *transb == 'n') ) {
|
||||||
|
|
||||||
if (alpha == 0.) {
|
if (*alpha == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = beta * b[j*ldb+i];
|
c[j*ldc+i] = *beta * b[j*ldb+i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (beta == 0.) {
|
} else if (*beta == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[i*lda+j];
|
c[j*ldc+i] = *alpha * a[i*lda+j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[i*lda+j] + beta * b[j*ldb+i];
|
c[j*ldc+i] = *alpha * a[i*lda+j] + *beta * b[j*ldb+i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if ( (transa == 'T' && transb == 'T') ||
|
} else if ( (*transa == 'T' && *transb == 'T') ||
|
||||||
(transa == 't' && transb == 'T') ||
|
(*transa == 't' && *transb == 'T') ||
|
||||||
(transa == 'T' && transb == 't') ||
|
(*transa == 'T' && *transb == 't') ||
|
||||||
(transa == 't' && transb == 't') ) {
|
(*transa == 't' && *transb == 't') ) {
|
||||||
|
|
||||||
if (alpha == 0.) {
|
if (*alpha == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = beta * b[i*ldb+j];
|
c[j*ldc+i] = *beta * b[i*ldb+j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (beta == 0.) {
|
} else if (*beta == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[i*lda+j];
|
c[j*ldc+i] = *alpha * a[i*lda+j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[i*lda+j] + beta * b[i*ldb+j];
|
c[j*ldc+i] = *alpha * a[i*lda+j] + *beta * b[i*ldb+j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -371,132 +370,129 @@ void gpu_dgeam(const void* handle, const char transa, const char transb, const i
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void gpu_sgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha,
|
void gpu_sgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float* alpha,
|
||||||
const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) {
|
const float* a, const int64_t lda, const float* beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) {
|
||||||
if (handle == NULL) {
|
assert (handle != NULL);
|
||||||
perror("NULL handle");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( (transa == 'N' && transb == 'N') ||
|
if ( (*transa == 'N' && *transb == 'N') ||
|
||||||
(transa == 'n' && transb == 'N') ||
|
(*transa == 'n' && *transb == 'N') ||
|
||||||
(transa == 'N' && transb == 'n') ||
|
(*transa == 'N' && *transb == 'n') ||
|
||||||
(transa == 'n' && transb == 'n') ) {
|
(*transa == 'n' && *transb == 'n') ) {
|
||||||
|
|
||||||
if (alpha == 0.) {
|
if (*alpha == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = beta * b[j*ldb+i];
|
c[j*ldc+i] = *beta * b[j*ldb+i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (beta == 0.) {
|
} else if (*beta == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[j*lda+i];
|
c[j*ldc+i] = *alpha * a[j*lda+i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[j*lda+i] + beta * b[j*ldb+i];
|
c[j*ldc+i] = *alpha * a[j*lda+i] + *beta * b[j*ldb+i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if ( (transa == 'N' && transb == 'T') ||
|
} else if ( (*transa == 'N' && *transb == 'T') ||
|
||||||
(transa == 'n' && transb == 'T') ||
|
(*transa == 'n' && *transb == 'T') ||
|
||||||
(transa == 'N' && transb == 't') ||
|
(*transa == 'N' && *transb == 't') ||
|
||||||
(transa == 'n' && transb == 't') ) {
|
(*transa == 'n' && *transb == 't') ) {
|
||||||
|
|
||||||
if (alpha == 0.) {
|
if (*alpha == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = beta * b[i*ldb+j];
|
c[j*ldc+i] = *beta * b[i*ldb+j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (beta == 0.) {
|
} else if (*beta == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[j*lda+i];
|
c[j*ldc+i] = *alpha * a[j*lda+i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[j*lda+i] + beta * b[i*ldb+j];
|
c[j*ldc+i] = *alpha * a[j*lda+i] + *beta * b[i*ldb+j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if ( (transa == 'T' && transb == 'N') ||
|
} else if ( (*transa == 'T' && *transb == 'N') ||
|
||||||
(transa == 't' && transb == 'N') ||
|
(*transa == 't' && *transb == 'N') ||
|
||||||
(transa == 'T' && transb == 'n') ||
|
(*transa == 'T' && *transb == 'n') ||
|
||||||
(transa == 't' && transb == 'n') ) {
|
(*transa == 't' && *transb == 'n') ) {
|
||||||
|
|
||||||
if (alpha == 0.) {
|
if (*alpha == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = beta * b[j*ldb+i];
|
c[j*ldc+i] = *beta * b[j*ldb+i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (beta == 0.) {
|
} else if (*beta == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[i*lda+j];
|
c[j*ldc+i] = *alpha * a[i*lda+j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[i*lda+j] + beta * b[j*ldb+i];
|
c[j*ldc+i] = *alpha * a[i*lda+j] + *beta * b[j*ldb+i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if ( (transa == 'T' && transb == 'T') ||
|
} else if ( (*transa == 'T' && *transb == 'T') ||
|
||||||
(transa == 't' && transb == 'T') ||
|
(*transa == 't' && *transb == 'T') ||
|
||||||
(transa == 'T' && transb == 't') ||
|
(*transa == 'T' && *transb == 't') ||
|
||||||
(transa == 't' && transb == 't') ) {
|
(*transa == 't' && *transb == 't') ) {
|
||||||
|
|
||||||
if (alpha == 0.) {
|
if (*alpha == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = beta * b[i*ldb+j];
|
c[j*ldc+i] = *beta * b[i*ldb+j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (beta == 0.) {
|
} else if (*beta == 0.) {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[i*lda+j];
|
c[j*ldc+i] = *alpha * a[i*lda+j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
for (int64_t j=0 ; j<n ; ++j) {
|
for (int64_t j=0 ; j<n ; ++j) {
|
||||||
for (int64_t i=0 ; i<n ; ++i) {
|
for (int64_t i=0 ; i<m ; ++i) {
|
||||||
c[j*ldc+i] = alpha * a[i*lda+j] + beta * b[i*ldb+j];
|
c[j*ldc+i] = *alpha * a[i*lda+j] + *beta * b[i*ldb+j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -288,25 +288,31 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
|
|||||||
!$OMP END DO
|
!$OMP END DO
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
else
|
else
|
||||||
print*, ' ao_integrals_map will be used'
|
! print*, ' ao_integrals_map will be used'
|
||||||
PROVIDE ao_integrals_map
|
! PROVIDE ao_integrals_map
|
||||||
|
print*,'Cholesky vectors will be used '
|
||||||
|
double precision :: get_ao_integ_chol,eri
|
||||||
|
eri = get_ao_integ_chol(1,1,1,1) ! FOR OPENMP
|
||||||
!$OMP PARALLEL DEFAULT(NONE) &
|
!$OMP PARALLEL DEFAULT(NONE) &
|
||||||
!$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) &
|
!!! !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) &
|
||||||
!$OMP PRIVATE(i, j, k, l)
|
!$OMP SHARED(ao_num, ao_two_e_tc_tot) &
|
||||||
|
!$OMP PRIVATE(i, j, k, l,eri)
|
||||||
!$OMP DO COLLAPSE(3)
|
!$OMP DO COLLAPSE(3)
|
||||||
do j = 1, ao_num
|
do j = 1, ao_num
|
||||||
do l = 1, ao_num
|
do l = 1, ao_num
|
||||||
do i = 1, ao_num
|
do i = 1, ao_num
|
||||||
do k = 1, ao_num
|
do k = 1, ao_num
|
||||||
! < 1:i, 2:j | 1:k, 2:l >
|
! < 1:i, 2:j | 1:k, 2:l >
|
||||||
ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map)
|
! eri = get_ao_two_e_integral(i, j, k, l, ao_integrals_map)
|
||||||
|
eri = get_ao_integ_chol(i,k,j,l)
|
||||||
|
ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + eri
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
!$OMP END DO
|
!$OMP END DO
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
FREE ao_integrals_map
|
! FREE ao_integrals_map
|
||||||
endif
|
endif
|
||||||
|
|
||||||
if((tc_integ_type .eq. "numeric") .and. (.not. tc_save_mem)) then
|
if((tc_integ_type .eq. "numeric") .and. (.not. tc_save_mem)) then
|
||||||
|
@ -10,8 +10,6 @@ subroutine provide_all_three_ints_bi_ortho()
|
|||||||
implicit none
|
implicit none
|
||||||
double precision :: t1, t2
|
double precision :: t1, t2
|
||||||
|
|
||||||
PROVIDE ao_two_e_integrals_in_map
|
|
||||||
|
|
||||||
print *, ' start provide_all_three_ints_bi_ortho'
|
print *, ' start provide_all_three_ints_bi_ortho'
|
||||||
call wall_time(t1)
|
call wall_time(t1)
|
||||||
|
|
||||||
|
@ -30,7 +30,9 @@ BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho, (N_det,N_det)]
|
|||||||
print *, ' PROVIDING htilde_matrix_elmt_bi_ortho ...'
|
print *, ' PROVIDING htilde_matrix_elmt_bi_ortho ...'
|
||||||
call wall_time(t1)
|
call wall_time(t1)
|
||||||
|
|
||||||
call provide_all_three_ints_bi_ortho()
|
if(three_body_h_tc)then
|
||||||
|
call provide_all_three_ints_bi_ortho()
|
||||||
|
endif
|
||||||
|
|
||||||
i = 1
|
i = 1
|
||||||
j = 1
|
j = 1
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
program spher_harm
|
program spher_harm
|
||||||
implicit none
|
implicit none
|
||||||
! call test_spher_harm
|
call test_spher_harm
|
||||||
! call test_cart
|
! call test_cart
|
||||||
call test_brutal_spheric
|
! call test_brutal_spheric
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ subroutine spher_harm_func_r3(r,l,m,re_ylm, im_ylm)
|
|||||||
double precision :: theta, phi,r_abs
|
double precision :: theta, phi,r_abs
|
||||||
call cartesian_to_spherical(r,theta,phi,r_abs)
|
call cartesian_to_spherical(r,theta,phi,r_abs)
|
||||||
call spher_harm_func(l,m,theta,phi,re_ylm, im_ylm)
|
call spher_harm_func(l,m,theta,phi,re_ylm, im_ylm)
|
||||||
|
! call spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
@ -131,6 +132,10 @@ subroutine spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm)
|
|||||||
tmp = - inv_sq_pi * dsqrt(3.d0/8.d0) * dsin(theta)
|
tmp = - inv_sq_pi * dsqrt(3.d0/8.d0) * dsin(theta)
|
||||||
re_ylm = tmp * dcos(phi)
|
re_ylm = tmp * dcos(phi)
|
||||||
im_ylm = tmp * dsin(phi)
|
im_ylm = tmp * dsin(phi)
|
||||||
|
else if (l==1.and.m==-1)then
|
||||||
|
tmp = - inv_sq_pi * dsqrt(3.d0/8.d0) * dsin(theta)
|
||||||
|
re_ylm = tmp * dcos(phi)
|
||||||
|
im_ylm = -tmp * dsin(phi)
|
||||||
else if(l==1.and.m==0)then
|
else if(l==1.and.m==0)then
|
||||||
tmp = inv_sq_pi * dsqrt(3.d0/4.d0) * dcos(theta)
|
tmp = inv_sq_pi * dsqrt(3.d0/4.d0) * dcos(theta)
|
||||||
re_ylm = tmp
|
re_ylm = tmp
|
||||||
@ -139,10 +144,18 @@ subroutine spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm)
|
|||||||
tmp = 0.25d0 * inv_sq_pi * dsqrt(0.5d0*15.d0) * dsin(theta)*dsin(theta)
|
tmp = 0.25d0 * inv_sq_pi * dsqrt(0.5d0*15.d0) * dsin(theta)*dsin(theta)
|
||||||
re_ylm = tmp * dcos(2.d0*phi)
|
re_ylm = tmp * dcos(2.d0*phi)
|
||||||
im_ylm = tmp * dsin(2.d0*phi)
|
im_ylm = tmp * dsin(2.d0*phi)
|
||||||
|
else if(l==2.and.m==-2)then
|
||||||
|
tmp = 0.25d0 * inv_sq_pi * dsqrt(0.5d0*15.d0) * dsin(theta)*dsin(theta)
|
||||||
|
re_ylm = tmp * dcos(2.d0*phi)
|
||||||
|
im_ylm =-tmp * dsin(2.d0*phi)
|
||||||
else if(l==2.and.m==1)then
|
else if(l==2.and.m==1)then
|
||||||
tmp = - inv_sq_pi * dsqrt(15.d0/8.d0) * dsin(theta) * dcos(theta)
|
tmp = - inv_sq_pi * dsqrt(15.d0/8.d0) * dsin(theta) * dcos(theta)
|
||||||
re_ylm = tmp * dcos(phi)
|
re_ylm = tmp * dcos(phi)
|
||||||
im_ylm = tmp * dsin(phi)
|
im_ylm = tmp * dsin(phi)
|
||||||
|
else if(l==2.and.m==-1)then
|
||||||
|
tmp = - inv_sq_pi * dsqrt(15.d0/8.d0) * dsin(theta) * dcos(theta)
|
||||||
|
re_ylm = tmp * dcos(phi)
|
||||||
|
im_ylm =-tmp * dsin(phi)
|
||||||
else if(l==2.and.m==0)then
|
else if(l==2.and.m==0)then
|
||||||
tmp = dsqrt(5.d0/4.d0) * inv_sq_pi* (1.5d0*dcos(theta)*dcos(theta)-0.5d0)
|
tmp = dsqrt(5.d0/4.d0) * inv_sq_pi* (1.5d0*dcos(theta)*dcos(theta)-0.5d0)
|
||||||
re_ylm = tmp
|
re_ylm = tmp
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
gpu
|
||||||
tc_keywords
|
tc_keywords
|
||||||
jastrow
|
jastrow
|
||||||
qmckl
|
qmckl
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
! ---
|
! ---
|
||||||
|
|
||||||
subroutine provide_int2_grad1_u12_ao()
|
subroutine provide_int2_grad1_u12_ao()
|
||||||
|
use gpu
|
||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
!
|
!
|
||||||
! int2_grad1_u12_ao(i,j,ipoint,1) = \int dr2 [\grad1 u(r1,r2)]_x1 \chi_i(r2) \chi_j(r2)
|
! int2_grad1_u12_ao(i,j,ipoint,1) = \int dr2 [\grad1 u(r1,r2)]_x1 \chi_i(r2) \chi_j(r2)
|
||||||
@ -35,8 +35,9 @@ subroutine provide_int2_grad1_u12_ao()
|
|||||||
double precision :: weight1, ao_k_r, ao_i_r
|
double precision :: weight1, ao_k_r, ao_i_r
|
||||||
double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq
|
double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq
|
||||||
double precision :: time0, time1, time2, tc1, tc2, tc
|
double precision :: time0, time1, time2, tc1, tc2, tc
|
||||||
double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:), tc_int_2e_ao(:,:,:,:)
|
type(gpu_double4) :: int2_grad1_u12_ao
|
||||||
double precision, allocatable :: tmp(:,:,:), c_mat(:,:,:), tmp_grad1_u12(:,:,:)
|
type(gpu_double3) :: tmp_grad1_u12, tmp_grad1_u12p, tmp
|
||||||
|
double precision, allocatable :: c_mat(:,:,:), tc_int_2e_ao(:,:,:,:)
|
||||||
|
|
||||||
double precision, external :: get_ao_two_e_integral
|
double precision, external :: get_ao_two_e_integral
|
||||||
|
|
||||||
@ -51,6 +52,7 @@ subroutine provide_int2_grad1_u12_ao()
|
|||||||
|
|
||||||
call total_memory(mem)
|
call total_memory(mem)
|
||||||
mem = max(1.d0, qp_max_mem - mem)
|
mem = max(1.d0, qp_max_mem - mem)
|
||||||
|
mem = 6
|
||||||
n_double = mem * 1.d8
|
n_double = mem * 1.d8
|
||||||
n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid))
|
n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid))
|
||||||
n_rest = int(mod(n_points_final_grid, n_blocks))
|
n_rest = int(mod(n_points_final_grid, n_blocks))
|
||||||
@ -64,9 +66,9 @@ subroutine provide_int2_grad1_u12_ao()
|
|||||||
! ---
|
! ---
|
||||||
! ---
|
! ---
|
||||||
|
|
||||||
allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,4))
|
call gpu_allocate(int2_grad1_u12_ao, ao_num,ao_num,n_points_final_grid,4)
|
||||||
|
|
||||||
allocate(tmp(n_points_extra_final_grid,ao_num,ao_num))
|
call gpu_allocate(tmp,n_points_extra_final_grid,ao_num,ao_num)
|
||||||
!$OMP PARALLEL &
|
!$OMP PARALLEL &
|
||||||
!$OMP DEFAULT (NONE) &
|
!$OMP DEFAULT (NONE) &
|
||||||
!$OMP PRIVATE (j, i, jpoint) &
|
!$OMP PRIVATE (j, i, jpoint) &
|
||||||
@ -75,21 +77,28 @@ subroutine provide_int2_grad1_u12_ao()
|
|||||||
do j = 1, ao_num
|
do j = 1, ao_num
|
||||||
do i = 1, ao_num
|
do i = 1, ao_num
|
||||||
do jpoint = 1, n_points_extra_final_grid
|
do jpoint = 1, n_points_extra_final_grid
|
||||||
tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j)
|
tmp%f(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
!$OMP END DO
|
!$OMP END DO
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,4))
|
call gpu_allocate(tmp_grad1_u12,n_points_extra_final_grid,n_blocks,4)
|
||||||
|
call gpu_allocate(tmp_grad1_u12p,n_points_extra_final_grid,n_blocks,4)
|
||||||
|
|
||||||
tc = 0.d0
|
tc = 0.d0
|
||||||
|
|
||||||
|
type(gpu_stream) :: stream(4)
|
||||||
|
do i=1,4
|
||||||
|
call gpu_stream_create(stream(i))
|
||||||
|
enddo
|
||||||
|
|
||||||
do i_pass = 1, n_pass
|
do i_pass = 1, n_pass
|
||||||
ii = (i_pass-1)*n_blocks + 1
|
ii = (i_pass-1)*n_blocks + 1
|
||||||
|
|
||||||
call wall_time(tc1)
|
call wall_time(tc1)
|
||||||
|
|
||||||
!$OMP PARALLEL &
|
!$OMP PARALLEL &
|
||||||
!$OMP DEFAULT (NONE) &
|
!$OMP DEFAULT (NONE) &
|
||||||
!$OMP PRIVATE (i_blocks, ipoint) &
|
!$OMP PRIVATE (i_blocks, ipoint) &
|
||||||
@ -97,27 +106,26 @@ subroutine provide_int2_grad1_u12_ao()
|
|||||||
!$OMP DO
|
!$OMP DO
|
||||||
do i_blocks = 1, n_blocks
|
do i_blocks = 1, n_blocks
|
||||||
ipoint = ii - 1 + i_blocks ! r1
|
ipoint = ii - 1 + i_blocks ! r1
|
||||||
call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1), tmp_grad1_u12(1,i_blocks,2), tmp_grad1_u12(1,i_blocks,3), tmp_grad1_u12(1,i_blocks,4))
|
call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12%f(1,i_blocks,1), tmp_grad1_u12%f(1,i_blocks,2), &
|
||||||
|
tmp_grad1_u12%f(1,i_blocks,3), tmp_grad1_u12%f(1,i_blocks,4))
|
||||||
enddo
|
enddo
|
||||||
!$OMP END DO
|
!$OMP END DO
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
call wall_time(tc2)
|
call wall_time(tc2)
|
||||||
tc = tc + tc2 - tc1
|
tc = tc + tc2 - tc1
|
||||||
|
|
||||||
|
call gpu_synchronize()
|
||||||
|
call gpu_copy(tmp_grad1_u12,tmp_grad1_u12p)
|
||||||
do m = 1, 4
|
do m = 1, 4
|
||||||
call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 &
|
call gpu_set_stream(blas_handle, stream(m))
|
||||||
, tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid &
|
call gpu_dgemm(blas_handle, "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 &
|
||||||
, 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num)
|
, tmp%f(1,1,1), n_points_extra_final_grid, tmp_grad1_u12p%f(1,1,m), n_points_extra_final_grid &
|
||||||
|
, 0.d0, int2_grad1_u12_ao%f(1,1,ii,m), ao_num*ao_num)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
deallocate(tmp_grad1_u12)
|
|
||||||
|
|
||||||
|
|
||||||
if(n_rest .gt. 0) then
|
if(n_rest .gt. 0) then
|
||||||
|
|
||||||
allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,4))
|
|
||||||
|
|
||||||
ii = n_pass*n_blocks + 1
|
ii = n_pass*n_blocks + 1
|
||||||
|
|
||||||
call wall_time(tc1)
|
call wall_time(tc1)
|
||||||
@ -128,7 +136,8 @@ subroutine provide_int2_grad1_u12_ao()
|
|||||||
!$OMP DO
|
!$OMP DO
|
||||||
do i_rest = 1, n_rest
|
do i_rest = 1, n_rest
|
||||||
ipoint = ii - 1 + i_rest ! r1
|
ipoint = ii - 1 + i_rest ! r1
|
||||||
call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1), tmp_grad1_u12(1,i_rest,2), tmp_grad1_u12(1,i_rest,3), tmp_grad1_u12(1,i_rest,4))
|
call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12%f(1,i_rest,1), tmp_grad1_u12%f(1,i_rest,2), &
|
||||||
|
tmp_grad1_u12%f(1,i_rest,3), tmp_grad1_u12%f(1,i_rest,4))
|
||||||
enddo
|
enddo
|
||||||
!$OMP END DO
|
!$OMP END DO
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
@ -136,15 +145,23 @@ subroutine provide_int2_grad1_u12_ao()
|
|||||||
tc = tc + tc2 - tc1
|
tc = tc + tc2 - tc1
|
||||||
|
|
||||||
do m = 1, 4
|
do m = 1, 4
|
||||||
call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 &
|
call gpu_set_stream(blas_handle, stream(m))
|
||||||
, tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid &
|
call gpu_dgemm(blas_handle, "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 &
|
||||||
, 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num)
|
, tmp%f(1,1,1), n_points_extra_final_grid, tmp_grad1_u12%f(1,1,m), n_points_extra_final_grid &
|
||||||
|
, 0.d0, int2_grad1_u12_ao%f(1,1,ii,m), ao_num*ao_num)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
deallocate(tmp_grad1_u12)
|
|
||||||
endif
|
endif
|
||||||
|
call gpu_synchronize()
|
||||||
|
call gpu_deallocate(tmp_grad1_u12)
|
||||||
|
call gpu_deallocate(tmp_grad1_u12p)
|
||||||
|
|
||||||
deallocate(tmp)
|
do i=1,4
|
||||||
|
call gpu_stream_destroy(stream(i))
|
||||||
|
enddo
|
||||||
|
|
||||||
|
|
||||||
|
call gpu_deallocate(tmp)
|
||||||
|
|
||||||
|
|
||||||
call wall_time(time1)
|
call wall_time(time1)
|
||||||
@ -152,6 +169,8 @@ subroutine provide_int2_grad1_u12_ao()
|
|||||||
print*, ' wall time Jastrow derivatives (min) = ', tc / 60.d0
|
print*, ' wall time Jastrow derivatives (min) = ', tc / 60.d0
|
||||||
call print_memory_usage()
|
call print_memory_usage()
|
||||||
|
|
||||||
|
!TODO
|
||||||
|
stop
|
||||||
! ---
|
! ---
|
||||||
! ---
|
! ---
|
||||||
! ---
|
! ---
|
||||||
@ -177,7 +196,7 @@ subroutine provide_int2_grad1_u12_ao()
|
|||||||
!$OMP END DO
|
!$OMP END DO
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
|
call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
|
||||||
, int2_grad1_u12_ao(1,1,1,4), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid &
|
, int2_grad1_u12_ao%f(1,1,1,4), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid &
|
||||||
, 0.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num)
|
, 0.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num)
|
||||||
deallocate(c_mat)
|
deallocate(c_mat)
|
||||||
|
|
||||||
@ -213,7 +232,7 @@ subroutine provide_int2_grad1_u12_ao()
|
|||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 &
|
call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 &
|
||||||
, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid &
|
, int2_grad1_u12_ao%f(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid &
|
||||||
, 1.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num)
|
, 1.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num)
|
||||||
enddo
|
enddo
|
||||||
deallocate(c_mat)
|
deallocate(c_mat)
|
||||||
@ -263,7 +282,7 @@ subroutine provide_int2_grad1_u12_ao()
|
|||||||
print*, ' Writing int2_grad1_u12_ao in ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao'
|
print*, ' Writing int2_grad1_u12_ao in ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao'
|
||||||
open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write")
|
open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write")
|
||||||
call ezfio_set_work_empty(.False.)
|
call ezfio_set_work_empty(.False.)
|
||||||
write(11) int2_grad1_u12_ao(:,:,:,1:3)
|
write(11) int2_grad1_u12_ao%f(:,:,:,1:3)
|
||||||
close(11)
|
close(11)
|
||||||
|
|
||||||
print*, ' Saving tc_int_2e_ao in ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot'
|
print*, ' Saving tc_int_2e_ao in ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot'
|
||||||
@ -276,7 +295,7 @@ subroutine provide_int2_grad1_u12_ao()
|
|||||||
|
|
||||||
! ----
|
! ----
|
||||||
|
|
||||||
deallocate(int2_grad1_u12_ao)
|
call gpu_deallocate(int2_grad1_u12_ao)
|
||||||
deallocate(tc_int_2e_ao)
|
deallocate(tc_int_2e_ao)
|
||||||
|
|
||||||
call wall_time(time2)
|
call wall_time(time2)
|
||||||
|
@ -1,3 +1,15 @@
|
|||||||
|
double precision function get_ao_integ_chol(i,j,k,l)
|
||||||
|
implicit none
|
||||||
|
BEGIN_DOC
|
||||||
|
! CHOLESKY representation of the integral of the AO basis <ik|jl> or (ij|kl)
|
||||||
|
! i(r1) j(r1) 1/r12 k(r2) l(r2)
|
||||||
|
END_DOC
|
||||||
|
integer, intent(in) :: i,j,k,l
|
||||||
|
double precision, external :: ddot
|
||||||
|
get_ao_integ_chol = ddot(cholesky_ao_num, cholesky_ao_transp(1,i,j), 1, cholesky_ao_transp(1,k,l), 1)
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ]
|
BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ]
|
||||||
implicit none
|
implicit none
|
||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
@ -25,7 +37,10 @@ END_PROVIDER
|
|||||||
! Last dimension of cholesky_ao is cholesky_ao_num
|
! Last dimension of cholesky_ao is cholesky_ao_num
|
||||||
!
|
!
|
||||||
! https://mogp-emulator.readthedocs.io/en/latest/methods/proc/ProcPivotedCholesky.html
|
! https://mogp-emulator.readthedocs.io/en/latest/methods/proc/ProcPivotedCholesky.html
|
||||||
|
!
|
||||||
! https://doi.org/10.1016/j.apnum.2011.10.001 : Page 4, Algorithm 1
|
! https://doi.org/10.1016/j.apnum.2011.10.001 : Page 4, Algorithm 1
|
||||||
|
!
|
||||||
|
! https://www.diva-portal.org/smash/get/diva2:396223/FULLTEXT01.pdf
|
||||||
END_DOC
|
END_DOC
|
||||||
|
|
||||||
integer*8 :: ndim8
|
integer*8 :: ndim8
|
||||||
@ -155,11 +170,15 @@ END_PROVIDER
|
|||||||
Lset(np8) = p8
|
Lset(np8) = p8
|
||||||
endif
|
endif
|
||||||
enddo
|
enddo
|
||||||
np = np8
|
if (np8 > ndim8) stop 'np>ndim8'
|
||||||
|
np = int(np8,4)
|
||||||
if (np <= 0) stop 'np<=0'
|
if (np <= 0) stop 'np<=0'
|
||||||
if (np > ndim8) stop 'np>ndim8'
|
|
||||||
|
|
||||||
rank_max = min(np,20*elec_num*elec_num)
|
rank_max = np
|
||||||
|
! Avoid too large arrays when there are many electrons
|
||||||
|
if (elec_num > 10) then
|
||||||
|
rank_max = min(np,20*elec_num*elec_num)
|
||||||
|
endif
|
||||||
call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1))
|
call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1))
|
||||||
call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /))
|
call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /))
|
||||||
|
|
||||||
@ -428,7 +447,7 @@ END_PROVIDER
|
|||||||
Lset(np8) = p8
|
Lset(np8) = p8
|
||||||
endif
|
endif
|
||||||
enddo
|
enddo
|
||||||
np = np8
|
np = int(np8,4)
|
||||||
|
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
|
@ -79,3 +79,9 @@ type: logical
|
|||||||
doc: If |true|, the pt2_max value in the CIPSI is set to 10-10 and will not change
|
doc: If |true|, the pt2_max value in the CIPSI is set to 10-10 and will not change
|
||||||
interface: ezfio,provider,ocaml
|
interface: ezfio,provider,ocaml
|
||||||
default: False
|
default: False
|
||||||
|
|
||||||
|
[act_mos_opt]
|
||||||
|
type: logical
|
||||||
|
doc: If |true|, the active orbitals are also optimized variationally
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: False
|
||||||
|
@ -3,3 +3,4 @@ selectors_full
|
|||||||
generators_cas
|
generators_cas
|
||||||
two_body_rdm
|
two_body_rdm
|
||||||
dav_general_mat
|
dav_general_mat
|
||||||
|
mo_optimization_utils
|
||||||
|
@ -1,18 +1,25 @@
|
|||||||
BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)]
|
BEGIN_PROVIDER [real*8, bielec_PQxx_array, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)]
|
||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
! bielec_PQxx : integral (pq|xx) with p,q arbitrary, x core or active
|
! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!!
|
||||||
|
!
|
||||||
|
! Replaced by the Cholesky-based function bielec_PQxx
|
||||||
|
!
|
||||||
|
! bielec_PQxx_array : integral (pq|xx) with p,q arbitrary, x core or active
|
||||||
! indices are unshifted orbital numbers
|
! indices are unshifted orbital numbers
|
||||||
END_DOC
|
END_DOC
|
||||||
implicit none
|
implicit none
|
||||||
integer :: i,j,ii,jj,p,q,i3,j3,t3,v3
|
integer :: i,j,ii,jj,p,q,i3,j3,t3,v3
|
||||||
real*8 :: mo_two_e_integral
|
real*8 :: mo_two_e_integral
|
||||||
|
print*,''
|
||||||
|
print*,'Providing bielec_PQxx_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!'
|
||||||
|
print*,''
|
||||||
|
|
||||||
bielec_PQxx(:,:,:,:) = 0.d0
|
bielec_PQxx_array(:,:,:,:) = 0.d0
|
||||||
PROVIDE mo_two_e_integrals_in_map
|
PROVIDE mo_two_e_integrals_in_map
|
||||||
|
|
||||||
!$OMP PARALLEL DEFAULT(NONE) &
|
!$OMP PARALLEL DEFAULT(NONE) &
|
||||||
!$OMP PRIVATE(i,ii,j,jj,i3,j3) &
|
!$OMP PRIVATE(i,ii,j,jj,i3,j3) &
|
||||||
!$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PQxx, &
|
!$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PQxx_array, &
|
||||||
!$OMP n_act_orb,mo_integrals_map,list_act)
|
!$OMP n_act_orb,mo_integrals_map,list_act)
|
||||||
|
|
||||||
!$OMP DO
|
!$OMP DO
|
||||||
@ -20,14 +27,14 @@ BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core
|
|||||||
ii=list_core_inact(i)
|
ii=list_core_inact(i)
|
||||||
do j=i,n_core_inact_orb
|
do j=i,n_core_inact_orb
|
||||||
jj=list_core_inact(j)
|
jj=list_core_inact(j)
|
||||||
call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j),mo_integrals_map)
|
call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx_array(1,1,i,j),mo_integrals_map)
|
||||||
bielec_PQxx(:,:,j,i)=bielec_PQxx(:,:,i,j)
|
bielec_PQxx_array(:,:,j,i)=bielec_PQxx_array(:,:,i,j)
|
||||||
end do
|
end do
|
||||||
do j=1,n_act_orb
|
do j=1,n_act_orb
|
||||||
jj=list_act(j)
|
jj=list_act(j)
|
||||||
j3=j+n_core_inact_orb
|
j3=j+n_core_inact_orb
|
||||||
call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j3),mo_integrals_map)
|
call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx_array(1,1,i,j3),mo_integrals_map)
|
||||||
bielec_PQxx(:,:,j3,i)=bielec_PQxx(:,:,i,j3)
|
bielec_PQxx_array(:,:,j3,i)=bielec_PQxx_array(:,:,i,j3)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
!$OMP END DO
|
!$OMP END DO
|
||||||
@ -40,8 +47,8 @@ BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core
|
|||||||
do j=i,n_act_orb
|
do j=i,n_act_orb
|
||||||
jj=list_act(j)
|
jj=list_act(j)
|
||||||
j3=j+n_core_inact_orb
|
j3=j+n_core_inact_orb
|
||||||
call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i3,j3),mo_integrals_map)
|
call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx_array(1,1,i3,j3),mo_integrals_map)
|
||||||
bielec_PQxx(:,:,j3,i3)=bielec_PQxx(:,:,i3,j3)
|
bielec_PQxx_array(:,:,j3,i3)=bielec_PQxx_array(:,:,i3,j3)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
!$OMP END DO
|
!$OMP END DO
|
||||||
@ -52,9 +59,13 @@ END_PROVIDER
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)]
|
BEGIN_PROVIDER [real*8, bielec_PxxQ_array, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)]
|
||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
! bielec_PxxQ : integral (px|xq) with p,q arbitrary, x core or active
|
! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!!
|
||||||
|
!
|
||||||
|
! Replaced by the Cholesky-based function bielec_PxxQ
|
||||||
|
!
|
||||||
|
! bielec_PxxQ_array : integral (px|xq) with p,q arbitrary, x core or active
|
||||||
! indices are unshifted orbital numbers
|
! indices are unshifted orbital numbers
|
||||||
END_DOC
|
END_DOC
|
||||||
implicit none
|
implicit none
|
||||||
@ -62,12 +73,15 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_a
|
|||||||
double precision, allocatable :: integrals_array(:,:)
|
double precision, allocatable :: integrals_array(:,:)
|
||||||
real*8 :: mo_two_e_integral
|
real*8 :: mo_two_e_integral
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'Providing bielec_PxxQ_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!'
|
||||||
|
print*,''
|
||||||
PROVIDE mo_two_e_integrals_in_map
|
PROVIDE mo_two_e_integrals_in_map
|
||||||
bielec_PxxQ = 0.d0
|
bielec_PxxQ_array = 0.d0
|
||||||
|
|
||||||
!$OMP PARALLEL DEFAULT(NONE) &
|
!$OMP PARALLEL DEFAULT(NONE) &
|
||||||
!$OMP PRIVATE(i,ii,j,jj,i3,j3,integrals_array) &
|
!$OMP PRIVATE(i,ii,j,jj,i3,j3,integrals_array) &
|
||||||
!$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PxxQ, &
|
!$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PxxQ_array, &
|
||||||
!$OMP n_act_orb,mo_integrals_map,list_act)
|
!$OMP n_act_orb,mo_integrals_map,list_act)
|
||||||
|
|
||||||
allocate(integrals_array(mo_num,mo_num))
|
allocate(integrals_array(mo_num,mo_num))
|
||||||
@ -80,8 +94,8 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_a
|
|||||||
call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map)
|
call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map)
|
||||||
do q=1,mo_num
|
do q=1,mo_num
|
||||||
do p=1,mo_num
|
do p=1,mo_num
|
||||||
bielec_PxxQ(p,i,j,q)=integrals_array(p,q)
|
bielec_PxxQ_array(p,i,j,q)=integrals_array(p,q)
|
||||||
bielec_PxxQ(p,j,i,q)=integrals_array(q,p)
|
bielec_PxxQ_array(p,j,i,q)=integrals_array(q,p)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -91,8 +105,8 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_a
|
|||||||
call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map)
|
call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map)
|
||||||
do q=1,mo_num
|
do q=1,mo_num
|
||||||
do p=1,mo_num
|
do p=1,mo_num
|
||||||
bielec_PxxQ(p,i,j3,q)=integrals_array(p,q)
|
bielec_PxxQ_array(p,i,j3,q)=integrals_array(p,q)
|
||||||
bielec_PxxQ(p,j3,i,q)=integrals_array(q,p)
|
bielec_PxxQ_array(p,j3,i,q)=integrals_array(q,p)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -111,8 +125,8 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_a
|
|||||||
call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map)
|
call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map)
|
||||||
do q=1,mo_num
|
do q=1,mo_num
|
||||||
do p=1,mo_num
|
do p=1,mo_num
|
||||||
bielec_PxxQ(p,i3,j3,q)=integrals_array(p,q)
|
bielec_PxxQ_array(p,i3,j3,q)=integrals_array(p,q)
|
||||||
bielec_PxxQ(p,j3,i3,q)=integrals_array(q,p)
|
bielec_PxxQ_array(p,j3,i3,q)=integrals_array(q,p)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -129,10 +143,15 @@ BEGIN_PROVIDER [real*8, bielecCI, (n_act_orb,n_act_orb,n_act_orb, mo_num)]
|
|||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
! bielecCI : integrals (tu|vp) with p arbitrary, tuv active
|
! bielecCI : integrals (tu|vp) with p arbitrary, tuv active
|
||||||
! index p runs over the whole basis, t,u,v only over the active orbitals
|
! index p runs over the whole basis, t,u,v only over the active orbitals
|
||||||
|
!
|
||||||
|
! This array can be stored anyway. Ex: 50 active orbitals, 1500 MOs ==> 8x50^3x1500 = 1.5 Gb
|
||||||
END_DOC
|
END_DOC
|
||||||
implicit none
|
implicit none
|
||||||
integer :: i,j,k,p,t,u,v
|
integer :: i,j,k,p,t,u,v
|
||||||
double precision, external :: mo_two_e_integral
|
double precision, external :: mo_two_e_integral
|
||||||
|
double precision :: wall0, wall1
|
||||||
|
call wall_time(wall0)
|
||||||
|
print*,'Providing bielecCI'
|
||||||
PROVIDE mo_two_e_integrals_in_map
|
PROVIDE mo_two_e_integrals_in_map
|
||||||
|
|
||||||
!$OMP PARALLEL DO DEFAULT(NONE) &
|
!$OMP PARALLEL DO DEFAULT(NONE) &
|
||||||
@ -151,5 +170,7 @@ BEGIN_PROVIDER [real*8, bielecCI, (n_act_orb,n_act_orb,n_act_orb, mo_num)]
|
|||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
!$OMP END PARALLEL DO
|
!$OMP END PARALLEL DO
|
||||||
|
call wall_time(wall1)
|
||||||
|
print*,'Time to provide bielecCI = ',wall1 - wall0
|
||||||
|
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
@ -1,30 +1,38 @@
|
|||||||
BEGIN_PROVIDER [real*8, bielec_PQxx_no, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)]
|
BEGIN_PROVIDER [real*8, bielec_PQxx_no_array, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)]
|
||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
|
! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!!
|
||||||
|
!
|
||||||
|
! Replaced by the Cholesky-based function bielec_PQxx_no
|
||||||
|
!
|
||||||
! integral (pq|xx) in the basis of natural MOs
|
! integral (pq|xx) in the basis of natural MOs
|
||||||
! indices are unshifted orbital numbers
|
! indices are unshifted orbital numbers
|
||||||
|
!
|
||||||
END_DOC
|
END_DOC
|
||||||
implicit none
|
implicit none
|
||||||
integer :: i,j,k,l,t,u,p,q
|
integer :: i,j,k,l,t,u,p,q
|
||||||
double precision, allocatable :: f(:,:,:), d(:,:,:)
|
double precision, allocatable :: f(:,:,:), d(:,:,:)
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'Providing bielec_PQxx_no_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!'
|
||||||
|
print*,''
|
||||||
|
|
||||||
|
|
||||||
!$OMP PARALLEL DEFAULT(NONE) &
|
!$OMP PARALLEL DEFAULT(NONE) &
|
||||||
!$OMP PRIVATE(j,k,l,p,d,f) &
|
!$OMP PRIVATE(j,k,l,p,d,f) &
|
||||||
!$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, &
|
!$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, &
|
||||||
!$OMP bielec_PQxx_no,bielec_PQxx,list_act,natorbsCI)
|
!$OMP bielec_PQxx_no_array,bielec_PQxx_array,list_act,natorbsCI)
|
||||||
|
|
||||||
allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), &
|
allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), &
|
||||||
d(n_act_orb,mo_num,n_core_inact_act_orb))
|
d(n_act_orb,mo_num,n_core_inact_act_orb))
|
||||||
|
|
||||||
!$OMP DO
|
!$OMP DO
|
||||||
do l=1,n_core_inact_act_orb
|
do l=1,n_core_inact_act_orb
|
||||||
bielec_PQxx_no(:,:,:,l) = bielec_PQxx(:,:,:,l)
|
bielec_PQxx_no_array(:,:,:,l) = bielec_PQxx_array(:,:,:,l)
|
||||||
|
|
||||||
do k=1,n_core_inact_act_orb
|
do k=1,n_core_inact_act_orb
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
f(p,j,k)=bielec_PQxx_no(list_act(p),j,k,l)
|
f(p,j,k)=bielec_PQxx_no_array(list_act(p),j,k,l)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -36,13 +44,13 @@
|
|||||||
do k=1,n_core_inact_act_orb
|
do k=1,n_core_inact_act_orb
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
bielec_PQxx_no(list_act(p),j,k,l)=d(p,j,k)
|
bielec_PQxx_no_array(list_act(p),j,k,l)=d(p,j,k)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
|
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
f(p,j,k)=bielec_PQxx_no(j,list_act(p),k,l)
|
f(p,j,k)=bielec_PQxx_no_array(j,list_act(p),k,l)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -54,7 +62,7 @@
|
|||||||
do k=1,n_core_inact_act_orb
|
do k=1,n_core_inact_act_orb
|
||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
bielec_PQxx_no(j,list_act(p),k,l)=d(p,j,k)
|
bielec_PQxx_no_array(j,list_act(p),k,l)=d(p,j,k)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -71,7 +79,7 @@
|
|||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
do k=1,mo_num
|
do k=1,mo_num
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
f(j,k,p) = bielec_PQxx_no(j,k,n_core_inact_orb+p,l)
|
f(j,k,p) = bielec_PQxx_no_array(j,k,n_core_inact_orb+p,l)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -83,7 +91,7 @@
|
|||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
do k=1,mo_num
|
do k=1,mo_num
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
bielec_PQxx_no(j,k,n_core_inact_orb+p,l)=d(j,k,p)
|
bielec_PQxx_no_array(j,k,n_core_inact_orb+p,l)=d(j,k,p)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -97,7 +105,7 @@
|
|||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
do k=1,mo_num
|
do k=1,mo_num
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
f(j,k,p) = bielec_PQxx_no(j,k,l,n_core_inact_orb+p)
|
f(j,k,p) = bielec_PQxx_no_array(j,k,l,n_core_inact_orb+p)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -109,7 +117,7 @@
|
|||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
do k=1,mo_num
|
do k=1,mo_num
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
bielec_PQxx_no(j,k,l,n_core_inact_orb+p)=d(j,k,p)
|
bielec_PQxx_no_array(j,k,l,n_core_inact_orb+p)=d(j,k,p)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -123,8 +131,12 @@ END_PROVIDER
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)]
|
BEGIN_PROVIDER [real*8, bielec_PxxQ_no_array, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)]
|
||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
|
! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!!
|
||||||
|
!
|
||||||
|
! Replaced by the Cholesky-based function bielec_PxxQ_no
|
||||||
|
!
|
||||||
! integral (px|xq) in the basis of natural MOs
|
! integral (px|xq) in the basis of natural MOs
|
||||||
! indices are unshifted orbital numbers
|
! indices are unshifted orbital numbers
|
||||||
END_DOC
|
END_DOC
|
||||||
@ -132,10 +144,14 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
|
|||||||
integer :: i,j,k,l,t,u,p,q
|
integer :: i,j,k,l,t,u,p,q
|
||||||
double precision, allocatable :: f(:,:,:), d(:,:,:)
|
double precision, allocatable :: f(:,:,:), d(:,:,:)
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'Providing bielec_PxxQ_no_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!'
|
||||||
|
print*,''
|
||||||
|
|
||||||
!$OMP PARALLEL DEFAULT(NONE) &
|
!$OMP PARALLEL DEFAULT(NONE) &
|
||||||
!$OMP PRIVATE(j,k,l,p,d,f) &
|
!$OMP PRIVATE(j,k,l,p,d,f) &
|
||||||
!$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, &
|
!$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, &
|
||||||
!$OMP bielec_PxxQ_no,bielec_PxxQ,list_act,natorbsCI)
|
!$OMP bielec_PxxQ_no_array,bielec_PxxQ_array,list_act,natorbsCI)
|
||||||
|
|
||||||
|
|
||||||
allocate (f(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb), &
|
allocate (f(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb), &
|
||||||
@ -143,11 +159,11 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
|
|||||||
|
|
||||||
!$OMP DO
|
!$OMP DO
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
bielec_PxxQ_no(:,:,:,j) = bielec_PxxQ(:,:,:,j)
|
bielec_PxxQ_no_array(:,:,:,j) = bielec_PxxQ_array(:,:,:,j)
|
||||||
do l=1,n_core_inact_act_orb
|
do l=1,n_core_inact_act_orb
|
||||||
do k=1,n_core_inact_act_orb
|
do k=1,n_core_inact_act_orb
|
||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
f(p,k,l) = bielec_PxxQ_no(list_act(p),k,l,j)
|
f(p,k,l) = bielec_PxxQ_no_array(list_act(p),k,l,j)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -159,7 +175,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
|
|||||||
do l=1,n_core_inact_act_orb
|
do l=1,n_core_inact_act_orb
|
||||||
do k=1,n_core_inact_act_orb
|
do k=1,n_core_inact_act_orb
|
||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
bielec_PxxQ_no(list_act(p),k,l,j)=d(p,k,l)
|
bielec_PxxQ_no_array(list_act(p),k,l,j)=d(p,k,l)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -176,7 +192,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
|
|||||||
do l=1,n_core_inact_act_orb
|
do l=1,n_core_inact_act_orb
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
f(p,j,l) = bielec_PxxQ_no(j,n_core_inact_orb+p,l,k)
|
f(p,j,l) = bielec_PxxQ_no_array(j,n_core_inact_orb+p,l,k)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -188,7 +204,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
|
|||||||
do l=1,n_core_inact_act_orb
|
do l=1,n_core_inact_act_orb
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
bielec_PxxQ_no(j,n_core_inact_orb+p,l,k)=d(p,j,l)
|
bielec_PxxQ_no_array(j,n_core_inact_orb+p,l,k)=d(p,j,l)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -205,7 +221,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
|
|||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
do l=1,n_core_inact_act_orb
|
do l=1,n_core_inact_act_orb
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
f(j,l,p) = bielec_PxxQ_no(j,l,n_core_inact_orb+p,k)
|
f(j,l,p) = bielec_PxxQ_no_array(j,l,n_core_inact_orb+p,k)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -217,7 +233,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
|
|||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
do l=1,n_core_inact_act_orb
|
do l=1,n_core_inact_act_orb
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
bielec_PxxQ_no(j,l,n_core_inact_orb+p,k)=d(j,l,p)
|
bielec_PxxQ_no_array(j,l,n_core_inact_orb+p,k)=d(j,l,p)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -231,7 +247,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
|
|||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
do k=1,n_core_inact_act_orb
|
do k=1,n_core_inact_act_orb
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
f(j,k,p) = bielec_PxxQ_no(j,k,l,n_core_inact_orb+p)
|
f(j,k,p) = bielec_PxxQ_no_array(j,k,l,n_core_inact_orb+p)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -243,7 +259,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
|
|||||||
do p=1,n_act_orb
|
do p=1,n_act_orb
|
||||||
do k=1,n_core_inact_act_orb
|
do k=1,n_core_inact_act_orb
|
||||||
do j=1,mo_num
|
do j=1,mo_num
|
||||||
bielec_PxxQ_no(j,k,l,n_core_inact_orb+p)=d(j,k,p)
|
bielec_PxxQ_no_array(j,k,l,n_core_inact_orb+p)=d(j,k,p)
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -259,11 +275,17 @@ BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)]
|
|||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
! integrals (tu|vp) in the basis of natural MOs
|
! integrals (tu|vp) in the basis of natural MOs
|
||||||
! index p runs over the whole basis, t,u,v only over the active orbitals
|
! index p runs over the whole basis, t,u,v only over the active orbitals
|
||||||
|
!
|
||||||
|
! This array can be stored anyway. Ex: 50 active orbitals, 1500 MOs ==> 8x50^3x1500 = 1.5 Gb
|
||||||
END_DOC
|
END_DOC
|
||||||
implicit none
|
implicit none
|
||||||
integer :: i,j,k,l,t,u,p,q
|
integer :: i,j,k,l,t,u,p,q
|
||||||
double precision, allocatable :: f(:,:,:), d(:,:,:)
|
double precision, allocatable :: f(:,:,:), d(:,:,:)
|
||||||
|
|
||||||
|
double precision :: wall0, wall1
|
||||||
|
call wall_time(wall0)
|
||||||
|
print*,'Providing bielecCI_no'
|
||||||
|
|
||||||
!$OMP PARALLEL DEFAULT(NONE) &
|
!$OMP PARALLEL DEFAULT(NONE) &
|
||||||
!$OMP PRIVATE(j,k,l,p,d,f) &
|
!$OMP PRIVATE(j,k,l,p,d,f) &
|
||||||
!$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, &
|
!$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, &
|
||||||
@ -363,6 +385,8 @@ BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)]
|
|||||||
|
|
||||||
deallocate(d,f)
|
deallocate(d,f)
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
call wall_time(wall1)
|
||||||
|
print*,'Time to provide bielecCI_no = ',wall1-wall0
|
||||||
|
|
||||||
|
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
@ -11,7 +11,7 @@ program casscf
|
|||||||
if(small_active_space)then
|
if(small_active_space)then
|
||||||
pt2_relative_error = 0.00001
|
pt2_relative_error = 0.00001
|
||||||
else
|
else
|
||||||
thresh_scf = 1.d-4
|
thresh_scf = max(1.d-4,thresh_scf)
|
||||||
pt2_relative_error = 0.04
|
pt2_relative_error = 0.04
|
||||||
endif
|
endif
|
||||||
touch pt2_relative_error
|
touch pt2_relative_error
|
||||||
@ -46,94 +46,101 @@ subroutine run
|
|||||||
do while (.not.converged)
|
do while (.not.converged)
|
||||||
print*,'pt2_max = ',pt2_max
|
print*,'pt2_max = ',pt2_max
|
||||||
call run_stochastic_cipsi(Ev,PT2)
|
call run_stochastic_cipsi(Ev,PT2)
|
||||||
print*,'Ev,PT2',Ev(1),PT2(1)
|
! if(act_mos_opt)then DOES NOT WORK
|
||||||
E_PT2(1:N_states) = Ev(1:N_states) + PT2(1:N_states)
|
! call run_orb_opt_trust_v2
|
||||||
energy_old = energy
|
! call run_stochastic_cipsi(Ev,PT2)
|
||||||
energy = eone+etwo+ecore
|
|
||||||
pt2_max_before = pt2_max
|
|
||||||
|
|
||||||
call write_time(6)
|
|
||||||
call write_int(6,iteration,'CAS-SCF iteration = ')
|
|
||||||
call write_double(6,energy,'State-average CAS-SCF energy = ')
|
|
||||||
! if(n_states == 1)then
|
|
||||||
! call ezfio_get_casscf_cipsi_energy_pt2(E_PT2)
|
|
||||||
! call ezfio_get_casscf_cipsi_energy(PT2)
|
|
||||||
double precision :: delta_E_istate, e_av
|
|
||||||
e_av = 0.d0
|
|
||||||
do istate=1,N_states
|
|
||||||
e_av += state_average_weight(istate) * Ev(istate)
|
|
||||||
if(istate.gt.1)then
|
|
||||||
delta_E_istate = E_PT2(istate) - E_PT2(1)
|
|
||||||
write(*,'(A6,I2,A18,F16.10)')'state ',istate,' Delta E+PT2 = ',delta_E_istate
|
|
||||||
endif
|
|
||||||
write(*,'(A6,I2,A18,F16.10)')'state ',istate,' E + PT2 energy = ',E_PT2(istate)
|
|
||||||
write(*,'(A6,I2,A18,F16.10)')'state ',istate,' PT2 energy = ',PT2(istate)
|
|
||||||
! call write_double(6,E_PT2(istate),'E + PT2 energy = ')
|
|
||||||
! call write_double(6,PT2(istate),' PT2 = ')
|
|
||||||
enddo
|
|
||||||
call write_double(6,e_av,'State-average CAS-SCF energy bis = ')
|
|
||||||
call write_double(6,pt2_max,' PT2_MAX = ')
|
|
||||||
! endif
|
! endif
|
||||||
|
if(.True.)then
|
||||||
|
print*,'Ev,PT2',Ev(1),PT2(1)
|
||||||
|
E_PT2(1:N_states) = Ev(1:N_states) + PT2(1:N_states)
|
||||||
|
energy_old = energy
|
||||||
|
energy = eone+etwo+ecore
|
||||||
|
pt2_max_before = pt2_max
|
||||||
|
|
||||||
print*,''
|
call write_time(6)
|
||||||
call write_double(6,norm_grad_vec2,'Norm of gradients = ')
|
call write_int(6,iteration,'CAS-SCF iteration = ')
|
||||||
call write_double(6,norm_grad_vec2_tab(1), ' Core-active gradients = ')
|
call write_double(6,energy,'State-average CAS-SCF energy = ')
|
||||||
call write_double(6,norm_grad_vec2_tab(2), ' Core-virtual gradients = ')
|
!! if(n_states == 1)then
|
||||||
call write_double(6,norm_grad_vec2_tab(3), ' Active-virtual gradients = ')
|
!! call ezfio_get_casscf_cipsi_energy_pt2(E_PT2)
|
||||||
print*,''
|
!! call ezfio_get_casscf_cipsi_energy(PT2)
|
||||||
call write_double(6,energy_improvement, 'Predicted energy improvement = ')
|
double precision :: delta_E_istate, e_av
|
||||||
|
e_av = 0.d0
|
||||||
|
do istate=1,N_states
|
||||||
|
e_av += state_average_weight(istate) * Ev(istate)
|
||||||
|
if(istate.gt.1)then
|
||||||
|
delta_E_istate = E_PT2(istate) - E_PT2(1)
|
||||||
|
write(*,'(A6,I2,A18,F16.10)')'state ',istate,' Delta E+PT2 = ',delta_E_istate
|
||||||
|
endif
|
||||||
|
write(*,'(A6,I2,A18,F16.10)')'state ',istate,' E + PT2 energy = ',E_PT2(istate)
|
||||||
|
write(*,'(A6,I2,A18,F16.10)')'state ',istate,' PT2 energy = ',PT2(istate)
|
||||||
|
!! call write_double(6,E_PT2(istate),'E + PT2 energy = ')
|
||||||
|
!! call write_double(6,PT2(istate),' PT2 = ')
|
||||||
|
enddo
|
||||||
|
call write_double(6,e_av,'State-average CAS-SCF energy bis = ')
|
||||||
|
call write_double(6,pt2_max,' PT2_MAX = ')
|
||||||
|
!! endif
|
||||||
|
|
||||||
if(criterion_casscf == "energy")then
|
print*,''
|
||||||
converged = dabs(energy_improvement) < thresh_scf
|
call write_double(6,norm_grad_vec2,'Norm of gradients = ')
|
||||||
else if (criterion_casscf == "gradients")then
|
call write_double(6,norm_grad_vec2_tab(1), ' Core-active gradients = ')
|
||||||
converged = norm_grad_vec2 < thresh_scf
|
call write_double(6,norm_grad_vec2_tab(2), ' Core-virtual gradients = ')
|
||||||
else if (criterion_casscf == "e_pt2")then
|
call write_double(6,norm_grad_vec2_tab(3), ' Active-virtual gradients = ')
|
||||||
delta_E = 0.d0
|
print*,''
|
||||||
do istate = 1, N_states
|
call write_double(6,energy_improvement, 'Predicted energy improvement = ')
|
||||||
delta_E += dabs(E_PT2(istate) - ept2_before(istate))
|
|
||||||
enddo
|
if(criterion_casscf == "energy")then
|
||||||
converged = dabs(delta_E) < thresh_casscf
|
converged = dabs(energy_improvement) < thresh_scf
|
||||||
endif
|
else if (criterion_casscf == "gradients")then
|
||||||
ept2_before = E_PT2
|
converged = norm_grad_vec2 < thresh_scf
|
||||||
if(.not.small_active_space)then
|
else if (criterion_casscf == "e_pt2")then
|
||||||
if(adaptive_pt2_max)then
|
delta_E = 0.d0
|
||||||
pt2_max = dabs(energy_improvement / (pt2_relative_error))
|
do istate = 1, N_states
|
||||||
pt2_max = min(pt2_max, pt2_max_before)
|
delta_E += dabs(E_PT2(istate) - ept2_before(istate))
|
||||||
if(n_act_orb.ge.n_big_act_orb)then
|
enddo
|
||||||
pt2_max = max(pt2_max,pt2_min_casscf)
|
converged = dabs(delta_E) < thresh_casscf
|
||||||
endif
|
|
||||||
endif
|
endif
|
||||||
endif
|
ept2_before = E_PT2
|
||||||
print*,''
|
|
||||||
call write_double(6,pt2_max, 'PT2_MAX for next iteration = ')
|
|
||||||
|
|
||||||
mo_coef = NewOrbs
|
|
||||||
mo_occ = occnum
|
|
||||||
if(.not.converged)then
|
|
||||||
call save_mos
|
|
||||||
iteration += 1
|
|
||||||
if(norm_grad_vec2.gt.0.01d0)then
|
|
||||||
N_det = N_states
|
|
||||||
else
|
|
||||||
N_det = max(N_det/8 ,N_states)
|
|
||||||
endif
|
|
||||||
psi_det = psi_det_sorted
|
|
||||||
psi_coef = psi_coef_sorted
|
|
||||||
read_wf = .True.
|
|
||||||
call clear_mo_map
|
|
||||||
SOFT_TOUCH mo_coef N_det psi_det psi_coef
|
|
||||||
if(.not.small_active_space)then
|
if(.not.small_active_space)then
|
||||||
if(adaptive_pt2_max)then
|
if(adaptive_pt2_max)then
|
||||||
SOFT_TOUCH pt2_max
|
pt2_max = dabs(energy_improvement / (pt2_relative_error))
|
||||||
|
pt2_max = min(pt2_max, pt2_max_before)
|
||||||
|
if(n_act_orb.ge.n_big_act_orb)then
|
||||||
|
pt2_max = max(pt2_max,pt2_min_casscf)
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
if(iteration .gt. 3)then
|
print*,''
|
||||||
state_following_casscf = state_following_casscf_cipsi_save
|
call write_double(6,pt2_max, 'PT2_MAX for next iteration = ')
|
||||||
soft_touch state_following_casscf
|
|
||||||
|
mo_coef = NewOrbs
|
||||||
|
mo_occ = occnum
|
||||||
|
if(.not.converged)then
|
||||||
|
call save_mos
|
||||||
|
iteration += 1
|
||||||
|
if(norm_grad_vec2.gt.0.01d0)then
|
||||||
|
N_det = N_states
|
||||||
|
else
|
||||||
|
N_det = max(N_det/8 ,N_states)
|
||||||
|
endif
|
||||||
|
psi_det = psi_det_sorted
|
||||||
|
psi_coef = psi_coef_sorted
|
||||||
|
read_wf = .True.
|
||||||
|
call clear_mo_map
|
||||||
|
SOFT_TOUCH mo_coef N_det psi_det psi_coef
|
||||||
|
if(.not.small_active_space)then
|
||||||
|
if(adaptive_pt2_max)then
|
||||||
|
SOFT_TOUCH pt2_max
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
if(iteration .gt. 3)then
|
||||||
|
state_following_casscf = state_following_casscf_cipsi_save
|
||||||
|
soft_touch state_following_casscf
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
enddo
|
enddo
|
||||||
|
if(.True.)then
|
||||||
integer :: i
|
integer :: i
|
||||||
print*,'Converged CASSCF '
|
print*,'Converged CASSCF '
|
||||||
print*,'--------------------------'
|
print*,'--------------------------'
|
||||||
@ -153,6 +160,7 @@ subroutine run
|
|||||||
! write(*,*)mcscf_fock_alpha_mo(i,i)
|
! write(*,*)mcscf_fock_alpha_mo(i,i)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
248
src/casscf_cipsi/chol_bielec.irp.f
Normal file
248
src/casscf_cipsi/chol_bielec.irp.f
Normal file
@ -0,0 +1,248 @@
|
|||||||
|
|
||||||
|
BEGIN_PROVIDER [double precision, cholesky_no_1_idx_transp, (cholesky_mo_num, n_act_orb, mo_num)]
 BEGIN_DOC
 ! Cholesky vectors with ONE orbital index rotated to the active natural orbital basis.
 !
 ! For every MO index p, the active columns of cholesky_mo_transp(:,:,p)
 ! (selected through list_act) are gathered and right-multiplied by natorbsCI.
 ! Index order: (Cholesky vector, rotated active index, MO index).
 END_DOC
 implicit none
 integer                       :: p, t
 double precision, allocatable :: gathered(:,:)
 double precision              :: t_start, t_end

 call wall_time(t_start)
 print*,'Providing cholesky_no_1_idx_transp'

 allocate(gathered(cholesky_mo_num,n_act_orb))
 cholesky_no_1_idx_transp = 0.D0

 do p = 1, mo_num
  ! Pack the active-orbital columns for this MO into a contiguous buffer,
  ! since list_act may select non-adjacent orbitals.
  do t = 1, n_act_orb
   gathered(1:cholesky_mo_num, t) = cholesky_mo_transp(1:cholesky_mo_num, list_act(t), p)
  enddo
  ! cholesky_no_1_idx_transp(:,:,p) = gathered * natorbsCI
  call dgemm('N','N',cholesky_mo_num,n_act_orb,n_act_orb,1.d0,             &
             gathered, size(gathered,1),                                   &
             natorbsCI, size(natorbsCI,1),                                 &
             0.d0,                                                         &
             cholesky_no_1_idx_transp(1,1,p), size(cholesky_no_1_idx_transp,1))
 enddo

 call wall_time(t_end)
 print*,'Time to provide cholesky_no_1_idx_transp = ', t_end - t_start

END_PROVIDER
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp, (cholesky_mo_num, n_act_orb, n_act_orb)]
 BEGIN_DOC
 ! Cholesky vectors with TWO orbital indices on the active natural orbital basis.
 !
 ! Built from cholesky_no_1_idx_transp: for each already-rotated active index
 ! i_act, the remaining MO index is restricted to the active orbitals (through
 ! list_act) and right-multiplied by natorbsCI.
 END_DOC
 implicit none
 integer                       :: i_chol, i_act, j_act, jj_act
 double precision, allocatable :: chol_tmp(:,:)
 double precision              :: wall0, wall1

 call wall_time(wall0)
 print*,'Providing cholesky_no_2_idx_transp'

 ! Only one scratch buffer is needed (the former chol_tmp_bis was never used).
 allocate(chol_tmp(cholesky_mo_num,n_act_orb))
 cholesky_no_2_idx_transp = 0.D0

 do i_act = 1, n_act_orb
  ! Gather the vectors whose first (rotated) index is "i_act" and whose second
  ! index runs over the active MOs.
  do j_act = 1, n_act_orb
   jj_act = list_act(j_act)
   do i_chol = 1, cholesky_mo_num
    chol_tmp(i_chol, j_act) = cholesky_no_1_idx_transp(i_chol, i_act, jj_act)
   enddo
  enddo
  ! cholesky_no_2_idx_transp(:,:,i_act) = chol_tmp * natorbsCI
  call dgemm('N','N',cholesky_mo_num,n_act_orb,n_act_orb,1.d0,             &
             chol_tmp, size(chol_tmp,1),                                   &
             natorbsCI, size(natorbsCI,1),                                 &
             0.d0,                                                         &
             cholesky_no_2_idx_transp(1,1,i_act), size(cholesky_no_2_idx_transp,1))
 enddo

 deallocate(chol_tmp)

 call wall_time(wall1)
 print*,'Time to provide cholesky_no_2_idx_transp = ', wall1 - wall0

END_PROVIDER
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [ double precision, cholesky_no_total_transp, (cholesky_mo_num, mo_num, mo_num)]
 implicit none
 BEGIN_DOC
 ! Cholesky vectors defined on all basis including the NO basis:
 ! every active index is taken from the natural-orbital-rotated vectors,
 ! core/inactive and virtual indices are taken from the plain MO vectors.
 !
 ! Source of each block:
 !  * core-inact / core-inact, virt / virt, core-inact / virt : cholesky_mo_transp
 !  * act / core-inact, act / virt                            : cholesky_no_1_idx_transp
 !  * act / act                                               : cholesky_no_2_idx_transp
 !
 ! Mixed blocks are written for both orderings of the two orbital indices.
 ! Entries involving an orbital that is in none of list_core_inact, list_act,
 ! list_virt are set to zero.
 END_DOC
 integer :: i_chol, i_act, ii_act, j_act, jj_act
 integer :: i_core_inact, j_core_inact, ii_core_inact, jj_core_inact
 integer :: i_virt, ii_virt, j_virt, jj_virt
 double precision :: wall0,wall1

 call wall_time(wall0)
 print*,'Providing cholesky_no_total_transp '

 ! Give a defined value to every element, including those not reached below.
 cholesky_no_total_transp = 0.d0

 ! Block when two orbitals belong to the core/inact
 do j_core_inact = 1, n_core_inact_orb
  jj_core_inact = list_core_inact(j_core_inact)
  do i_core_inact = 1, n_core_inact_orb
   ii_core_inact = list_core_inact(i_core_inact)
   do i_chol = 1, cholesky_mo_num
    cholesky_no_total_transp(i_chol, ii_core_inact, jj_core_inact) = cholesky_mo_transp(i_chol,ii_core_inact,jj_core_inact)
   enddo
  enddo
 enddo

 ! Block when one orbital belongs to the core/inact and one belongs to the active.
 ! The target is addressed with the orbital index jj_core_inact (not the position
 ! j_core_inact inside list_core_inact), consistently with all other blocks.
 do j_core_inact = 1, n_core_inact_orb
  jj_core_inact = list_core_inact(j_core_inact)
  do i_act = 1, n_act_orb
   ii_act = list_act(i_act)
   do i_chol = 1, cholesky_mo_num
    cholesky_no_total_transp(i_chol,ii_act,jj_core_inact) = cholesky_no_1_idx_transp(i_chol,i_act,jj_core_inact)
    cholesky_no_total_transp(i_chol,jj_core_inact,ii_act) = cholesky_no_1_idx_transp(i_chol,i_act,jj_core_inact)
   enddo
  enddo
 enddo

 ! Block when two orbitals belong to the active
 do j_act = 1, n_act_orb
  jj_act = list_act(j_act)
  do i_act = 1, n_act_orb
   ii_act = list_act(i_act)
   do i_chol = 1, cholesky_mo_num
    cholesky_no_total_transp(i_chol,ii_act,jj_act) = cholesky_no_2_idx_transp(i_chol,i_act,j_act)
   enddo
  enddo
 enddo

 ! Block when two orbitals belong to the virtuals
 do i_virt = 1, n_virt_orb
  ii_virt = list_virt(i_virt)
  do j_virt = 1, n_virt_orb
   jj_virt = list_virt(j_virt)
   do i_chol = 1, cholesky_mo_num
    cholesky_no_total_transp(i_chol,jj_virt,ii_virt) = cholesky_mo_transp(i_chol,jj_virt,ii_virt)
   enddo
  enddo
 enddo

 ! Block when one orbital is in active and the other in the virtuals
 do i_virt = 1, n_virt_orb
  ii_virt = list_virt(i_virt)
  do i_act = 1, n_act_orb
   ii_act = list_act(i_act)
   do i_chol = 1, cholesky_mo_num
    cholesky_no_total_transp(i_chol,ii_act,ii_virt) = cholesky_no_1_idx_transp(i_chol, i_act,ii_virt)
    cholesky_no_total_transp(i_chol,ii_virt,ii_act) = cholesky_no_1_idx_transp(i_chol, i_act,ii_virt)
   enddo
  enddo
 enddo

 ! Block when one orbital is in the virtual and one in the core-inact
 do i_virt = 1, n_virt_orb
  ii_virt = list_virt(i_virt)
  do i_core_inact = 1, n_core_inact_orb
   ii_core_inact = list_core_inact(i_core_inact)
   do i_chol = 1, cholesky_mo_num
    cholesky_no_total_transp(i_chol, ii_core_inact, ii_virt) = cholesky_mo_transp(i_chol, ii_core_inact, ii_virt)
    cholesky_no_total_transp(i_chol, ii_virt, ii_core_inact) = cholesky_mo_transp(i_chol, ii_virt, ii_core_inact)
   enddo
  enddo
 enddo

 call wall_time(wall1)
 print*,'Time to provide cholesky_no_total_transp = ', wall1 - wall0
END_PROVIDER
|
||||||
|
|
||||||
|
|
||||||
|
double precision function bielec_no_basis(i_1,j_1,i_2,j_2)
 BEGIN_DOC
 ! integral (i_1 j_1|i_2 j_2) in the mixed basis of both MOs and natural MOs
 !
 ! Evaluated as the contraction, over the Cholesky index, of
 ! cholesky_no_total_transp(:,i_1,j_1) with cholesky_no_total_transp(:,i_2,j_2).
 END_DOC
 implicit none
 integer, intent(in) :: i_1,j_1,i_2,j_2
 integer             :: k
 double precision    :: acc

 acc = 0.d0
 do k = 1, cholesky_mo_num
  acc = acc + cholesky_no_total_transp(k,i_1,j_1) * cholesky_no_total_transp(k,i_2,j_2)
 enddo
 bielec_no_basis = acc
end
|
||||||
|
|
||||||
|
double precision function bielec_PQxx_no(i_mo, j_mo, i_ca, j_ca)
 BEGIN_DOC
 ! function that computes (i_mo j_mo| i_ca j_ca) with Cholesky decomposition on the NO basis for active orbitals
 !
 ! where i_ca, j_ca are in [1:n_core_inact_act_orb]; they are converted to
 ! orbital indices through list_core_inact_act before calling bielec_no_basis.
 ! i_mo and j_mo are passed to bielec_no_basis unchanged.
 END_DOC
 implicit none
 integer, intent(in) :: i_ca, j_ca, i_mo, j_mo
 double precision    :: bielec_no_basis

 bielec_PQxx_no = bielec_no_basis(i_mo, j_mo,                 &
                                  list_core_inact_act(i_ca),  &
                                  list_core_inact_act(j_ca))
end
|
||||||
|
|
||||||
|
double precision function bielec_PxxQ_no(i_mo, j_ca, i_ca, j_mo)
 BEGIN_DOC
 ! function that computes (i_mo j_ca |i_ca j_mo) with Cholesky decomposition on the NO basis for active orbitals
 !
 ! where i_ca, j_ca are in [1:n_core_inact_act_orb]; they are converted to
 ! orbital indices through list_core_inact_act before calling bielec_no_basis.
 ! i_mo and j_mo are passed to bielec_no_basis unchanged.
 END_DOC
 implicit none
 integer, intent(in) :: i_ca, j_ca, i_mo, j_mo
 double precision    :: bielec_no_basis

 bielec_PxxQ_no = bielec_no_basis(i_mo,                       &
                                  list_core_inact_act(j_ca),  &
                                  list_core_inact_act(i_ca),  &
                                  j_mo)
end
|
||||||
|
|
||||||
|
|
||||||
|
double precision function bielec_PQxx(i_mo, j_mo, i_ca, j_ca)
 BEGIN_DOC
 ! function that computes (i_mo j_mo |i_ca j_ca) through mo_two_e_integral
 !
 ! i_mo, j_mo are unshifted orbital numbers
 !
 ! i_ca, j_ca are in [1:n_core_inact_act_orb] and are converted to orbital
 ! numbers through list_core_inact_act.
 !
 ! NOTE(review): the argument order given to mo_two_e_integral
 ! (i_mo, i_ca, j_mo, j_ca) presumably reflects a <12|12> convention of that
 ! routine -- confirm against its definition.
 END_DOC
 implicit none
 integer, intent(in) :: i_ca, j_ca, j_mo, i_mo
 double precision    :: mo_two_e_integral

 bielec_PQxx = mo_two_e_integral(i_mo, list_core_inact_act(i_ca),  &
                                 j_mo, list_core_inact_act(j_ca))
end
|
||||||
|
|
||||||
|
double precision function bielec_PxxQ(i_mo, i_ca, j_ca, j_mo)
 BEGIN_DOC
 ! function that returns mo_two_e_integral(i_mo, jj_ca, ii_ca, j_mo)
 !
 ! where i_ca, j_ca are in [1:n_core_inact_act_orb] and ii_ca, jj_ca are the
 ! corresponding orbital numbers taken from list_core_inact_act.
 ! i_mo and j_mo are used as given.
 !
 ! NOTE(review): with a <12|12> convention for mo_two_e_integral this would be
 ! the integral (i_mo i_ca | j_ca j_mo) -- confirm against its definition.
 END_DOC
 implicit none
 integer, intent(in) :: i_ca, j_ca, j_mo, i_mo
 double precision    :: mo_two_e_integral

 bielec_PxxQ = mo_two_e_integral(i_mo, list_core_inact_act(j_ca),  &
                                 list_core_inact_act(i_ca), j_mo)
end
|
||||||
|
|
34
src/casscf_cipsi/chol_garb.irp.f
Normal file
34
src/casscf_cipsi/chol_garb.irp.f
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
|
||||||
|
!!!!! FUNCTIONS THAT WORK BUT WHICH ARE USELESS AS THE ARRAYS CAN ALWAYS BE STORED
|
||||||
|
!double precision function bielecCI_chol(i_a, j_a, k_a, i_mo)
|
||||||
|
! BEGIN_DOC
|
||||||
|
! ! function that computes (i_a j_a |k_a j_mo) with Cholesky decomposition
|
||||||
|
! !
|
||||||
|
! ! where i_a, j_a, k_a are in [1:n_act_orb] !!! ONLY ON ACTIVE
|
||||||
|
! END_DOC
|
||||||
|
! implicit none
|
||||||
|
! integer, intent(in) :: i_a, j_a, k_a, i_mo
|
||||||
|
! integer :: ii_a, jj_a, kk_a
|
||||||
|
! double precision :: mo_two_e_integral
|
||||||
|
! ii_a = list_act(i_a)
|
||||||
|
! jj_a = list_act(j_a)
|
||||||
|
! kk_a = list_act(k_a)
|
||||||
|
! bielecCI_chol = mo_two_e_integral(ii_a,kk_a,jj_a,i_mo)
|
||||||
|
!end
|
||||||
|
|
||||||
|
!double precision function bielecCI_no_chol(i_ca, j_ca, k_ca, i_mo)
|
||||||
|
! BEGIN_DOC
|
||||||
|
! ! function that computes (i_ca j_ca |k_ca j_mo) with Cholesky decomposition on the NO basis for active orbitals
|
||||||
|
! !
|
||||||
|
! ! where i_ca, j_ca, k_ca are in [1:n_core_inact_act_orb]
|
||||||
|
! END_DOC
|
||||||
|
! implicit none
|
||||||
|
! integer, intent(in) :: i_ca, j_ca, k_ca, i_mo
|
||||||
|
! integer :: ii_ca, jj_ca, kk_ca
|
||||||
|
! double precision :: bielec_no_basis_chol
|
||||||
|
! ii_ca = list_act(i_ca)
|
||||||
|
! jj_ca = list_act(j_ca)
|
||||||
|
! kk_ca = list_act(k_ca)
|
||||||
|
! bielecCI_no_chol = bielec_no_basis_chol(ii_ca, jj_ca, kk_ca, i_mo)
|
||||||
|
!
|
||||||
|
!end
|
@ -157,6 +157,7 @@ real*8 function gradvec_it(i,t)
|
|||||||
|
|
||||||
integer :: ii,tt,v,vv,x,y
|
integer :: ii,tt,v,vv,x,y
|
||||||
integer :: x3,y3
|
integer :: x3,y3
|
||||||
|
double precision :: bielec_PQxx_no
|
||||||
|
|
||||||
ii=list_core_inact(i)
|
ii=list_core_inact(i)
|
||||||
tt=list_act(t)
|
tt=list_act(t)
|
||||||
|
@ -10,6 +10,7 @@ real*8 function hessmat_itju(i,t,j,u)
|
|||||||
implicit none
|
implicit none
|
||||||
integer :: i,t,j,u,ii,tt,uu,v,vv,x,xx,y,jj
|
integer :: i,t,j,u,ii,tt,uu,v,vv,x,xx,y,jj
|
||||||
real*8 :: term,t2
|
real*8 :: term,t2
|
||||||
|
double precision :: bielec_pqxx_no,bielec_pxxq_no
|
||||||
|
|
||||||
ii=list_core_inact(i)
|
ii=list_core_inact(i)
|
||||||
tt=list_act(t)
|
tt=list_act(t)
|
||||||
@ -95,6 +96,7 @@ real*8 function hessmat_itja(i,t,j,a)
|
|||||||
implicit none
|
implicit none
|
||||||
integer :: i,t,j,a,ii,tt,jj,aa,v,vv,x,y
|
integer :: i,t,j,a,ii,tt,jj,aa,v,vv,x,y
|
||||||
real*8 :: term
|
real*8 :: term
|
||||||
|
double precision :: bielec_pqxx_no,bielec_pxxq_no
|
||||||
|
|
||||||
! it/ja
|
! it/ja
|
||||||
ii=list_core_inact(i)
|
ii=list_core_inact(i)
|
||||||
@ -128,6 +130,7 @@ real*8 function hessmat_itua(i,t,u,a)
|
|||||||
implicit none
|
implicit none
|
||||||
integer :: i,t,u,a,ii,tt,uu,aa,v,vv,x,xx,u3,t3,v3
|
integer :: i,t,u,a,ii,tt,uu,aa,v,vv,x,xx,u3,t3,v3
|
||||||
real*8 :: term
|
real*8 :: term
|
||||||
|
double precision :: bielec_pqxx_no,bielec_pxxq_no
|
||||||
|
|
||||||
ii=list_core_inact(i)
|
ii=list_core_inact(i)
|
||||||
tt=list_act(t)
|
tt=list_act(t)
|
||||||
@ -169,6 +172,7 @@ real*8 function hessmat_iajb(i,a,j,b)
|
|||||||
implicit none
|
implicit none
|
||||||
integer :: i,a,j,b,ii,aa,jj,bb
|
integer :: i,a,j,b,ii,aa,jj,bb
|
||||||
real*8 :: term
|
real*8 :: term
|
||||||
|
double precision :: bielec_pqxx_no,bielec_pxxq_no
|
||||||
|
|
||||||
ii=list_core_inact(i)
|
ii=list_core_inact(i)
|
||||||
aa=list_virt(a)
|
aa=list_virt(a)
|
||||||
@ -205,6 +209,7 @@ real*8 function hessmat_iatb(i,a,t,b)
|
|||||||
implicit none
|
implicit none
|
||||||
integer :: i,a,t,b,ii,aa,tt,bb,v,vv,x,y,v3,t3
|
integer :: i,a,t,b,ii,aa,tt,bb,v,vv,x,y,v3,t3
|
||||||
real*8 :: term
|
real*8 :: term
|
||||||
|
double precision :: bielec_pqxx_no,bielec_pxxq_no
|
||||||
|
|
||||||
ii=list_core_inact(i)
|
ii=list_core_inact(i)
|
||||||
aa=list_virt(a)
|
aa=list_virt(a)
|
||||||
@ -237,6 +242,7 @@ real*8 function hessmat_taub(t,a,u,b)
|
|||||||
integer :: t,a,u,b,tt,aa,uu,bb,v,vv,x,xx,y
|
integer :: t,a,u,b,tt,aa,uu,bb,v,vv,x,xx,y
|
||||||
integer :: v3,x3
|
integer :: v3,x3
|
||||||
real*8 :: term,t1,t2,t3
|
real*8 :: term,t1,t2,t3
|
||||||
|
double precision :: bielec_pqxx_no,bielec_pxxq_no
|
||||||
|
|
||||||
tt=list_act(t)
|
tt=list_act(t)
|
||||||
aa=list_virt(a)
|
aa=list_virt(a)
|
||||||
|
@ -4,6 +4,7 @@ BEGIN_PROVIDER [real*8, Fipq, (mo_num,mo_num) ]
|
|||||||
END_DOC
|
END_DOC
|
||||||
implicit none
|
implicit none
|
||||||
integer :: p,q,k,kk,t,tt,u,uu
|
integer :: p,q,k,kk,t,tt,u,uu
|
||||||
|
double precision :: bielec_pxxq_no, bielec_pqxx_no
|
||||||
|
|
||||||
do q=1,mo_num
|
do q=1,mo_num
|
||||||
do p=1,mo_num
|
do p=1,mo_num
|
||||||
@ -44,6 +45,7 @@ BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ]
|
|||||||
END_DOC
|
END_DOC
|
||||||
implicit none
|
implicit none
|
||||||
integer :: p,q,k,kk,t,tt,u,uu
|
integer :: p,q,k,kk,t,tt,u,uu
|
||||||
|
double precision :: bielec_pxxq_no, bielec_pqxx_no
|
||||||
|
|
||||||
Fapq = 0.d0
|
Fapq = 0.d0
|
||||||
|
|
||||||
|
116
src/casscf_cipsi/test_chol.irp.f
Normal file
116
src/casscf_cipsi/test_chol.irp.f
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
program test_chol
 BEGIN_DOC
 ! Driver for the consistency checks of the Cholesky-based two-electron integrals.
 ! It only requests that the wave function be read; every check is currently disabled.
 END_DOC
 implicit none

 ! Force the wave function to be read.
 read_wf = .true.
 TOUCH read_wf

 ! Disabled checks (uncomment the one to run):
 ! call routine_bielec_PxxQ_no
 ! call routine_bielecCI_no
 ! call test_bielec_PxxQ_chol
 ! call test_bielecCI

end
|
||||||
|
|
||||||
|
subroutine routine_bielec_PQxx_no
 BEGIN_DOC
 ! Compares the stored array bielec_PQxx_no_array with the function bielec_PQxx_no
 ! for all (j_mo, i_mo, j_ca, i_ca), prints every entry whose reference value
 ! exceeds 1.d-10 in magnitude, and prints the mean absolute deviation.
 END_DOC
 implicit none
 integer          :: i_ca, j_ca, i_mo, j_mo
 double precision :: exact, new, error, accu
 double precision :: bielec_PQxx_no

 accu = 0.d0
 do i_ca = 1, n_core_inact_act_orb
  do j_ca = 1, n_core_inact_act_orb
   do i_mo = 1, mo_num
    do j_mo = 1, mo_num
     exact = bielec_PQxx_no_array(j_mo,i_mo, j_ca, i_ca)
     new   = bielec_PQxx_no(j_mo,i_mo, j_ca, i_ca)
     error = dabs(exact-new)
     if(dabs(exact).gt.1.d-10)then
      print*,exact,new,error
     endif
     accu += error
    enddo
   enddo
  enddo
 enddo
 ! The element count is formed in double precision: the integer product
 ! mo_num*mo_num*n_core_inact_act_orb**2 can exceed the default integer range.
 print*,'accu = ',accu/(dble(mo_num)*dble(mo_num)*dble(n_core_inact_act_orb)**2)
end
|
||||||
|
|
||||||
|
subroutine routine_bielec_PxxQ_no_array
 BEGIN_DOC
 ! Compares the stored array bielec_PxxQ_no_array with the function bielec_PxxQ_no
 ! for all (j_mo, j_ca, i_ca, i_mo), prints every entry whose reference value
 ! exceeds 1.d-10 in magnitude, and prints the mean absolute deviation.
 END_DOC
 implicit none
 integer          :: i_ca, j_ca, i_mo, j_mo
 double precision :: exact, new, error, accu
 double precision :: bielec_PxxQ_no

 accu = 0.d0
 do i_mo = 1, mo_num
  do i_ca = 1, n_core_inact_act_orb
   do j_ca = 1, n_core_inact_act_orb
    do j_mo = 1, mo_num
     exact = bielec_PxxQ_no_array(j_mo, j_ca, i_ca,i_mo)
     new   = bielec_PxxQ_no(j_mo, j_ca, i_ca,i_mo)
     error = dabs(exact-new)
     accu += error
     if(dabs(exact).gt.1.d-10)then
      print*,exact,new,error
     endif
    enddo
   enddo
  enddo
 enddo
 ! The element count is formed in double precision: the integer product
 ! mo_num*mo_num*n_core_inact_act_orb**2 can exceed the default integer range.
 print*,'accu = ',accu/(dble(mo_num)*dble(mo_num)*dble(n_core_inact_act_orb)**2)
end
|
||||||
|
|
||||||
|
subroutine test_bielec_PQxx(i_mo, j_mo, i_ca, j_ca)
 BEGIN_DOC
 ! Compares the stored array bielec_PQxx_array with the function bielec_PQxx over
 ! the full index range, prints every entry whose reference value exceeds 1.d-10
 ! in magnitude, and prints the mean absolute deviation.
 !
 ! The four dummy arguments are kept only for interface compatibility: their
 ! values are ignored, and they are no longer overwritten by the loops.
 END_DOC
 implicit none
 integer, intent(in) :: i_mo, j_mo, i_ca, j_ca
 integer             :: p, q, x, y
 double precision    :: exact, new, error, accu
 double precision    :: bielec_PQxx

 accu = 0.d0
 do y = 1, n_core_inact_act_orb
  do x = 1, n_core_inact_act_orb
   do q = 1, mo_num
    do p = 1, mo_num
     exact = bielec_PQxx_array(p, q, x, y)
     new   = bielec_PQxx(p, q, x, y)
     error = dabs(exact-new)
     accu += error
     if(dabs(exact).gt.1.d-10)then
      print*,exact,new,error
     endif
    enddo
   enddo
  enddo
 enddo
 ! The element count is formed in double precision: the integer product
 ! mo_num*mo_num*n_core_inact_act_orb**2 can exceed the default integer range.
 print*,'accu = ',accu/(dble(mo_num)*dble(mo_num)*dble(n_core_inact_act_orb)**2)
end
|
||||||
|
|
||||||
|
subroutine test_bielec_PxxQ_chol(i_mo, i_ca, j_ca, j_mo)
 BEGIN_DOC
 ! Compares the stored array bielec_PxxQ_array with the function bielec_PxxQ over
 ! the full index range, prints every entry whose reference value exceeds 1.d-10
 ! in magnitude, and prints the mean absolute deviation.
 !
 ! The four dummy arguments are kept only for interface compatibility: their
 ! values are ignored, and they are no longer overwritten by the loops.
 END_DOC
 implicit none
 integer, intent(in) :: i_mo, i_ca, j_ca, j_mo
 integer             :: p, q, x, y
 double precision    :: exact, new, error, accu
 double precision    :: bielec_PxxQ

 accu = 0.d0
 do q = 1, mo_num
  do y = 1, n_core_inact_act_orb
   do x = 1, n_core_inact_act_orb
    do p = 1, mo_num
     exact = bielec_PxxQ_array(p, x, y, q)
     new   = bielec_PxxQ(p, x, y, q)
     error = dabs(exact-new)
     accu += error
     if(dabs(exact).gt.1.d-10)then
      print*,exact,new,error
     endif
    enddo
   enddo
  enddo
 enddo
 ! The element count is formed in double precision: the integer product
 ! mo_num*mo_num*n_core_inact_act_orb**2 can exceed the default integer range.
 print*,'accu = ',accu/(dble(mo_num)*dble(mo_num)*dble(n_core_inact_act_orb)**2)
end
|
@ -8,6 +8,7 @@
|
|||||||
implicit none
|
implicit none
|
||||||
integer :: t,u,v,x,i,ii,tt,uu,vv,xx,j,jj,t3,u3,v3,x3
|
integer :: t,u,v,x,i,ii,tt,uu,vv,xx,j,jj,t3,u3,v3,x3
|
||||||
real*8 :: e_one_all,e_two_all
|
real*8 :: e_one_all,e_two_all
|
||||||
|
double precision :: bielec_PQxx,bielec_PxxQ
|
||||||
e_one_all=0.D0
|
e_one_all=0.D0
|
||||||
e_two_all=0.D0
|
e_two_all=0.D0
|
||||||
do i=1,n_core_inact_orb
|
do i=1,n_core_inact_orb
|
||||||
|
@ -1,2 +1,3 @@
|
|||||||
|
gpu
|
||||||
hartree_fock
|
hartree_fock
|
||||||
utils_cc
|
utils_cc
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
subroutine run_ccsd_space_orb
|
subroutine run_ccsd_space_orb
|
||||||
|
use gpu
|
||||||
|
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
@ -9,9 +10,19 @@ subroutine run_ccsd_space_orb
|
|||||||
double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb
|
double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb
|
||||||
logical :: not_converged
|
logical :: not_converged
|
||||||
|
|
||||||
double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:)
|
type(gpu_double4) :: t2, r2, tau, tau_x
|
||||||
double precision, allocatable :: t1(:,:), r1(:,:)
|
type(gpu_double2) :: t1, r1
|
||||||
double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:)
|
type(gpu_double2) :: H_oo, H_vv, H_vo
|
||||||
|
|
||||||
|
type(gpu_double2) :: d_cc_space_f_oo, d_cc_space_f_vo
|
||||||
|
type(gpu_double2) :: d_cc_space_f_ov, d_cc_space_f_vv
|
||||||
|
|
||||||
|
type(gpu_double3) :: d_cc_space_v_oo_chol, d_cc_space_v_vo_chol
|
||||||
|
type(gpu_double3) :: d_cc_space_v_ov_chol, d_cc_space_v_vv_chol
|
||||||
|
|
||||||
|
type(gpu_double4) :: d_cc_space_v_oovv, d_cc_space_v_voov, d_cc_space_v_ovov
|
||||||
|
type(gpu_double4) :: d_cc_space_v_oovo, d_cc_space_v_vooo, d_cc_space_v_oooo
|
||||||
|
type(gpu_double4) :: d_cc_space_v_vvoo, d_cc_space_v_ovvo, d_cc_space_v_ovoo
|
||||||
|
|
||||||
double precision, allocatable :: all_err(:,:), all_t(:,:)
|
double precision, allocatable :: all_err(:,:), all_t(:,:)
|
||||||
integer, allocatable :: list_occ(:), list_vir(:)
|
integer, allocatable :: list_occ(:), list_vir(:)
|
||||||
@ -20,7 +31,7 @@ subroutine run_ccsd_space_orb
|
|||||||
|
|
||||||
call set_multiple_levels_omp(.False.)
|
call set_multiple_levels_omp(.False.)
|
||||||
|
|
||||||
if (do_ao_cholesky) then
|
if (do_mo_cholesky) then
|
||||||
PROVIDE cholesky_mo_transp
|
PROVIDE cholesky_mo_transp
|
||||||
FREE cholesky_ao
|
FREE cholesky_ao
|
||||||
else
|
else
|
||||||
@ -51,11 +62,77 @@ subroutine run_ccsd_space_orb
|
|||||||
!print*,'occ',list_occ
|
!print*,'occ',list_occ
|
||||||
!print*,'vir',list_vir
|
!print*,'vir',list_vir
|
||||||
|
|
||||||
allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV))
|
! GPU arrays
|
||||||
allocate(tau(nO,nO,nV,nV))
|
call gpu_allocate(d_cc_space_f_oo, nO, nO)
|
||||||
allocate(tau_x(nO,nO,nV,nV))
|
call gpu_allocate(d_cc_space_f_vo, nV, nO)
|
||||||
allocate(t1(nO,nV), r1(nO,nV))
|
call gpu_allocate(d_cc_space_f_ov, nO, nV)
|
||||||
allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO))
|
call gpu_allocate(d_cc_space_f_vv, nV, nV)
|
||||||
|
|
||||||
|
call gpu_upload(cc_space_f_oo, d_cc_space_f_oo)
|
||||||
|
call gpu_upload(cc_space_f_vo, d_cc_space_f_vo)
|
||||||
|
call gpu_upload(cc_space_f_ov, d_cc_space_f_ov)
|
||||||
|
call gpu_upload(cc_space_f_vv, d_cc_space_f_vv)
|
||||||
|
|
||||||
|
! FREE cc_space_f_oo
|
||||||
|
! FREE cc_space_f_vo
|
||||||
|
! FREE cc_space_f_vv
|
||||||
|
|
||||||
|
if (do_mo_cholesky) then
|
||||||
|
call gpu_allocate(d_cc_space_v_oo_chol, cholesky_mo_num, nO, nO)
|
||||||
|
call gpu_allocate(d_cc_space_v_ov_chol, cholesky_mo_num, nO, nV)
|
||||||
|
call gpu_allocate(d_cc_space_v_vo_chol, cholesky_mo_num, nV, nO)
|
||||||
|
call gpu_allocate(d_cc_space_v_vv_chol, cholesky_mo_num, nV, nV)
|
||||||
|
|
||||||
|
call gpu_upload(cc_space_v_oo_chol, d_cc_space_v_oo_chol)
|
||||||
|
call gpu_upload(cc_space_v_ov_chol, d_cc_space_v_ov_chol)
|
||||||
|
call gpu_upload(cc_space_v_vo_chol, d_cc_space_v_vo_chol)
|
||||||
|
call gpu_upload(cc_space_v_vv_chol, d_cc_space_v_vv_chol)
|
||||||
|
|
||||||
|
! FREE cc_space_v_oo_chol
|
||||||
|
! FREE cc_space_v_ov_chol
|
||||||
|
! FREE cc_space_v_vo_chol
|
||||||
|
! FREE cc_space_v_vv_chol
|
||||||
|
endif
|
||||||
|
|
||||||
|
call gpu_allocate(d_cc_space_v_oovv, nO, nO, nV, nV)
|
||||||
|
call gpu_allocate(d_cc_space_v_voov, nV, nO, nO, nV)
|
||||||
|
call gpu_allocate(d_cc_space_v_ovov, nO, nV, nO, nV)
|
||||||
|
call gpu_allocate(d_cc_space_v_oovo, nO, nO, nV, nO)
|
||||||
|
call gpu_allocate(d_cc_space_v_ovvo, nO, nV, nV, nO)
|
||||||
|
call gpu_allocate(d_cc_space_v_vooo, nV, nO, nO, nO)
|
||||||
|
call gpu_allocate(d_cc_space_v_oooo, nO, nO, nO, nO)
|
||||||
|
call gpu_allocate(d_cc_space_v_vvoo, nV, nV, nO, nO)
|
||||||
|
call gpu_allocate(d_cc_space_v_ovoo, nO, nV, nO, nO)
|
||||||
|
|
||||||
|
call gpu_upload(cc_space_v_oovv, d_cc_space_v_oovv)
|
||||||
|
call gpu_upload(cc_space_v_voov, d_cc_space_v_voov)
|
||||||
|
call gpu_upload(cc_space_v_ovov, d_cc_space_v_ovov)
|
||||||
|
call gpu_upload(cc_space_v_oovo, d_cc_space_v_oovo)
|
||||||
|
call gpu_upload(cc_space_v_ovvo, d_cc_space_v_ovvo)
|
||||||
|
call gpu_upload(cc_space_v_vooo, d_cc_space_v_vooo)
|
||||||
|
call gpu_upload(cc_space_v_oooo, d_cc_space_v_oooo)
|
||||||
|
call gpu_upload(cc_space_v_vvoo, d_cc_space_v_vvoo)
|
||||||
|
call gpu_upload(cc_space_v_ovoo, d_cc_space_v_ovoo)
|
||||||
|
|
||||||
|
! FREE cc_space_v_voov
|
||||||
|
! FREE cc_space_v_ovov
|
||||||
|
! FREE cc_space_v_oovo
|
||||||
|
! FREE cc_space_v_oovv
|
||||||
|
! FREE cc_space_v_vooo
|
||||||
|
! FREE cc_space_v_oooo
|
||||||
|
! FREE cc_space_v_vvoo
|
||||||
|
! FREE cc_space_v_ovvo
|
||||||
|
! FREE cc_space_v_ovoo
|
||||||
|
|
||||||
|
call gpu_allocate(t2, nO,nO,nV,nV)
|
||||||
|
call gpu_allocate(r2, nO,nO,nV,nV)
|
||||||
|
call gpu_allocate(tau, nO,nO,nV,nV)
|
||||||
|
call gpu_allocate(tau_x, nO,nO,nV,nV)
|
||||||
|
call gpu_allocate(t1, nO,nV)
|
||||||
|
call gpu_allocate(r1, nO,nV)
|
||||||
|
call gpu_allocate(H_oo, nO, nO)
|
||||||
|
call gpu_allocate(H_vo, nV, nO)
|
||||||
|
call gpu_allocate(H_vv, nV, nV)
|
||||||
|
|
||||||
if (cc_update_method == 'diis') then
|
if (cc_update_method == 'diis') then
|
||||||
double precision :: rss, diis_mem, extra_mem
|
double precision :: rss, diis_mem, extra_mem
|
||||||
@ -97,14 +174,22 @@ subroutine run_ccsd_space_orb
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
! Init
|
! Init
|
||||||
call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1)
|
double precision, allocatable :: h_t1(:,:), h_t2(:,:,:,:)
|
||||||
call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2)
|
allocate(h_t1(nO,nV), h_t2(nO,nO,nV,nV))
|
||||||
call update_tau_space(nO,nV,t1,t2,tau)
|
|
||||||
|
call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,h_t1)
|
||||||
|
call gpu_upload(h_t1, t1)
|
||||||
|
|
||||||
|
call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,h_t2)
|
||||||
|
call gpu_upload(h_t2, t2)
|
||||||
|
|
||||||
|
|
||||||
|
call update_tau_space(nO,nV,h_t1,t1,t2,tau)
|
||||||
call update_tau_x_space(nO,nV,tau,tau_x)
|
call update_tau_x_space(nO,nV,tau,tau_x)
|
||||||
!print*,'hf_energy', hf_energy
|
|
||||||
call det_energy(det,uncorr_energy)
|
call det_energy(det,uncorr_energy)
|
||||||
print*,'Det energy', uncorr_energy
|
print*,'Det energy', uncorr_energy
|
||||||
call ccsd_energy_space_x(nO,nV,tau_x,t1,energy)
|
|
||||||
|
call ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy)
|
||||||
print*,'Guess energy', uncorr_energy+energy, energy
|
print*,'Guess energy', uncorr_energy+energy, energy
|
||||||
|
|
||||||
nb_iter = 0
|
nb_iter = 0
|
||||||
@ -120,43 +205,45 @@ subroutine run_ccsd_space_orb
|
|||||||
do while (not_converged)
|
do while (not_converged)
|
||||||
|
|
||||||
! Residue
|
! Residue
|
||||||
if (do_ao_cholesky) then
|
if (do_mo_cholesky) then
|
||||||
! if (.False.) then
|
call compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, d_cc_space_v_ov_chol,d_cc_space_v_vo_chol,H_oo)
|
||||||
call compute_H_oo_chol(nO,nV,tau_x,H_oo)
|
call compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, d_cc_space_v_ov_chol,H_vv)
|
||||||
call compute_H_vv_chol(nO,nV,tau_x,H_vv)
|
call compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, d_cc_space_v_ov_chol,d_cc_space_v_vo_chol, H_vo)
|
||||||
call compute_H_vo_chol(nO,nV,t1,H_vo)
|
|
||||||
|
|
||||||
call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
|
call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1,d_cc_space_f_ov,d_cc_space_f_vo, &
|
||||||
call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
d_cc_space_v_voov, d_cc_space_v_ovov, d_cc_space_v_oovo, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol)
|
||||||
|
call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, &
|
||||||
|
d_cc_space_v_oovv, d_cc_space_v_vooo, d_cc_space_v_oooo, d_cc_space_v_oovo, d_cc_space_v_ovvo, d_cc_space_v_ovoo, &
|
||||||
|
d_cc_space_v_ovov, d_cc_space_v_vvoo, d_cc_space_v_oo_chol, d_cc_space_v_ov_chol, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol, &
|
||||||
|
d_cc_space_f_vo, &
|
||||||
|
r2, max_r2)
|
||||||
else
|
else
|
||||||
call compute_H_oo(nO,nV,t1,t2,tau,H_oo)
|
call compute_H_oo(nO,nV,t1%f,t2%f,tau%f,H_oo%f)
|
||||||
call compute_H_vv(nO,nV,t1,t2,tau,H_vv)
|
call compute_H_vv(nO,nV,t1%f,t2%f,tau%f,H_vv%f)
|
||||||
call compute_H_vo(nO,nV,t1,t2,H_vo)
|
call compute_H_vo(nO,nV,t1%f,t2%f,H_vo%f)
|
||||||
|
|
||||||
call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
|
call compute_r1_space(nO,nV,t1%f,t2%f,tau%f,H_oo%f,H_vv%f,H_vo%f,r1%f,max_r1)
|
||||||
call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
call compute_r2_space(nO,nV,t1%f,t2%f,tau%f,H_oo%f,H_vv%f,H_vo%f,r2%f,max_r2)
|
||||||
endif
|
endif
|
||||||
max_r = max(max_r1,max_r2)
|
max_r = max(max_r1,max_r2)
|
||||||
|
|
||||||
! Update
|
! Update
|
||||||
if (cc_update_method == 'diis') then
|
if (cc_update_method == 'diis') then
|
||||||
!call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
|
call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1%f,r2%f,t1%f,t2%f,all_err,all_t)
|
||||||
!call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
|
|
||||||
call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t)
|
|
||||||
|
|
||||||
! Standard update as T = T - Delta
|
! Standard update as T = T - Delta
|
||||||
elseif (cc_update_method == 'none') then
|
elseif (cc_update_method == 'none') then
|
||||||
call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1)
|
call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1%f,t1%f)
|
||||||
call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2)
|
call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2%f,t2%f)
|
||||||
else
|
else
|
||||||
print*,'Unkown cc_method_method: '//cc_update_method
|
print*,'Unkown cc_method_method: '//cc_update_method
|
||||||
endif
|
endif
|
||||||
|
|
||||||
call update_tau_space(nO,nV,t1,t2,tau)
|
call update_tau_space(nO,nV,t1%f,t1,t2,tau)
|
||||||
call update_tau_x_space(nO,nV,tau,tau_x)
|
call update_tau_x_space(nO,nV,tau,tau_x)
|
||||||
|
|
||||||
! Energy
|
! Energy
|
||||||
call ccsd_energy_space_x(nO,nV,tau_x,t1,energy)
|
call ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy)
|
||||||
write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'
|
write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'
|
||||||
|
|
||||||
nb_iter = nb_iter + 1
|
nb_iter = nb_iter + 1
|
||||||
@ -181,8 +268,8 @@ subroutine run_ccsd_space_orb
|
|||||||
print*,''
|
print*,''
|
||||||
|
|
||||||
if (write_amplitudes) then
|
if (write_amplitudes) then
|
||||||
call write_t1(nO,nV,t1)
|
call write_t1(nO,nV,t1%f)
|
||||||
call write_t2(nO,nV,t2)
|
call write_t2(nO,nV,t2%f)
|
||||||
call ezfio_set_utils_cc_io_amplitudes('Read')
|
call ezfio_set_utils_cc_io_amplitudes('Read')
|
||||||
endif
|
endif
|
||||||
|
|
||||||
@ -191,7 +278,14 @@ subroutine run_ccsd_space_orb
|
|||||||
deallocate(all_err,all_t)
|
deallocate(all_err,all_t)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
deallocate(H_vv,H_oo,H_vo,r1,r2,tau)
|
call gpu_deallocate(H_oo)
|
||||||
|
call gpu_deallocate(H_vv)
|
||||||
|
call gpu_deallocate(H_vo)
|
||||||
|
|
||||||
|
call gpu_deallocate(r1)
|
||||||
|
call gpu_deallocate(r2)
|
||||||
|
call gpu_deallocate(tau)
|
||||||
|
call gpu_deallocate(tau_x)
|
||||||
|
|
||||||
! CCSD(T)
|
! CCSD(T)
|
||||||
double precision :: e_t, e_t_err
|
double precision :: e_t, e_t_err
|
||||||
@ -199,28 +293,14 @@ subroutine run_ccsd_space_orb
|
|||||||
|
|
||||||
if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then
|
if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then
|
||||||
|
|
||||||
! Dumb way
|
|
||||||
!call wall_time(ta)
|
|
||||||
!call ccsd_par_t_space(nO,nV,t1,t2,e_t)
|
|
||||||
!call wall_time(tb)
|
|
||||||
!print*,'Time: ',tb-ta, ' s'
|
|
||||||
|
|
||||||
!print*,''
|
|
||||||
!write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha'
|
|
||||||
!write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha'
|
|
||||||
!write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha'
|
|
||||||
!print*,''
|
|
||||||
|
|
||||||
! New
|
! New
|
||||||
e_t = uncorr_energy + energy ! For print in (T) call
|
e_t = uncorr_energy + energy ! For print in (T) call
|
||||||
e_t_err = 0.d0
|
e_t_err = 0.d0
|
||||||
|
|
||||||
print*,'Computing (T) correction...'
|
print*,'Computing (T) correction...'
|
||||||
call wall_time(ta)
|
call wall_time(ta)
|
||||||
! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
|
|
||||||
! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t)
|
|
||||||
|
|
||||||
call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
|
call ccsd_par_t_space_stoch(nO,nV,t1%f,t2%f,cc_space_f_o,cc_space_f_v &
|
||||||
,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t, e_t_err)
|
,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t, e_t_err)
|
||||||
|
|
||||||
call wall_time(tb)
|
call wall_time(tb)
|
||||||
@ -235,168 +315,161 @@ subroutine run_ccsd_space_orb
|
|||||||
|
|
||||||
call save_energy(uncorr_energy + energy, e_t)
|
call save_energy(uncorr_energy + energy, e_t)
|
||||||
|
|
||||||
deallocate(t1,t2)
|
deallocate(h_t1, h_t2)
|
||||||
|
if (do_mo_cholesky) then
|
||||||
|
call gpu_deallocate(d_cc_space_v_oo_chol)
|
||||||
|
call gpu_deallocate(d_cc_space_v_ov_chol)
|
||||||
|
call gpu_deallocate(d_cc_space_v_vo_chol)
|
||||||
|
call gpu_deallocate(d_cc_space_v_vv_chol)
|
||||||
|
endif
|
||||||
|
|
||||||
|
call gpu_deallocate(d_cc_space_v_oovv)
|
||||||
|
call gpu_deallocate(d_cc_space_v_voov)
|
||||||
|
call gpu_deallocate(d_cc_space_v_ovov)
|
||||||
|
call gpu_deallocate(d_cc_space_v_oovo)
|
||||||
|
call gpu_deallocate(d_cc_space_v_ovvo)
|
||||||
|
call gpu_deallocate(d_cc_space_v_vooo)
|
||||||
|
call gpu_deallocate(d_cc_space_v_oooo)
|
||||||
|
call gpu_deallocate(d_cc_space_v_vvoo)
|
||||||
|
call gpu_deallocate(d_cc_space_v_ovoo)
|
||||||
|
|
||||||
|
call gpu_deallocate(d_cc_space_f_oo)
|
||||||
|
call gpu_deallocate(d_cc_space_f_vo)
|
||||||
|
call gpu_deallocate(d_cc_space_f_ov)
|
||||||
|
call gpu_deallocate(d_cc_space_f_vv)
|
||||||
|
|
||||||
|
call gpu_deallocate(t1)
|
||||||
|
call gpu_deallocate(t2)
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
! Energy
|
! Energy
|
||||||
|
|
||||||
subroutine ccsd_energy_space(nO,nV,tau,t1,energy)
|
subroutine ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy)
|
||||||
|
use gpu
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
integer, intent(in) :: nO, nV
|
integer, intent(in) :: nO, nV
|
||||||
double precision, intent(in) :: tau(nO,nO,nV,nV)
|
type(gpu_double4), intent(in) :: tau_x, d_cc_space_v_oovv
|
||||||
double precision, intent(in) :: t1(nO,nV)
|
type(gpu_double2), intent(in) :: t1, d_cc_space_f_vo
|
||||||
double precision, intent(out) :: energy
|
double precision, intent(out) :: energy
|
||||||
|
|
||||||
! internal
|
! internal
|
||||||
integer :: i,j,a,b
|
integer :: i,j,a,b
|
||||||
double precision :: e
|
double precision :: e
|
||||||
|
|
||||||
energy = 0d0
|
type(gpu_stream) :: s1, s2
|
||||||
!$omp parallel &
|
call gpu_stream_create(s1)
|
||||||
!$omp shared(nO,nV,energy,tau,t1,&
|
call gpu_stream_create(s2)
|
||||||
!$omp cc_space_f_vo,cc_space_w_oovv) &
|
|
||||||
!$omp private(i,j,a,b,e) &
|
|
||||||
!$omp default(none)
|
|
||||||
e = 0d0
|
|
||||||
!$omp do
|
|
||||||
do a = 1, nV
|
|
||||||
do i = 1, nO
|
|
||||||
e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a)
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
!$omp end do nowait
|
|
||||||
!$omp do
|
|
||||||
do b = 1, nV
|
|
||||||
do a = 1, nV
|
|
||||||
do j = 1, nO
|
|
||||||
do i = 1, nO
|
|
||||||
e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b)
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
!$omp end do nowait
|
|
||||||
!$omp critical
|
|
||||||
energy = energy + e
|
|
||||||
!$omp end critical
|
|
||||||
!$omp end parallel
|
|
||||||
|
|
||||||
end
|
call gpu_set_stream(blas_handle,s1)
|
||||||
|
call gpu_ddot(blas_handle, nO*nV, d_cc_space_f_vo%f(1,1), 1, t1%f(1,1), 1, e)
|
||||||
|
|
||||||
subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy)
|
call gpu_set_stream(blas_handle,s2)
|
||||||
|
call gpu_ddot_64(blas_handle, nO*nO*nV*nV*1_8, tau_x%f(1,1,1,1), 1_8, d_cc_space_v_oovv%f(1,1,1,1), 1_8, energy)
|
||||||
|
call gpu_set_stream(blas_handle,gpu_default_stream)
|
||||||
|
|
||||||
implicit none
|
call gpu_synchronize()
|
||||||
|
call gpu_stream_destroy(s1)
|
||||||
|
call gpu_stream_destroy(s2)
|
||||||
|
|
||||||
integer, intent(in) :: nO, nV
|
energy = energy + 2.d0*e
|
||||||
double precision, intent(in) :: tau_x(nO,nO,nV,nV)
|
|
||||||
double precision, intent(in) :: t1(nO,nV)
|
|
||||||
double precision, intent(out) :: energy
|
|
||||||
|
|
||||||
! internal
|
|
||||||
integer :: i,j,a,b
|
|
||||||
double precision :: e
|
|
||||||
|
|
||||||
energy = 0d0
|
|
||||||
!$omp parallel &
|
|
||||||
!$omp shared(nO,nV,energy,tau_x,t1,&
|
|
||||||
!$omp cc_space_f_vo,cc_space_v_oovv) &
|
|
||||||
!$omp private(i,j,a,b,e) &
|
|
||||||
!$omp default(none)
|
|
||||||
e = 0d0
|
|
||||||
!$omp do
|
|
||||||
do a = 1, nV
|
|
||||||
do i = 1, nO
|
|
||||||
e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a)
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
!$omp end do nowait
|
|
||||||
!$omp do
|
|
||||||
do b = 1, nV
|
|
||||||
do a = 1, nV
|
|
||||||
do j = 1, nO
|
|
||||||
do i = 1, nO
|
|
||||||
e = e + tau_x(i,j,a,b) * cc_space_v_oovv(i,j,a,b)
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
!$omp end do nowait
|
|
||||||
!$omp critical
|
|
||||||
energy = energy + e
|
|
||||||
!$omp end critical
|
|
||||||
!$omp end parallel
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
! Tau
|
! Tau
|
||||||
|
|
||||||
subroutine update_tau_space(nO,nV,t1,t2,tau)
|
subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau)
|
||||||
|
use gpu
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
! in
|
! in
|
||||||
integer, intent(in) :: nO, nV
|
integer, intent(in) :: nO, nV
|
||||||
double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV)
|
double precision, intent(in) :: h_t1(nO,nV)
|
||||||
|
type(gpu_double2), intent(in) :: t1
|
||||||
|
type(gpu_double4), intent(in) :: t2
|
||||||
|
|
||||||
! out
|
! out
|
||||||
double precision, intent(out) :: tau(nO,nO,nV,nV)
|
type(gpu_double4) :: tau
|
||||||
|
|
||||||
! internal
|
! internal
|
||||||
integer :: i,j,a,b
|
integer :: i,j,a,b
|
||||||
|
|
||||||
|
type(gpu_stream) :: stream(nV)
|
||||||
|
|
||||||
!$OMP PARALLEL &
|
!$OMP PARALLEL &
|
||||||
!$OMP SHARED(nO,nV,tau,t2,t1) &
|
!$OMP SHARED(nO,nV,tau,t2,t1,h_t1,stream,blas_handle) &
|
||||||
!$OMP PRIVATE(i,j,a,b) &
|
!$OMP PRIVATE(i,j,a,b) &
|
||||||
!$OMP DEFAULT(NONE)
|
!$OMP DEFAULT(NONE)
|
||||||
!$OMP DO
|
!$OMP DO
|
||||||
do b = 1, nV
|
do b=1,nV
|
||||||
do a = 1, nV
|
call gpu_stream_create(stream(b))
|
||||||
do j = 1, nO
|
call gpu_set_stream(blas_handle,stream(b))
|
||||||
do i = 1, nO
|
do j=1,nO
|
||||||
tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b)
|
call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, &
|
||||||
enddo
|
1.d0, t2%f(1,j,1,b), nO*nO, &
|
||||||
enddo
|
h_t1(j,b), t1%f(1,1), nO, &
|
||||||
|
tau%f(1,j,1,b), nO*nO)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
!$OMP END DO
|
!$OMP END DO
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
|
call gpu_synchronize()
|
||||||
|
|
||||||
|
do b=1,nV
|
||||||
|
call gpu_stream_destroy(stream(b))
|
||||||
|
enddo
|
||||||
|
call gpu_set_stream(blas_handle,gpu_default_stream)
|
||||||
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
subroutine update_tau_x_space(nO,nV,tau,tau_x)
|
subroutine update_tau_x_space(nO,nV,tau,tau_x)
|
||||||
|
use gpu
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
! in
|
! in
|
||||||
integer, intent(in) :: nO, nV
|
integer, intent(in) :: nO, nV
|
||||||
double precision, intent(in) :: tau(nO,nO,nV,nV)
|
type(gpu_double4), intent(in) :: tau
|
||||||
|
|
||||||
! out
|
! out
|
||||||
double precision, intent(out) :: tau_x(nO,nO,nV,nV)
|
type(gpu_double4) :: tau_x
|
||||||
|
|
||||||
! internal
|
! internal
|
||||||
integer :: i,j,a,b
|
integer :: i,j,a,b
|
||||||
|
|
||||||
|
type(gpu_stream) :: stream(nV)
|
||||||
|
|
||||||
|
do a=1,nV
|
||||||
|
call gpu_stream_create(stream(a))
|
||||||
|
enddo
|
||||||
|
|
||||||
!$OMP PARALLEL &
|
!$OMP PARALLEL &
|
||||||
!$OMP SHARED(nO,nV,tau,tau_x) &
|
!$OMP SHARED(nO,nV,tau,tau_x,stream,blas_handle) &
|
||||||
!$OMP PRIVATE(i,j,a,b) &
|
!$OMP PRIVATE(a,b) &
|
||||||
!$OMP DEFAULT(NONE)
|
!$OMP DEFAULT(NONE)
|
||||||
!$OMP DO
|
!$OMP DO
|
||||||
do b = 1, nV
|
do b=1,nV
|
||||||
do a = 1, nV
|
do a=1,nV
|
||||||
do j = 1, nO
|
call gpu_set_stream(blas_handle,stream(a))
|
||||||
do i = 1, nO
|
call gpu_dgeam(blas_handle, 'N', 'N', nO, nO, &
|
||||||
tau_x(i,j,a,b) = 2.d0*tau(i,j,a,b) - tau(i,j,b,a)
|
2.d0, tau%f(1,1,a,b), nO, &
|
||||||
enddo
|
-1.d0, tau%f(1,1,b,a), nO, &
|
||||||
enddo
|
tau_x%f(1,1,a,b), nO)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
!$OMP END DO
|
!$OMP END DO
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
|
call gpu_set_stream(blas_handle,gpu_default_stream)
|
||||||
|
call gpu_synchronize()
|
||||||
|
|
||||||
|
do b=1,nV
|
||||||
|
call gpu_stream_destroy(stream(b))
|
||||||
|
enddo
|
||||||
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
! R1
|
! R1
|
||||||
|
File diff suppressed because it is too large
Load Diff
1
src/gpu/NEED
Normal file
1
src/gpu/NEED
Normal file
@ -0,0 +1 @@
|
|||||||
|
gpu_arch
|
6
src/gpu/README.rst
Normal file
6
src/gpu/README.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
===
|
||||||
|
gpu
|
||||||
|
===
|
||||||
|
|
||||||
|
Bindings for GPU routines (architecture independent).
|
||||||
|
Architecture-dependent files are in gpu_arch.
|
@ -22,20 +22,20 @@ void gpu_ddot(const void* handle, const int64_t n, const double* x, const int64_
|
|||||||
|
|
||||||
void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result);
|
void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result);
|
||||||
|
|
||||||
void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const double alpha,
|
void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const double* alpha,
|
||||||
const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy);
|
const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy);
|
||||||
|
|
||||||
void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const float alpha,
|
void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const float* alpha,
|
||||||
const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy);
|
const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy);
|
||||||
|
|
||||||
void gpu_dgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha,
|
void gpu_dgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha,
|
||||||
const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc);
|
const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc);
|
||||||
|
|
||||||
void gpu_sgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha,
|
void gpu_sgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha,
|
||||||
const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc);
|
const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc);
|
||||||
|
|
||||||
void gpu_dgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha,
|
void gpu_dgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double* alpha,
|
||||||
const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc);
|
const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc);
|
||||||
|
|
||||||
void gpu_sgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha,
|
void gpu_sgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const float* alpha,
|
||||||
const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc);
|
const float* a, const int64_t lda, const float* beta, const float* b, const int64_t ldb, float* c, const int64_t ldc);
|
26
src/gpu/gpu.irp.f
Normal file
26
src/gpu/gpu.irp.f
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
use gpu
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [ type(gpu_blas), blas_handle ]
  implicit none
  BEGIN_DOC
  ! GPU BLAS handle (cuBLAS or RocBLAS), initialised by gpu_blas_create
  ! when this provider is built.
  END_DOC
  call gpu_blas_create(blas_handle)
END_PROVIDER
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [ type(gpu_stream), gpu_default_stream ]
  implicit none
  BEGIN_DOC
  ! Default stream: a gpu_stream whose C address is the null pointer.
  END_DOC
  gpu_default_stream%c = c_null_ptr
END_PROVIDER
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [ integer, gpu_num ]
  implicit none
  BEGIN_DOC
  ! Number of usable GPUs, as reported by gpu_ndevices().
  END_DOC
  gpu_num = gpu_ndevices()
END_PROVIDER
|
||||||
|
|
707
src/gpu/gpu_module.F90
Normal file
707
src/gpu/gpu_module.F90
Normal file
@ -0,0 +1,707 @@
|
|||||||
|
module gpu
|
||||||
|
use, intrinsic :: iso_c_binding
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Data types
|
||||||
|
! ----------
|
||||||
|
|
||||||
|
! Each gpu_doubleN pairs an opaque C address (c) with a rank-N Fortran
! pointer (f); the gpu_allocate_* routines set f with c_f_pointer so that it
! views the memory at address c.
! Components are default-initialised so that a never-allocated object has a
! well-defined state (null address, disassociated pointer).
type gpu_double1
  type(c_ptr) :: c = c_null_ptr
  double precision, pointer :: f(:) => null()
end type gpu_double1

type gpu_double2
  type(c_ptr) :: c = c_null_ptr
  double precision, pointer :: f(:,:) => null()
end type gpu_double2

type gpu_double3
  type(c_ptr) :: c = c_null_ptr
  double precision, pointer :: f(:,:,:) => null()
end type gpu_double3

type gpu_double4
  type(c_ptr) :: c = c_null_ptr
  double precision, pointer :: f(:,:,:,:) => null()
end type gpu_double4

type gpu_double5
  type(c_ptr) :: c = c_null_ptr
  double precision, pointer :: f(:,:,:,:,:) => null()
end type gpu_double5

type gpu_double6
  type(c_ptr) :: c = c_null_ptr
  double precision, pointer :: f(:,:,:,:,:,:) => null()
end type gpu_double6


! Opaque BLAS handle; filled in by the C routine gpu_blas_create.
type gpu_blas
  type(c_ptr) :: c = c_null_ptr
end type gpu_blas

! Opaque stream; filled in by the C routine gpu_stream_create.
type gpu_stream
  type(c_ptr) :: c = c_null_ptr
end type gpu_stream
|
||||||
|
|
||||||
|
|
||||||
|
! C interfaces
|
||||||
|
! ------------
|
||||||
|
|
||||||
|
! Explicit interfaces to the C layer. Scalars the C prototypes receive by
! value (sizes, strides, transposition flags, handles) carry the VALUE
! attribute; alpha/beta and result arguments are passed by reference, which
! matches the pointer arguments of the C prototypes.
interface

  ! --- Device management -------------------------------------------------

  logical(c_bool) function no_gpu() bind(C)
    import
  end function

  integer(c_int) function gpu_ndevices() bind(C)
    import
  end function

  subroutine gpu_set_device(id) bind(C)
    import
    integer(c_int32_t), value :: id
  end subroutine

  ! --- Memory ------------------------------------------------------------

  ! ptr is passed by reference so that the C side can store the address.
  ! n is forwarded as a byte count by the gpu_allocate_double* wrappers.
  subroutine gpu_allocate_c(ptr, n) bind(C, name='gpu_allocate')
    import
    type(c_ptr)               :: ptr
    integer(c_int64_t), value :: n
  end subroutine

  subroutine gpu_deallocate_c(ptr) bind(C, name='gpu_deallocate')
    import
    type(c_ptr) :: ptr
  end subroutine

  subroutine gpu_upload_c(cpu_ptr, gpu_ptr, n) bind(C, name='gpu_upload')
    import
    type(c_ptr), value        :: cpu_ptr
    type(c_ptr), value        :: gpu_ptr
    integer(c_int64_t), value :: n
  end subroutine

  subroutine gpu_download_c(gpu_ptr, cpu_ptr, n) bind(C, name='gpu_download')
    import
    type(c_ptr), value        :: gpu_ptr
    type(c_ptr), value        :: cpu_ptr
    integer(c_int64_t), value :: n
  end subroutine

  subroutine gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, n) bind(C, name='gpu_copy')
    import
    type(c_ptr), value        :: gpu_ptr_src
    type(c_ptr), value        :: gpu_ptr_dest
    integer(c_int64_t), value :: n
  end subroutine

  ! --- Streams -----------------------------------------------------------

  subroutine gpu_stream_create_c(stream) bind(C, name='gpu_stream_create')
    import
    type(c_ptr) :: stream
  end subroutine

  subroutine gpu_stream_destroy_c(stream) bind(C, name='gpu_stream_destroy')
    import
    type(c_ptr) :: stream
  end subroutine

  subroutine gpu_set_stream_c(handle, stream) bind(C, name='gpu_set_stream')
    import
    type(c_ptr), value :: handle, stream
  end subroutine

  subroutine gpu_synchronize() bind(C)
    import
  end subroutine

  ! --- BLAS handle -------------------------------------------------------

  subroutine gpu_blas_create_c(handle) bind(C, name='gpu_blas_create')
    import
    type(c_ptr) :: handle
  end subroutine

  subroutine gpu_blas_destroy_c(handle) bind(C, name='gpu_blas_destroy')
    import
    type(c_ptr) :: handle
  end subroutine

  ! --- BLAS level 1 ------------------------------------------------------

  subroutine gpu_ddot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_ddot')
    import
    type(c_ptr), value, intent(in) :: handle
    integer(c_int64_t), value      :: n, incx, incy
    type(c_ptr), value             :: dx, dy
    real(c_double), intent(out)    :: res
  end subroutine

  subroutine gpu_sdot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_sdot')
    import
    type(c_ptr), value, intent(in) :: handle
    integer(c_int64_t), value      :: n, incx, incy
    type(c_ptr), intent(in), value :: dx, dy
    real(c_float), intent(out)     :: res
  end subroutine

  ! --- Matrix addition ---------------------------------------------------

  subroutine gpu_dgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, &
      b, ldb, c, ldc) bind(C, name='gpu_dgeam')
    import
    type(c_ptr), value, intent(in)        :: handle
    character(c_char), intent(in), value  :: transa, transb
    integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc
    real(c_double), intent(in)            :: alpha, beta
    type(c_ptr), value                    :: a, b, c
  end subroutine

  subroutine gpu_sgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, &
      b, ldb, c, ldc) bind(C, name='gpu_sgeam')
    import
    type(c_ptr), value, intent(in)        :: handle
    character(c_char), intent(in), value  :: transa, transb
    integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc
    real(c_float), intent(in)             :: alpha, beta
    real(c_float)                         :: a, b, c
  end subroutine

  ! --- Matrix-vector product ---------------------------------------------

  ! transa is received by value on the C side (const char), so it needs the
  ! VALUE attribute here; without it the address of the character is passed.
  subroutine gpu_dgemv_c(handle, transa, m, n, alpha, a, lda, &
      x, incx, beta, y, incy) bind(C, name='gpu_dgemv')
    import
    type(c_ptr), value, intent(in)        :: handle
    character(c_char), intent(in), value  :: transa
    integer(c_int64_t), intent(in), value :: m, n, lda, incx, incy
    real(c_double), intent(in)            :: alpha, beta
    real(c_double)                        :: a, x, y
  end subroutine

  subroutine gpu_sgemv_c(handle, transa, m, n, alpha, a, lda, &
      x, incx, beta, y, incy) bind(C, name='gpu_sgemv')
    import
    type(c_ptr), value, intent(in)        :: handle
    character(c_char), intent(in), value  :: transa
    integer(c_int64_t), intent(in), value :: m, n, lda, incx, incy
    real(c_float), intent(in)             :: alpha, beta
    real(c_float)                         :: a, x, y
  end subroutine

  ! --- Matrix-matrix product ---------------------------------------------

  ! transa/transb: by value, same reason as for gpu_dgemv_c.
  subroutine gpu_dgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, &
      b, ldb, beta, c, ldc) bind(C, name='gpu_dgemm')
    import
    type(c_ptr), value, intent(in)        :: handle
    character(c_char), intent(in), value  :: transa, transb
    integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc
    real(c_double), intent(in)            :: alpha, beta
    real(c_double)                        :: a, b, c
  end subroutine

  subroutine gpu_sgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, &
      b, ldb, beta, c, ldc) bind(C, name='gpu_sgemm')
    import
    type(c_ptr), value, intent(in)        :: handle
    character(c_char), intent(in), value  :: transa, transb
    integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc
    real(c_float), intent(in)             :: alpha, beta
    real(c_float)                         :: a, b, c
  end subroutine

end interface
|
||||||
|
|
||||||
|
|
||||||
|
! Polymorphic interfaces
|
||||||
|
! ----------------------
|
||||||
|
|
||||||
|
! Generic allocation: resolves on the rank of the gpu_doubleN argument and on
! the integer kind of the extents (default integer or integer*8).
interface gpu_allocate
  procedure gpu_allocate_double1
  procedure gpu_allocate_double2
  procedure gpu_allocate_double3
  procedure gpu_allocate_double4
  procedure gpu_allocate_double5
  procedure gpu_allocate_double6
  procedure gpu_allocate_double1_64
  procedure gpu_allocate_double2_64
  procedure gpu_allocate_double3_64
  procedure gpu_allocate_double4_64
  procedure gpu_allocate_double5_64
  procedure gpu_allocate_double6_64
end interface gpu_allocate
|
||||||
|
|
||||||
|
! Generic deallocation: one specific procedure per gpu_doubleN rank.
interface gpu_deallocate
  procedure gpu_deallocate_double1
  procedure gpu_deallocate_double2
  procedure gpu_deallocate_double3
  procedure gpu_deallocate_double4
  procedure gpu_deallocate_double5
  procedure gpu_deallocate_double6
end interface gpu_deallocate
|
||||||
|
|
||||||
|
! Generic upload (cpu_ptr, gpu_ptr): one specific procedure per rank.
interface gpu_upload
  procedure gpu_upload_double1
  procedure gpu_upload_double2
  procedure gpu_upload_double3
  procedure gpu_upload_double4
  procedure gpu_upload_double5
  procedure gpu_upload_double6
end interface gpu_upload
|
||||||
|
|
||||||
|
! Generic download (gpu_ptr, cpu_ptr): one specific procedure per rank.
interface gpu_download
  procedure gpu_download_double1
  procedure gpu_download_double2
  procedure gpu_download_double3
  procedure gpu_download_double4
  procedure gpu_download_double5
  procedure gpu_download_double6
end interface gpu_download
|
||||||
|
|
||||||
|
! Generic copy: one specific procedure per gpu_doubleN rank.
interface gpu_copy
  procedure gpu_copy_double1
  procedure gpu_copy_double2
  procedure gpu_copy_double3
  procedure gpu_copy_double4
  procedure gpu_copy_double5
  procedure gpu_copy_double6
end interface gpu_copy
|
||||||
|
|
||||||
|
|
||||||
|
contains
|
||||||
|
|
||||||
|
|
||||||
|
! gpu_allocate
|
||||||
|
! ------------
|
||||||
|
|
||||||
|
! Requests 8*s bytes through gpu_allocate_c and maps ptr%f onto the returned
! address as a rank-1 array of s elements.
subroutine gpu_allocate_double1(ptr, s)
  implicit none
  type(gpu_double1), intent(inout) :: ptr
  integer, intent(in)              :: s
  integer(c_int64_t)               :: nbytes

  nbytes = 8_8 * s
  call gpu_allocate_c(ptr%c, nbytes)
  call c_f_pointer(ptr%c, ptr%f, [ s ])
end subroutine gpu_allocate_double1
|
||||||
|
|
||||||
|
! Requests 8*s1*s2 bytes through gpu_allocate_c and maps ptr%f onto the
! returned address as an (s1,s2) array.
subroutine gpu_allocate_double2(ptr, s1, s2)
  implicit none
  type(gpu_double2), intent(inout) :: ptr
  integer, intent(in)              :: s1, s2
  integer(c_int64_t)               :: nbytes

  ! Promote every extent before multiplying: s1*s2 evaluated in default
  ! integer arithmetic overflows for large arrays.
  nbytes = 8_c_int64_t * int(s1, c_int64_t) * int(s2, c_int64_t)
  call gpu_allocate_c(ptr%c, nbytes)
  call c_f_pointer(ptr%c, ptr%f, (/ s1, s2 /))
end subroutine gpu_allocate_double2
|
||||||
|
|
||||||
|
! Requests 8*s1*s2*s3 bytes through gpu_allocate_c and maps ptr%f onto the
! returned address as an (s1,s2,s3) array.
subroutine gpu_allocate_double3(ptr, s1, s2, s3)
  implicit none
  type(gpu_double3), intent(inout) :: ptr
  integer, intent(in)              :: s1, s2, s3
  integer(c_int64_t)               :: nbytes

  ! Promote every extent before multiplying: the product evaluated in
  ! default integer arithmetic overflows for large arrays.
  nbytes = 8_c_int64_t * int(s1, c_int64_t) * int(s2, c_int64_t) &
                       * int(s3, c_int64_t)
  call gpu_allocate_c(ptr%c, nbytes)
  call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3 /))
end subroutine gpu_allocate_double3
|
||||||
|
|
||||||
|
! Requests 8*s1*s2*s3*s4 bytes through gpu_allocate_c and maps ptr%f onto
! the returned address as an (s1,s2,s3,s4) array.
subroutine gpu_allocate_double4(ptr, s1, s2, s3, s4)
  implicit none
  type(gpu_double4), intent(inout) :: ptr
  integer, intent(in)              :: s1, s2, s3, s4
  integer(c_int64_t)               :: nbytes

  ! Promote every extent before multiplying: the product evaluated in
  ! default integer arithmetic overflows for large arrays.
  nbytes = 8_c_int64_t * int(s1, c_int64_t) * int(s2, c_int64_t) &
                       * int(s3, c_int64_t) * int(s4, c_int64_t)
  call gpu_allocate_c(ptr%c, nbytes)
  call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4 /))
end subroutine gpu_allocate_double4
|
||||||
|
|
||||||
|
! Requests 8*s1*...*s5 bytes through gpu_allocate_c and maps ptr%f onto the
! returned address as an (s1,s2,s3,s4,s5) array.
subroutine gpu_allocate_double5(ptr, s1, s2, s3, s4, s5)
  implicit none
  type(gpu_double5), intent(inout) :: ptr
  integer, intent(in)              :: s1, s2, s3, s4, s5
  integer(c_int64_t)               :: nbytes

  ! Promote every extent before multiplying: the product evaluated in
  ! default integer arithmetic overflows for large arrays.
  nbytes = 8_c_int64_t * int(s1, c_int64_t) * int(s2, c_int64_t) &
                       * int(s3, c_int64_t) * int(s4, c_int64_t) &
                       * int(s5, c_int64_t)
  call gpu_allocate_c(ptr%c, nbytes)
  call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5 /))
end subroutine gpu_allocate_double5
|
||||||
|
|
||||||
|
! Requests 8*s1*...*s6 bytes through gpu_allocate_c and maps ptr%f onto the
! returned address as an (s1,s2,s3,s4,s5,s6) array.
subroutine gpu_allocate_double6(ptr, s1, s2, s3, s4, s5, s6)
  implicit none
  type(gpu_double6), intent(inout) :: ptr
  integer, intent(in)              :: s1, s2, s3, s4, s5, s6
  integer(c_int64_t)               :: nbytes

  ! Promote every extent before multiplying: the product evaluated in
  ! default integer arithmetic overflows for large arrays.
  nbytes = 8_c_int64_t * int(s1, c_int64_t) * int(s2, c_int64_t) &
                       * int(s3, c_int64_t) * int(s4, c_int64_t) &
                       * int(s5, c_int64_t) * int(s6, c_int64_t)
  call gpu_allocate_c(ptr%c, nbytes)
  call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5, s6 /))
end subroutine gpu_allocate_double6
|
||||||
|
|
||||||
|
|
||||||
|
! 64-bit-extent variant of the rank-1 allocation: requests 8*s bytes through
! gpu_allocate_c and maps ptr%f onto the returned address as s elements.
subroutine gpu_allocate_double1_64(ptr, s)
  implicit none
  type(gpu_double1), intent(inout) :: ptr
  integer*8, intent(in)            :: s

  ! The size handed to gpu_allocate_c is in bytes, as in every other
  ! gpu_allocate_double* routine: 8 bytes per double precision element.
  call gpu_allocate_c(ptr%c, s*8_8)
  call c_f_pointer(ptr%c, ptr%f, (/ s /))
end subroutine gpu_allocate_double1_64
|
||||||
|
|
||||||
|
! 64-bit-extent variant: requests 8*s1*s2 bytes through gpu_allocate_c and
! maps ptr%f onto the returned address as an (s1,s2) array.
subroutine gpu_allocate_double2_64(ptr, s1, s2)
  implicit none
  type(gpu_double2), intent(inout) :: ptr
  integer*8, intent(in)            :: s1, s2
  integer*8                        :: nbytes

  nbytes = s1*s2*8_8
  call gpu_allocate_c(ptr%c, nbytes)
  call c_f_pointer(ptr%c, ptr%f, [ s1, s2 ])
end subroutine gpu_allocate_double2_64
|
||||||
|
|
||||||
|
! 64-bit-extent variant: requests 8*s1*s2*s3 bytes through gpu_allocate_c
! and maps ptr%f onto the returned address as an (s1,s2,s3) array.
subroutine gpu_allocate_double3_64(ptr, s1, s2, s3)
  implicit none
  type(gpu_double3), intent(inout) :: ptr
  integer*8, intent(in)            :: s1, s2, s3
  integer*8                        :: nbytes

  nbytes = s1*s2*s3*8_8
  call gpu_allocate_c(ptr%c, nbytes)
  call c_f_pointer(ptr%c, ptr%f, [ s1, s2, s3 ])
end subroutine gpu_allocate_double3_64
|
||||||
|
|
||||||
|
! 64-bit-extent variant: requests 8*s1*s2*s3*s4 bytes through gpu_allocate_c
! and maps ptr%f onto the returned address as an (s1,s2,s3,s4) array.
subroutine gpu_allocate_double4_64(ptr, s1, s2, s3, s4)
  implicit none
  type(gpu_double4), intent(inout) :: ptr
  integer*8, intent(in)            :: s1, s2, s3, s4
  integer*8                        :: nbytes

  nbytes = s1*s2*s3*s4*8_8
  call gpu_allocate_c(ptr%c, nbytes)
  call c_f_pointer(ptr%c, ptr%f, [ s1, s2, s3, s4 ])
end subroutine gpu_allocate_double4_64
|
||||||
|
|
||||||
|
! 64-bit-extent variant: requests 8*s1*...*s5 bytes through gpu_allocate_c
! and maps ptr%f onto the returned address as an (s1,s2,s3,s4,s5) array.
subroutine gpu_allocate_double5_64(ptr, s1, s2, s3, s4, s5)
  implicit none
  type(gpu_double5), intent(inout) :: ptr
  integer*8, intent(in)            :: s1, s2, s3, s4, s5
  integer*8                        :: nbytes

  nbytes = s1*s2*s3*s4*s5*8_8
  call gpu_allocate_c(ptr%c, nbytes)
  call c_f_pointer(ptr%c, ptr%f, [ s1, s2, s3, s4, s5 ])
end subroutine gpu_allocate_double5_64
|
||||||
|
|
||||||
|
! 64-bit-extent variant: requests 8*s1*...*s6 bytes through gpu_allocate_c
! and maps ptr%f onto the returned address as an (s1,...,s6) array.
subroutine gpu_allocate_double6_64(ptr, s1, s2, s3, s4, s5, s6)
  implicit none
  type(gpu_double6), intent(inout) :: ptr
  integer*8, intent(in)            :: s1, s2, s3, s4, s5, s6
  integer*8                        :: nbytes

  nbytes = s1*s2*s3*s4*s5*s6*8_8
  call gpu_allocate_c(ptr%c, nbytes)
  call c_f_pointer(ptr%c, ptr%f, [ s1, s2, s3, s4, s5, s6 ])
end subroutine gpu_allocate_double6_64
|
||||||
|
|
||||||
|
|
||||||
|
! gpu_deallocate
|
||||||
|
! --------------
|
||||||
|
|
||||||
|
! Hands ptr%c to the C routine gpu_deallocate, then disassociates ptr%f.
subroutine gpu_deallocate_double1(ptr)
  implicit none
  type(gpu_double1), intent(inout) :: ptr

  call gpu_deallocate_c(ptr%c)
  ptr%f => null()
end subroutine gpu_deallocate_double1
|
||||||
|
|
||||||
|
! Hands ptr%c to the C routine gpu_deallocate, then disassociates ptr%f.
subroutine gpu_deallocate_double2(ptr)
  implicit none
  type(gpu_double2), intent(inout) :: ptr

  call gpu_deallocate_c(ptr%c)
  ptr%f => null()
end subroutine gpu_deallocate_double2
|
||||||
|
|
||||||
|
! Hands ptr%c to the C routine gpu_deallocate, then disassociates ptr%f.
subroutine gpu_deallocate_double3(ptr)
  implicit none
  type(gpu_double3), intent(inout) :: ptr

  call gpu_deallocate_c(ptr%c)
  ptr%f => null()
end subroutine gpu_deallocate_double3
|
||||||
|
|
||||||
|
! Hands ptr%c to the C routine gpu_deallocate, then disassociates ptr%f.
subroutine gpu_deallocate_double4(ptr)
  implicit none
  type(gpu_double4), intent(inout) :: ptr

  call gpu_deallocate_c(ptr%c)
  ptr%f => null()
end subroutine gpu_deallocate_double4
|
||||||
|
|
||||||
|
! Hands ptr%c to the C routine gpu_deallocate, then disassociates ptr%f.
subroutine gpu_deallocate_double5(ptr)
  implicit none
  type(gpu_double5), intent(inout) :: ptr

  call gpu_deallocate_c(ptr%c)
  ptr%f => null()
end subroutine gpu_deallocate_double5
|
||||||
|
|
||||||
|
! Hands ptr%c to the C routine gpu_deallocate, then disassociates ptr%f.
subroutine gpu_deallocate_double6(ptr)
  implicit none
  type(gpu_double6), intent(inout) :: ptr

  call gpu_deallocate_c(ptr%c)
  ptr%f => null()
end subroutine gpu_deallocate_double6
|
||||||
|
|
||||||
|
|
||||||
|
! gpu_upload
|
||||||
|
! ----------
|
||||||
|
|
||||||
|
! Passes the address of cpu_ptr and gpu_ptr%c to the C routine gpu_upload,
! with a byte count taken from the extent of gpu_ptr%f.
subroutine gpu_upload_double1(cpu_ptr, gpu_ptr)
  implicit none
  double precision, target, intent(in) :: cpu_ptr(*)
  type(gpu_double1), intent(in)        :: gpu_ptr
  integer(c_int64_t)                   :: nbytes

  ! Ask size() for a 64-bit result: a default-integer size overflows for
  ! buffers created through the integer*8 allocation routine.
  nbytes = 8_c_int64_t * size(gpu_ptr%f, kind=c_int64_t)
  call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, nbytes)
end subroutine gpu_upload_double1
|
||||||
|
|
||||||
|
! Passes the address of cpu_ptr and gpu_ptr%c to the C routine gpu_upload,
! with a byte count computed from the shape of gpu_ptr%f.
subroutine gpu_upload_double2(cpu_ptr, gpu_ptr)
  implicit none
  double precision, target, intent(in) :: cpu_ptr(:,:)
  type(gpu_double2), intent(in)        :: gpu_ptr
  integer*8                            :: nbytes

  nbytes = 8_8 * product(int(shape(gpu_ptr%f), 8))
  call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, nbytes)
end subroutine gpu_upload_double2
|
||||||
|
|
||||||
|
! Passes the address of cpu_ptr and gpu_ptr%c to the C routine gpu_upload,
! with a byte count computed from the shape of gpu_ptr%f.
subroutine gpu_upload_double3(cpu_ptr, gpu_ptr)
  implicit none
  double precision, target, intent(in) :: cpu_ptr(:,:,:)
  type(gpu_double3), intent(in)        :: gpu_ptr
  integer*8                            :: nbytes

  nbytes = 8_8 * product(int(shape(gpu_ptr%f), 8))
  call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, nbytes)
end subroutine gpu_upload_double3
|
||||||
|
|
||||||
|
! Passes the address of cpu_ptr and gpu_ptr%c to the C routine gpu_upload,
! with a byte count computed from the shape of gpu_ptr%f.
subroutine gpu_upload_double4(cpu_ptr, gpu_ptr)
  implicit none
  double precision, target, intent(in) :: cpu_ptr(:,:,:,:)
  type(gpu_double4), intent(in)        :: gpu_ptr
  integer*8                            :: nbytes

  nbytes = 8_8 * product(int(shape(gpu_ptr%f), 8))
  call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, nbytes)
end subroutine gpu_upload_double4
|
||||||
|
|
||||||
|
subroutine gpu_upload_double5(cpu_ptr, gpu_ptr)
  ! Pass the C address of the rank-5 array cpu_ptr and the handle gpu_ptr%c
  ! to gpu_upload_c, with a byte count taken from the shape of gpu_ptr%f.
  implicit none
  double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:)
  type(gpu_double5), intent(in)        :: gpu_ptr
  integer*8 :: n_bytes

  ! The extents are widened to 64-bit before being multiplied together.
  n_bytes = product(shape(gpu_ptr%f)*1_8)*8_8
  call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, n_bytes)
end subroutine gpu_upload_double5
|
||||||
|
|
||||||
|
subroutine gpu_upload_double6(cpu_ptr, gpu_ptr)
  ! Pass the C address of the rank-6 array cpu_ptr and the handle gpu_ptr%c
  ! to gpu_upload_c, with a byte count taken from the shape of gpu_ptr%f.
  implicit none
  double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:,:)
  type(gpu_double6), intent(in)        :: gpu_ptr
  integer*8 :: n_bytes

  ! The extents are widened to 64-bit before being multiplied together.
  n_bytes = product(shape(gpu_ptr%f)*1_8)*8_8
  call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, n_bytes)
end subroutine gpu_upload_double6
|
||||||
|
|
||||||
|
|
||||||
|
! gpu_download
|
||||||
|
! ------------
|
||||||
|
|
||||||
|
subroutine gpu_download_double1(gpu_ptr, cpu_ptr)
  ! Pass the handle gpu_ptr%c and the C address of cpu_ptr to gpu_download_c,
  ! with a byte count of 8 times the number of elements of gpu_ptr%f.
  !
  ! gpu_ptr : rank-1 GPU descriptor; only its %c and %f components are read
  ! cpu_ptr : array whose address is given to the C routine as its second
  !           argument. NOTE(review): it is expected to be the destination
  !           that the C routine fills, hence intent(inout) rather than
  !           intent(in), so callers do not assume it is left untouched.
  implicit none
  type(gpu_double1), intent(in)           :: gpu_ptr
  double precision, target, intent(inout) :: cpu_ptr(:)

  ! 64-bit element count: a default-kind size() would overflow for more than
  ! 2**31-1 elements before the 8_8 factor promotes the product.
  call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*size(gpu_ptr%f, kind=8))
end subroutine gpu_download_double1
|
||||||
|
|
||||||
|
subroutine gpu_download_double2(gpu_ptr, cpu_ptr)
  ! Pass the handle gpu_ptr%c and the C address of the rank-2 array cpu_ptr
  ! to gpu_download_c, with a byte count taken from the shape of gpu_ptr%f.
  !
  ! NOTE(review): cpu_ptr is expected to be the destination that the C
  ! routine fills, hence intent(inout) rather than intent(in), so callers do
  ! not assume it is left untouched.
  implicit none
  type(gpu_double2), intent(in)           :: gpu_ptr
  double precision, target, intent(inout) :: cpu_ptr(:,:)

  ! The extents are widened to 64-bit before being multiplied together.
  call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine gpu_download_double2
|
||||||
|
|
||||||
|
subroutine gpu_download_double3(gpu_ptr, cpu_ptr)
  ! Pass the handle gpu_ptr%c and the C address of the rank-3 array cpu_ptr
  ! to gpu_download_c, with a byte count taken from the shape of gpu_ptr%f.
  !
  ! NOTE(review): cpu_ptr is expected to be the destination that the C
  ! routine fills, hence intent(inout) rather than intent(in), so callers do
  ! not assume it is left untouched.
  implicit none
  type(gpu_double3), intent(in)           :: gpu_ptr
  double precision, target, intent(inout) :: cpu_ptr(:,:,:)

  ! The extents are widened to 64-bit before being multiplied together.
  call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine gpu_download_double3
|
||||||
|
|
||||||
|
subroutine gpu_download_double4(gpu_ptr, cpu_ptr)
  ! Pass the handle gpu_ptr%c and the C address of the rank-4 array cpu_ptr
  ! to gpu_download_c, with a byte count taken from the shape of gpu_ptr%f.
  !
  ! NOTE(review): cpu_ptr is expected to be the destination that the C
  ! routine fills, hence intent(inout) rather than intent(in), so callers do
  ! not assume it is left untouched.
  implicit none
  type(gpu_double4), intent(in)           :: gpu_ptr
  double precision, target, intent(inout) :: cpu_ptr(:,:,:,:)

  ! The extents are widened to 64-bit before being multiplied together.
  call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine gpu_download_double4
|
||||||
|
|
||||||
|
subroutine gpu_download_double5(gpu_ptr, cpu_ptr)
  ! Pass the handle gpu_ptr%c and the C address of the rank-5 array cpu_ptr
  ! to gpu_download_c, with a byte count taken from the shape of gpu_ptr%f.
  !
  ! NOTE(review): cpu_ptr is expected to be the destination that the C
  ! routine fills, hence intent(inout) rather than intent(in), so callers do
  ! not assume it is left untouched.
  implicit none
  type(gpu_double5), intent(in)           :: gpu_ptr
  double precision, target, intent(inout) :: cpu_ptr(:,:,:,:,:)

  ! The extents are widened to 64-bit before being multiplied together.
  call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine gpu_download_double5
|
||||||
|
|
||||||
|
subroutine gpu_download_double6(gpu_ptr, cpu_ptr)
  ! Pass the handle gpu_ptr%c and the C address of the rank-6 array cpu_ptr
  ! to gpu_download_c, with a byte count taken from the shape of gpu_ptr%f.
  !
  ! NOTE(review): cpu_ptr is expected to be the destination that the C
  ! routine fills, hence intent(inout) rather than intent(in), so callers do
  ! not assume it is left untouched.
  implicit none
  type(gpu_double6), intent(in)           :: gpu_ptr
  double precision, target, intent(inout) :: cpu_ptr(:,:,:,:,:,:)

  ! The extents are widened to 64-bit before being multiplied together.
  call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine gpu_download_double6
|
||||||
|
|
||||||
|
! gpu_copy
|
||||||
|
! --------
|
||||||
|
|
||||||
|
subroutine gpu_copy_double1(gpu_ptr_src, gpu_ptr_dest)
  ! Pass the handles gpu_ptr_src%c and gpu_ptr_dest%c to gpu_copy_c, with a
  ! byte count of 8 times the number of elements of gpu_ptr_dest%f.
  !
  ! Both descriptors are intent(in): only their components are read here.
  implicit none
  type(gpu_double1), intent(in) :: gpu_ptr_src
  type(gpu_double1), intent(in) :: gpu_ptr_dest

  ! 64-bit element count: a default-kind size() would overflow for more than
  ! 2**31-1 elements before the 8_8 factor promotes the product.
  call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*size(gpu_ptr_dest%f, kind=8))
end subroutine gpu_copy_double1
|
||||||
|
|
||||||
|
subroutine gpu_copy_double2(gpu_ptr_src, gpu_ptr_dest)
  ! Pass the handles gpu_ptr_src%c and gpu_ptr_dest%c to gpu_copy_c, with a
  ! byte count taken from the shape of gpu_ptr_dest%f.
  implicit none
  type(gpu_double2), intent(in) :: gpu_ptr_src
  type(gpu_double2), intent(in) :: gpu_ptr_dest
  integer*8 :: n_bytes

  ! The extents are widened to 64-bit before being multiplied together.
  n_bytes = 8_8*product(shape(gpu_ptr_dest%f)*1_8)
  call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, n_bytes)
end subroutine gpu_copy_double2
|
||||||
|
|
||||||
|
subroutine gpu_copy_double3(gpu_ptr_src, gpu_ptr_dest)
  ! Pass the handles gpu_ptr_src%c and gpu_ptr_dest%c to gpu_copy_c, with a
  ! byte count taken from the shape of gpu_ptr_dest%f.
  implicit none
  type(gpu_double3), intent(in) :: gpu_ptr_src
  type(gpu_double3), intent(in) :: gpu_ptr_dest
  integer*8 :: n_bytes

  ! The extents are widened to 64-bit before being multiplied together.
  n_bytes = 8_8*product(shape(gpu_ptr_dest%f)*1_8)
  call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, n_bytes)
end subroutine gpu_copy_double3
|
||||||
|
|
||||||
|
subroutine gpu_copy_double4(gpu_ptr_src, gpu_ptr_dest)
  ! Pass the handles gpu_ptr_src%c and gpu_ptr_dest%c to gpu_copy_c, with a
  ! byte count taken from the shape of gpu_ptr_dest%f.
  implicit none
  type(gpu_double4), intent(in) :: gpu_ptr_src
  type(gpu_double4), intent(in) :: gpu_ptr_dest
  integer*8 :: n_bytes

  ! The extents are widened to 64-bit before being multiplied together.
  n_bytes = 8_8*product(shape(gpu_ptr_dest%f)*1_8)
  call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, n_bytes)
end subroutine gpu_copy_double4
|
||||||
|
|
||||||
|
subroutine gpu_copy_double5(gpu_ptr_src, gpu_ptr_dest)
  ! Pass the handles gpu_ptr_src%c and gpu_ptr_dest%c to gpu_copy_c, with a
  ! byte count taken from the shape of gpu_ptr_dest%f.
  implicit none
  type(gpu_double5), intent(in) :: gpu_ptr_src
  type(gpu_double5), intent(in) :: gpu_ptr_dest
  integer*8 :: n_bytes

  ! The extents are widened to 64-bit before being multiplied together.
  n_bytes = 8_8*product(shape(gpu_ptr_dest%f)*1_8)
  call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, n_bytes)
end subroutine gpu_copy_double5
|
||||||
|
|
||||||
|
subroutine gpu_copy_double6(gpu_ptr_src, gpu_ptr_dest)
  ! Pass the handles gpu_ptr_src%c and gpu_ptr_dest%c to gpu_copy_c, with a
  ! byte count taken from the shape of gpu_ptr_dest%f.
  implicit none
  type(gpu_double6), intent(in) :: gpu_ptr_src
  type(gpu_double6), intent(in) :: gpu_ptr_dest
  integer*8 :: n_bytes

  ! The extents are widened to 64-bit before being multiplied together.
  n_bytes = 8_8*product(shape(gpu_ptr_dest%f)*1_8)
  call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, n_bytes)
end subroutine gpu_copy_double6
|
||||||
|
|
||||||
|
|
||||||
|
! gpu_stream
|
||||||
|
! ----------
|
||||||
|
|
||||||
|
subroutine gpu_stream_create(stream)
  ! Fortran-side wrapper: forwards the %c component of the stream object to
  ! gpu_stream_create_c.
  implicit none
  type(gpu_stream) :: stream

  call gpu_stream_create_c(stream%c)
end subroutine gpu_stream_create
|
||||||
|
|
||||||
|
subroutine gpu_stream_destroy(stream)
  ! Fortran-side wrapper: forwards the %c component of the stream object to
  ! gpu_stream_destroy_c.
  implicit none
  type(gpu_stream) :: stream

  call gpu_stream_destroy_c(stream%c)
end subroutine gpu_stream_destroy
|
||||||
|
|
||||||
|
subroutine gpu_set_stream(handle, stream)
  ! Fortran-side wrapper: forwards the %c components of the BLAS handle and
  ! of the stream object to gpu_set_stream_c.
  implicit none
  type(gpu_blas)   :: handle
  type(gpu_stream) :: stream

  call gpu_set_stream_c(handle%c, stream%c)
end subroutine gpu_set_stream
|
||||||
|
|
||||||
|
|
||||||
|
! gpu_blas
|
||||||
|
! --------
|
||||||
|
|
||||||
|
subroutine gpu_blas_create(handle)
  ! Fortran-side wrapper: forwards the %c component of the BLAS handle to
  ! gpu_blas_create_c.
  implicit none
  type(gpu_blas) :: handle

  call gpu_blas_create_c(handle%c)
end subroutine gpu_blas_create
|
||||||
|
|
||||||
|
subroutine gpu_blas_destroy(handle)
  ! Fortran-side wrapper: forwards the %c component of the BLAS handle to
  ! gpu_blas_destroy_c.
  implicit none
  type(gpu_blas) :: handle

  call gpu_blas_destroy_c(handle%c)
end subroutine gpu_blas_destroy
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
! dot
|
||||||
|
! ---
|
||||||
|
|
||||||
|
subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res)
  ! 32-bit integer front end to gpu_ddot_c: n, incx and incy are converted
  ! to c_int64_t, and dx / dy are passed by C address.
  ! NOTE(review): dx and dy are declared as scalars; callers presumably pass
  ! the first element of each vector -- confirm against the call sites.
  type(gpu_blas), intent(in)    :: handle
  integer*4                     :: n, incx, incy
  double precision, target      :: dx, dy
  double precision, intent(out) :: res
  integer(c_int64_t) :: n_64, incx_64, incy_64

  n_64    = int(n, c_int64_t)
  incx_64 = int(incx, c_int64_t)
  incy_64 = int(incy, c_int64_t)
  call gpu_ddot_c(handle%c, n_64, c_loc(dx), incx_64, c_loc(dy), incy_64, res)
end subroutine gpu_ddot
|
||||||
|
|
||||||
|
|
||||||
|
subroutine gpu_ddot_64(handle, n, dx, incx, dy, incy, res)
  ! 64-bit integer front end to gpu_ddot_c: n, incx and incy are forwarded
  ! unchanged, and dx / dy are passed by C address.
  ! NOTE(review): dx and dy are declared as scalars; callers presumably pass
  ! the first element of each vector -- confirm against the call sites.
  type(gpu_blas), intent(in)    :: handle
  integer*8                     :: n, incx, incy
  double precision, target      :: dx, dy
  double precision, intent(out) :: res

  call gpu_ddot_c(handle%c, n,    &
                  c_loc(dx), incx, &
                  c_loc(dy), incy, &
                  res)
end subroutine gpu_ddot_64
|
||||||
|
|
||||||
|
|
||||||
|
! geam
|
||||||
|
! ----
|
||||||
|
|
||||||
|
subroutine gpu_dgeam(handle, transa, transb, m, n, alpha, a, lda, beta, &
                     b, ldb, c, ldc)
  ! 32-bit integer front end to gpu_dgeam_c: the dimensions and leading
  ! dimensions are converted to c_int64_t, and a, b, c are passed by C
  ! address.
  type(gpu_blas), intent(in)   :: handle
  character, intent(in)        :: transa, transb
  integer*4, intent(in)        :: m, n, lda, ldb, ldc
  double precision, intent(in) :: alpha, beta
  double precision, target     :: a, b, c
  integer(c_int64_t) :: m_64, n_64, lda_64, ldb_64, ldc_64

  m_64   = int(m, c_int64_t)
  n_64   = int(n, c_int64_t)
  lda_64 = int(lda, c_int64_t)
  ldb_64 = int(ldb, c_int64_t)
  ldc_64 = int(ldc, c_int64_t)

  call gpu_dgeam_c(handle%c, transa, transb, m_64, n_64, alpha, c_loc(a), lda_64, beta, &
                   c_loc(b), ldb_64, c_loc(c), ldc_64)
end subroutine gpu_dgeam
|
||||||
|
|
||||||
|
|
||||||
|
subroutine gpu_dgeam_64(handle, transa, transb, m, n, alpha, a, lda, beta, &
                        b, ldb, c, ldc)
  ! 64-bit integer front end to gpu_dgeam_c: the dimensions and leading
  ! dimensions are converted to c_int64_t, and a, b, c are passed by C
  ! address.
  type(gpu_blas), intent(in)   :: handle
  character, intent(in)        :: transa, transb
  integer*8, intent(in)        :: m, n, lda, ldb, ldc
  double precision, intent(in) :: alpha, beta
  double precision, target     :: a, b, c
  integer(c_int64_t) :: m_64, n_64, lda_64, ldb_64, ldc_64

  m_64   = int(m, c_int64_t)
  n_64   = int(n, c_int64_t)
  lda_64 = int(lda, c_int64_t)
  ldb_64 = int(ldb, c_int64_t)
  ldc_64 = int(ldc, c_int64_t)

  call gpu_dgeam_c(handle%c, transa, transb, m_64, n_64, alpha, c_loc(a), lda_64, beta, &
                   c_loc(b), ldb_64, c_loc(c), ldc_64)
end subroutine gpu_dgeam_64
|
||||||
|
|
||||||
|
|
||||||
|
! gemv
|
||||||
|
! ----
|
||||||
|
|
||||||
|
subroutine gpu_dgemv(handle, transa, m, n, alpha, a, lda, &
                     x, incx, beta, y, incy)
  ! 32-bit integer front end to gpu_dgemv_c: the dimensions, leading
  ! dimension and increments are converted to c_int64_t; a, x and y are
  ! forwarded as they are.
  type(gpu_blas), intent(in)   :: handle
  character, intent(in)        :: transa
  integer*4, intent(in)        :: m, n, lda, incx, incy
  double precision, intent(in) :: alpha, beta
  double precision             :: a, x, y
  integer(c_int64_t) :: m_64, n_64, lda_64, incx_64, incy_64

  m_64    = int(m, c_int64_t)
  n_64    = int(n, c_int64_t)
  lda_64  = int(lda, c_int64_t)
  incx_64 = int(incx, c_int64_t)
  incy_64 = int(incy, c_int64_t)

  call gpu_dgemv_c(handle%c, transa, m_64, n_64, &
                   alpha, a, lda_64,             &
                   x, incx_64, beta, y, incy_64)
end subroutine gpu_dgemv
|
||||||
|
|
||||||
|
subroutine gpu_dgemv_64(handle, transa, m, n, alpha, a, lda, &
                        x, incx, beta, y, incy)
  ! 64-bit integer front end to gpu_dgemv_c: the dimensions, leading
  ! dimension and increments are converted to c_int64_t; a, x and y are
  ! forwarded as they are.
  type(gpu_blas), intent(in)   :: handle
  character, intent(in)        :: transa
  integer*8, intent(in)        :: m, n, lda, incx, incy
  double precision, intent(in) :: alpha, beta
  double precision             :: a, x, y
  integer(c_int64_t) :: m_64, n_64, lda_64, incx_64, incy_64

  m_64    = int(m, c_int64_t)
  n_64    = int(n, c_int64_t)
  lda_64  = int(lda, c_int64_t)
  incx_64 = int(incx, c_int64_t)
  incy_64 = int(incy, c_int64_t)

  call gpu_dgemv_c(handle%c, transa, m_64, n_64, &
                   alpha, a, lda_64,             &
                   x, incx_64, beta, y, incy_64)
end subroutine gpu_dgemv_64
|
||||||
|
|
||||||
|
|
||||||
|
! gemm
|
||||||
|
! ----
|
||||||
|
|
||||||
|
subroutine gpu_dgemm(handle, transa, transb, m, n, k, alpha, a, lda, &
                     b, ldb, beta, c, ldc)
  ! 32-bit integer front end to gpu_dgemm_c: the dimensions and leading
  ! dimensions are converted to c_int64_t; a, b and c are forwarded as they
  ! are.
  type(gpu_blas), intent(in)   :: handle
  character, intent(in)        :: transa, transb
  integer*4, intent(in)        :: m, n, k, lda, ldb, ldc
  double precision, intent(in) :: alpha, beta
  double precision             :: a, b, c
  integer(c_int64_t) :: m_64, n_64, k_64, lda_64, ldb_64, ldc_64

  m_64   = int(m, c_int64_t)
  n_64   = int(n, c_int64_t)
  k_64   = int(k, c_int64_t)
  lda_64 = int(lda, c_int64_t)
  ldb_64 = int(ldb, c_int64_t)
  ldc_64 = int(ldc, c_int64_t)

  call gpu_dgemm_c(handle%c, transa, transb, m_64, n_64, k_64, &
                   alpha, a, lda_64,                           &
                   b, ldb_64, beta, c, ldc_64)
end subroutine gpu_dgemm
|
||||||
|
|
||||||
|
subroutine gpu_dgemm_64(handle, transa, transb, m, n, k, alpha, a, lda, &
                        b, ldb, beta, c, ldc)
  ! 64-bit integer front end to gpu_dgemm_c: the dimensions and leading
  ! dimensions are converted to c_int64_t; a, b and c are forwarded as they
  ! are.
  type(gpu_blas), intent(in)   :: handle
  character, intent(in)        :: transa, transb
  integer*8, intent(in)        :: m, n, k, lda, ldb, ldc
  double precision, intent(in) :: alpha, beta
  double precision             :: a, b, c
  integer(c_int64_t) :: m_64, n_64, k_64, lda_64, ldb_64, ldc_64

  m_64   = int(m, c_int64_t)
  n_64   = int(n, c_int64_t)
  k_64   = int(k, c_int64_t)
  lda_64 = int(lda, c_int64_t)
  ldb_64 = int(ldb, c_int64_t)
  ldc_64 = int(ldc, c_int64_t)

  call gpu_dgemm_c(handle%c, transa, transb, m_64, n_64, k_64, &
                   alpha, a, lda_64, b, ldb_64, beta, c, ldc_64)
end subroutine gpu_dgemm_64
|
||||||
|
|
||||||
|
end module
|
@ -1,141 +0,0 @@
|
|||||||
module gpu
|
|
||||||
use, intrinsic :: iso_c_binding, only : c_int32_t, c_int64_t, c_double, c_size_t, c_char
|
|
||||||
implicit none
|
|
||||||
|
|
||||||
interface
|
|
||||||
integer function gpu_ndevices() bind(C)
|
|
||||||
end function
|
|
||||||
|
|
||||||
subroutine gpu_set_device(id) bind(C)
|
|
||||||
import
|
|
||||||
integer(c_int32_t), value :: id
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_allocate_c(ptr, n) bind(C, name='gpu_allocate')
|
|
||||||
import
|
|
||||||
type(c_ptr) :: ptr
|
|
||||||
integer(c_int64_t), value :: n
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_free_c(ptr) bind(C, name='gpu_free')
|
|
||||||
import
|
|
||||||
type(c_ptr) :: ptr
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_upload_c(cpu_ptr, gpu_ptr, n) bind(C, name='gpu_upload')
|
|
||||||
import
|
|
||||||
type(c_ptr), value :: cpu_ptr
|
|
||||||
type(c_ptr), value :: gpu_ptr
|
|
||||||
integer(c_int64_t), value :: n
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_download_c(gpu_ptr, cpu_ptr, n) bind(C, name='gpu_download')
|
|
||||||
import
|
|
||||||
type(c_ptr), value :: gpu_ptr
|
|
||||||
type(c_ptr), value :: cpu_ptr
|
|
||||||
integer(c_int64_t), value :: n
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, n) bind(C, name='gpu_copy')
|
|
||||||
import
|
|
||||||
type(c_ptr), value :: gpu_ptr_src
|
|
||||||
type(c_ptr), value :: gpu_ptr_dest
|
|
||||||
integer(c_int64_t), value :: n
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_stream_create(stream) bind(C)
|
|
||||||
import
|
|
||||||
type(c_ptr) :: stream
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_stream_destroy(stream) bind(C)
|
|
||||||
import
|
|
||||||
type(c_ptr) :: stream
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_set_stream(handle, stream) bind(C)
|
|
||||||
import
|
|
||||||
type(c_ptr) :: handle, stream
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_synchronize()
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_blas_create(handle) bind(C)
|
|
||||||
import
|
|
||||||
type(c_ptr) :: handle
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_blas_destroy(handle) bind(C)
|
|
||||||
import
|
|
||||||
type(c_ptr) :: handle
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res) bind(C)
|
|
||||||
import
|
|
||||||
type(c_ptr), intent(in) :: handle
|
|
||||||
integer(c_int64_t), value :: n, incx, incy
|
|
||||||
real(c_double), intent(in) :: dx(*), dy(*)
|
|
||||||
real(c_double), intent(out) :: res
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_sdot(handle, n, dx, incx, dy, incy, res) bind(C)
|
|
||||||
import
|
|
||||||
type(c_ptr), intent(in) :: handle
|
|
||||||
integer(c_int64_t), value :: n, incx, incy
|
|
||||||
real(c_float), intent(in) :: dx(*), dy(*)
|
|
||||||
real(c_float), intent(out) :: res
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
end interface
|
|
||||||
|
|
||||||
end module
|
|
||||||
|
|
||||||
subroutine gpu_allocate_double(ptr, s)
|
|
||||||
use gpu
|
|
||||||
implicit none
|
|
||||||
double precision, pointer, intent(inout) :: ptr
|
|
||||||
integer*8, intent(in) :: s(*)
|
|
||||||
type(c_ptr) :: cptr
|
|
||||||
|
|
||||||
call gpu_allocate_c(cptr, sum(s)*8_8)
|
|
||||||
call c_f_pointer(cptr, ptr, s)
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_free_double(ptr)
|
|
||||||
use gpu
|
|
||||||
implicit none
|
|
||||||
double precision, pointer, intent(inout) :: ptr
|
|
||||||
type(c_ptr) :: cptr
|
|
||||||
cptr = cloc(ptr)
|
|
||||||
call gpu_free(cptr)
|
|
||||||
NULLIFY(ptr)
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_upload_double(cpu_ptr, gpu_ptr, n)
|
|
||||||
use gpu
|
|
||||||
implicit none
|
|
||||||
double precision, intent(in) :: cpu_ptr(*)
|
|
||||||
double precision, intent(out) :: gpu_ptr(*)
|
|
||||||
integer(c_int64_t), intent(in) :: n
|
|
||||||
call gpu_upload_c(cpu_ptr, gpu_ptr, 8_8*n)
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_download_double(gpu_ptr, cpu_ptr, n)
|
|
||||||
use gpu
|
|
||||||
implicit none
|
|
||||||
double precision, intent(in) :: gpu_ptr(*)
|
|
||||||
double precision, intent(out) :: cpu_ptr(*)
|
|
||||||
integer(c_int64_t), intent(in) :: n
|
|
||||||
call gpu_download_c(gpu_ptr, cpu_ptr, 8_8*n)
|
|
||||||
end subroutine
|
|
||||||
|
|
||||||
subroutine gpu_copy_double(gpu_ptr_src, gpu_ptr_dest, n)
|
|
||||||
use gpu
|
|
||||||
implicit none
|
|
||||||
double precision, intent(in) :: gpu_ptr_src(*)
|
|
||||||
double precision, intent(out) :: gpu_ptr_dest(*)
|
|
||||||
integer(c_int64_t), intent(in) :: n
|
|
||||||
call gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, 8_8*n)
|
|
||||||
end subroutine
|
|
||||||
|
|
@ -1,7 +1,3 @@
|
|||||||
two_body_rdm
|
mo_optimization_utils
|
||||||
hartree_fock
|
|
||||||
cipsi
|
|
||||||
davidson_undressed
|
|
||||||
selectors_full
|
selectors_full
|
||||||
generators_full
|
generators_full
|
||||||
utils_trust_region
|
|
||||||
|
@ -2,87 +2,7 @@ program optimization
|
|||||||
|
|
||||||
read_wf = .true. ! must be True for the orbital optimization !!!
|
read_wf = .true. ! must be True for the orbital optimization !!!
|
||||||
TOUCH read_wf
|
TOUCH read_wf
|
||||||
call run_optimization
|
call run_optimization_mos_CIPSI
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
subroutine run_optimization
|
|
||||||
|
|
||||||
implicit none
|
|
||||||
|
|
||||||
double precision :: e_cipsi, e_opt, delta_e
|
|
||||||
double precision, allocatable :: Ev(:),PT2(:)
|
|
||||||
integer :: nb_iter,i
|
|
||||||
logical :: not_converged
|
|
||||||
character (len=100) :: filename
|
|
||||||
|
|
||||||
PROVIDE psi_det psi_coef mo_two_e_integrals_in_map ao_pseudo_integrals
|
|
||||||
allocate(Ev(N_states),PT2(N_states))
|
|
||||||
|
|
||||||
not_converged = .True.
|
|
||||||
nb_iter = 0
|
|
||||||
|
|
||||||
! To start from the wf
|
|
||||||
N_det_max = max(n_det,5)
|
|
||||||
TOUCH N_det_max
|
|
||||||
|
|
||||||
open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt')
|
|
||||||
write(10,*) " Ndet E_cipsi E_opt Delta_e"
|
|
||||||
call state_average_energy(e_cipsi)
|
|
||||||
write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0
|
|
||||||
close(10)
|
|
||||||
|
|
||||||
do while (not_converged)
|
|
||||||
print*,''
|
|
||||||
print*,'======================'
|
|
||||||
print*,' Cipsi step:', nb_iter
|
|
||||||
print*,'======================'
|
|
||||||
print*,''
|
|
||||||
print*,'********** cipsi step **********'
|
|
||||||
! cispi calculation
|
|
||||||
call run_stochastic_cipsi(Ev,PT2)
|
|
||||||
|
|
||||||
! State average energy after the cipsi step
|
|
||||||
call state_average_energy(e_cipsi)
|
|
||||||
|
|
||||||
print*,''
|
|
||||||
print*,'********** optimization step **********'
|
|
||||||
! orbital optimization
|
|
||||||
call run_orb_opt_trust_v2
|
|
||||||
|
|
||||||
! State average energy after the orbital optimization
|
|
||||||
call state_average_energy(e_opt)
|
|
||||||
|
|
||||||
print*,''
|
|
||||||
print*,'********** diff step **********'
|
|
||||||
! Gain in energy
|
|
||||||
delta_e = e_opt - e_cipsi
|
|
||||||
print*, 'Gain in energy during the orbital optimization:', delta_e
|
|
||||||
|
|
||||||
open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append')
|
|
||||||
write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e
|
|
||||||
close(10)
|
|
||||||
|
|
||||||
! Exit
|
|
||||||
if (delta_e > 1d-12) then
|
|
||||||
print*, 'WARNING, something wrong happened'
|
|
||||||
print*, 'The gain (delta_e) in energy during the optimization process'
|
|
||||||
print*, 'is > 0, but it must be < 0'
|
|
||||||
print*, 'The program will exit'
|
|
||||||
exit
|
|
||||||
endif
|
|
||||||
|
|
||||||
if (n_det > n_det_max_opt) then
|
|
||||||
print*, 'The number of determinants in the wf > n_det_max_opt'
|
|
||||||
print*, 'The program will exit'
|
|
||||||
exit
|
|
||||||
endif
|
|
||||||
|
|
||||||
! To double the number of determinants in the wf
|
|
||||||
N_det_max = int(dble(n_det * 2)*0.9)
|
|
||||||
TOUCH N_det_max
|
|
||||||
|
|
||||||
nb_iter = nb_iter + 1
|
|
||||||
enddo
|
|
||||||
|
|
||||||
end
|
|
||||||
|
5
src/mo_optimization_utils/NEED
Normal file
5
src/mo_optimization_utils/NEED
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
two_body_rdm
|
||||||
|
hartree_fock
|
||||||
|
cipsi
|
||||||
|
davidson_undressed
|
||||||
|
utils_trust_region
|
74
src/mo_optimization_utils/README.md
Normal file
74
src/mo_optimization_utils/README.md
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
# Orbital optimization
|
||||||
|
|
||||||
|
## Methods
|
||||||
|
Different methods are available:
|
||||||
|
- full hessian
|
||||||
|
```
|
||||||
|
qp set orbital_optimization optimization_method full
|
||||||
|
```
|
||||||
|
- diagonal hessian
|
||||||
|
```
|
||||||
|
qp set orbital_optimization optimization_method diag
|
||||||
|
```
|
||||||
|
- identity matrix
|
||||||
|
```
|
||||||
|
qp set orbital_optimization optimization_method none
|
||||||
|
```
|
||||||
|
|
||||||
|
After the optimization the ezfio contains the optimized orbitals
|
||||||
|
|
||||||
|
## For a fixed number of determinants
|
||||||
|
To optimize the MOs for the current determinants:
|
||||||
|
```
|
||||||
|
qp run orb_opt
|
||||||
|
```
|
||||||
|
|
||||||
|
## For a complete optimization, i.e., with a larger and larger wave function
|
||||||
|
To optimize the MOs with a larger and larger wave function:
|
||||||
|
```
|
||||||
|
qp run optimization
|
||||||
|
```
|
||||||
|
|
||||||
|
The results are stored in the EZFIO in "mo_optimization/result_opt",
|
||||||
|
with the following format:
|
||||||
|
(1) (2) (3) (4)
|
||||||
|
1: Number of determinants in the wf,
|
||||||
|
2: Cipsi energy before the optimization,
|
||||||
|
3: Cipsi energy after the optimization,
|
||||||
|
4: Energy difference between (2) and (3).
|
||||||
|
|
||||||
|
The optimization process is the following:
|
||||||
|
- we do a first cipsi step to obtain a small number of determinants in the wf
|
||||||
|
- we run an orbital optimization for this wf
|
||||||
|
- we do a new cipsi step to double the number of determinants in the wf
|
||||||
|
- we run an orbital optimization for this wf
|
||||||
|
- ...
|
||||||
|
- we do that until the energy difference between (2) and (3) is
|
||||||
|
smaller than the targeted accuracy for the cipsi (targeted_accuracy_cipsi in qp edit)
|
||||||
|
or the wf is larger than a given size (n_det_max_opt in qp edit)
|
||||||
|
- after that you can reset your determinants (qp reset -d) and run a clean Cipsi calculation
|
||||||
|
|
||||||
|
### End of the optimization
|
||||||
|
You can choose the number of determinants after which the
|
||||||
|
optimization will stop:
|
||||||
|
```
|
||||||
|
qp set orbital_optimization n_det_max_opt 1e5 # or any number
|
||||||
|
```
|
||||||
|
## Weight of the states
|
||||||
|
You can change the weights of the different states directly in qp edit.
|
||||||
|
It will affect the weights used in the orbital optimization.
|
||||||
|
|
||||||
|
# Tests
|
||||||
|
To run the tests:
|
||||||
|
```
|
||||||
|
qp test
|
||||||
|
```
|
||||||
|
|
||||||
|
# Org files
|
||||||
|
The org files are stored in the directory org in order to avoid overwriting user changes.
|
||||||
|
The org files can be modified, to export the change to the source code, run
|
||||||
|
```
|
||||||
|
./TANGLE_org_mode.sh
|
||||||
|
mv *.irp.f ../.
|
||||||
|
```
|
||||||
|
|
81
src/mo_optimization_utils/routine_opt_mos.irp.f
Normal file
81
src/mo_optimization_utils/routine_opt_mos.irp.f
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
|
||||||
|
subroutine run_optimization_mos_CIPSI

  implicit none

  BEGIN_DOC
  ! Alternates CIPSI steps (run_stochastic_cipsi) and orbital optimizations
  ! (run_orb_opt_trust_v2) on a growing wave function.
  !
  ! After each pair of steps, n_det, the state-average energy after the CIPSI
  ! step, the one after the orbital optimization and their difference are
  ! appended to <ezfio_filename>/mo_optimization/result_opt.
  !
  ! The loop stops when the orbital optimization raised the energy
  ! (delta_e > 1d-12) or when n_det exceeds n_det_max_opt.
  END_DOC

  double precision              :: e_cipsi, e_opt, delta_e
  double precision, allocatable :: Ev(:), PT2(:)
  integer                       :: nb_iter

  PROVIDE psi_det psi_coef mo_two_e_integrals_in_map ao_pseudo_integrals
  allocate(Ev(N_states),PT2(N_states))

  nb_iter = 0

  ! To start from the wf
  N_det_max = max(n_det,5)
  TOUCH N_det_max

  ! Header and reference line of the result file
  open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt')
  write(10,*) " Ndet E_cipsi E_opt Delta_e"
  call state_average_energy(e_cipsi)
  write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0
  close(10)

  ! The loop is left only through the two exit tests below.
  do
    print*,''
    print*,'======================'
    print*,' Cipsi step:', nb_iter
    print*,'======================'
    print*,''
    print*,'********** cipsi step **********'
    ! cipsi calculation
    call run_stochastic_cipsi(Ev,PT2)

    ! State average energy after the cipsi step
    call state_average_energy(e_cipsi)

    print*,''
    print*,'********** optimization step **********'
    ! orbital optimization
    call run_orb_opt_trust_v2

    ! State average energy after the orbital optimization
    call state_average_energy(e_opt)

    print*,''
    print*,'********** diff step **********'
    ! Gain in energy
    delta_e = e_opt - e_cipsi
    print*, 'Gain in energy during the orbital optimization:', delta_e

    open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append')
    write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e
    close(10)

    ! Exit
    if (delta_e > 1d-12) then
      print*, 'WARNING, something wrong happened'
      print*, 'The gain (delta_e) in energy during the optimization process'
      print*, 'is > 0, but it must be < 0'
      print*, 'The program will exit'
      exit
    endif

    if (n_det > n_det_max_opt) then
      print*, 'The number of determinants in the wf > n_det_max_opt'
      print*, 'The program will exit'
      exit
    endif

    ! Target for the next cipsi step: 0.9 * (2 * n_det).
    ! The factor is a double precision literal: with the default-real 0.9
    ! the product falls just below the intended integer for some n_det
    ! (e.g. n_det = 5 gave 8 instead of 9).
    N_det_max = int(dble(n_det * 2)*0.9d0)
    TOUCH N_det_max

    nb_iter = nb_iter + 1
  enddo

  deallocate(Ev,PT2)

end
|
@ -101,3 +101,34 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num,
|
|||||||
|
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [ double precision, cholesky_semi_mo_transp_simple, (cholesky_mo_num, ao_num, mo_num) ]
  implicit none
  BEGIN_DOC
  ! Cholesky vectors with one MO index contracted with the MO coefficients:
  !
  ! cholesky_semi_mo_transp_simple(L,mu,i) =
  !     \sum_j cholesky_mo_transp(L,j,i) * mo_coef_transp(j,mu)
  !
  ! Straightforward loop implementation.
  END_DOC

  integer          :: i_chol, i_mo, j_mo, i_ao
  double precision :: wall0, wall1

  print *, 'Semi AO->MO Transformation of Cholesky vectors'
  call wall_time(wall0)

  ! No scratch array is needed: the contraction accumulates directly into
  ! the provided array. (An unused work array of the same size as the result
  ! used to be allocated here.)
  cholesky_semi_mo_transp_simple = 0.d0
  do i_mo = 1, mo_num
    do i_ao = 1, ao_num
      do j_mo = 1, mo_num
        ! Innermost loop runs over the first (contiguous) index.
        do i_chol = 1, cholesky_mo_num
          cholesky_semi_mo_transp_simple(i_chol, i_ao,i_mo) += cholesky_mo_transp(i_chol,j_mo,i_mo) * mo_coef_transp(j_mo,i_ao)
        enddo
      enddo
    enddo
  enddo

  call wall_time(wall1)
  print*,'Time to provide cholesky_semi_mo_transp_simple = ',wall1-wall0

END_PROVIDER
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ end
|
|||||||
! Min and max values of the MOs for which the integrals are in the cache
|
! Min and max values of the MOs for which the integrals are in the cache
|
||||||
END_DOC
|
END_DOC
|
||||||
|
|
||||||
mo_integrals_cache_size = 2_8**mo_integrals_cache_shift
|
mo_integrals_cache_size = 2**mo_integrals_cache_shift
|
||||||
|
|
||||||
mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) )
|
mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) )
|
||||||
mo_integrals_cache_max = min(mo_num, mo_integrals_cache_min + mo_integrals_cache_size - 1)
|
mo_integrals_cache_max = min(mo_num, mo_integrals_cache_min + mo_integrals_cache_size - 1)
|
||||||
|
@ -289,6 +289,106 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)]
|
|||||||
endif
|
endif
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse_bis, (n_points_final_grid)]
  implicit none
  BEGIN_DOC
  ! Value on each point of the final grid of
  !
  ! f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ
  !      = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ
  !      = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R)
  !      = \sum_A V_AR G_AR
  !
  ! with V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI
  !
  ! Contributions are skipped when the orbital value is below thresh_1 or
  ! the orbital product is below thresh_2 (both derived from
  ! ao_cholesky_threshold).
  !
  ! When elec_alpha_num == elec_beta_num the sum runs over AOs with
  ! cholesky_semi_mo_transp_simple; otherwise it runs over the MOs of
  ! list_basis with cholesky_mo_transp.
  END_DOC
  integer :: ipoint,m,mm,i,ii,p
  double precision :: wall0,wall1,mo_i_r1,mo_b_r1
  double precision :: thresh_1,thresh_2
  double precision, allocatable :: accu_vec(:),delta_vec(:)

  thresh_2 = ao_cholesky_threshold * 100.d0
  thresh_1 = dsqrt(thresh_2)
  provide cholesky_mo_transp

  if(elec_alpha_num == elec_beta_num)then
    call wall_time(wall0)
    !$OMP PARALLEL DEFAULT(NONE) &
    !$OMP PRIVATE (accu_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) &
    !$OMP SHARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,mos_in_r_array_omp,aos_in_r_array,thresh_1,thresh_2) &
    !$OMP SHARED (cholesky_mo_num,f_hf_cholesky_sparse_bis,n_points_final_grid,cholesky_semi_mo_transp_simple,ao_num)
    ! One accumulator per thread, allocated inside the parallel region
    allocate(accu_vec(cholesky_mo_num))
    !$OMP DO
    do ipoint = 1, n_points_final_grid
      f_hf_cholesky_sparse_bis(ipoint) = 0.d0
      accu_vec = 0.d0
      do ii = 1, n_occ_val_orb_for_hf(1)
        i = list_valence_orb_for_hf(ii,1)
        mo_i_r1 = mos_in_r_array_omp(i,ipoint)
        if(dabs(mo_i_r1).lt.thresh_1)cycle
        do mm = 1, ao_num ! electron 1
          mo_b_r1 = aos_in_r_array(mm,ipoint)*mo_i_r1
          if(dabs(mo_b_r1).lt.thresh_2)cycle
          do p = 1, cholesky_mo_num
            accu_vec(p) = accu_vec(p) + mo_b_r1 * cholesky_semi_mo_transp_simple(p,mm,i)
          enddo
        enddo
      enddo
      do p = 1, cholesky_mo_num
        f_hf_cholesky_sparse_bis(ipoint) = f_hf_cholesky_sparse_bis(ipoint) + accu_vec(p) * accu_vec(p)
      enddo
      f_hf_cholesky_sparse_bis(ipoint) *= 2.D0
    enddo
    !$OMP END DO
    deallocate(accu_vec)
    !$OMP END PARALLEL

    call wall_time(wall1)
    print*,'Time to provide f_hf_cholesky_sparse_bis = ',wall1-wall0
  else
    call wall_time(wall0)
    !$OMP PARALLEL DEFAULT(NONE) &
    !$OMP PRIVATE (accu_vec,delta_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) &
    !$OMP SHARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp,thresh_1,thresh_2) &
    !$OMP SHARED (cholesky_mo_num,f_hf_cholesky_sparse_bis,n_points_final_grid,cholesky_mo_transp,n_basis_orb)
    allocate(accu_vec(cholesky_mo_num),delta_vec(cholesky_mo_num))
    !$OMP DO
    do ipoint = 1, n_points_final_grid
      f_hf_cholesky_sparse_bis(ipoint) = 0.d0
      ! Orbitals 1 .. n_occ_val_orb_for_hf(2) of list_valence_orb_for_hf(:,2)
      accu_vec = 0.d0
      do ii = 1, n_occ_val_orb_for_hf(2)
        i = list_valence_orb_for_hf(ii,2)
        mo_i_r1 = mos_in_r_array_omp(i,ipoint)
        if(dabs(mo_i_r1).lt.thresh_1)cycle
        do mm = 1, n_basis_orb ! electron 1
          m = list_basis(mm)
          mo_b_r1 = mos_in_r_array_omp(m,ipoint)
          if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle
          do p = 1, cholesky_mo_num
            accu_vec(p) = accu_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i)
          enddo
        enddo
      enddo
      ! Remaining orbitals n_occ_val_orb_for_hf(2)+1 .. n_occ_val_orb_for_hf(1)
      ! of list_valence_orb_for_hf(:,1)
      delta_vec = 0.d0
      do ii = n_occ_val_orb_for_hf(2)+1,n_occ_val_orb_for_hf(1)
        i = list_valence_orb_for_hf(ii,1)
        mo_i_r1 = mos_in_r_array_omp(i,ipoint)
        if(dabs(mo_i_r1).lt.thresh_1)cycle
        do mm = 1, n_basis_orb ! electron 1
          m = list_basis(mm)
          mo_b_r1 = mos_in_r_array_omp(m,ipoint)
          if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle
          do p = 1, cholesky_mo_num
            delta_vec(p) = delta_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i)
          enddo
        enddo
      enddo
      do p = 1, cholesky_mo_num
        f_hf_cholesky_sparse_bis(ipoint) = f_hf_cholesky_sparse_bis(ipoint) + accu_vec(p) * accu_vec(p) + accu_vec(p) * delta_vec(p)
      enddo
      f_hf_cholesky_sparse_bis(ipoint) *= 2.D0
    enddo
    !$OMP END DO
    ! Both per-thread work arrays are released (delta_vec used to be left out)
    deallocate(accu_vec,delta_vec)
    !$OMP END PARALLEL
    call wall_time(wall1)
    print*,'Time to provide f_hf_cholesky_sparse_bis = ',wall1-wall0
  endif
END_PROVIDER
|
||||||
|
|
||||||
|
|
||||||
BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)]
|
BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)]
|
||||||
implicit none
|
implicit none
|
||||||
integer :: ipoint,i,ii
|
integer :: ipoint,i,ii
|
||||||
|
171
src/mu_of_r/mu_of_r_mean_field.irp.f
Normal file
171
src/mu_of_r/mu_of_r_mean_field.irp.f
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
BEGIN_PROVIDER [ double precision, two_e_int_mf, (elec_beta_num,elec_alpha_num,elec_beta_num,elec_alpha_num)]
  implicit none
  BEGIN_DOC
  ! Table of two-electron MO integrals used by the mean-field mu(r) routines.
  !
  ! two_e_int_mf(l,k,j,i) = get_two_e_integral(l,k,j,i,mo_integrals_map)
  !
  ! with l,j = 1..elec_beta_num and k,i = 1..elec_alpha_num. The loop indices
  ! are passed directly as MO indices, i.e. the first elec_alpha_num
  ! (resp. elec_beta_num) MOs are the ones addressed.
  !
  ! NOTE(review): get_two_e_integral(l,k,j,i,...) presumably returns <lk|ji>
  ! in physicist notation -- confirm against its definition.
  END_DOC
  double precision :: get_two_e_integral
  integer          :: i, j, k, l

  ! Innermost loop runs over the leftmost (fastest-varying) array index.
  do i = 1, elec_alpha_num
    do j = 1, elec_beta_num
      do k = 1, elec_alpha_num
        do l = 1, elec_beta_num
          two_e_int_mf(l,k,j,i) = get_two_e_integral(l,k,j,i,mo_integrals_map)
        end do
      end do
    end do
  end do

END_PROVIDER
|
||||||
|
|
||||||
|
subroutine get_f_mf_ab(r,f_mf_ab,two_bod_dens, dm_a, dm_b)
  implicit none
  BEGIN_DOC
  ! Mean-field quantities at the point r, built from the first elec_alpha_num
  ! (alpha) and elec_beta_num (beta) molecular orbitals evaluated at r.
  !
  ! Input :
  !   r(3)         : point at which the MOs are evaluated
  !
  ! Output :
  !   dm_a         : sum_{i=1,elec_alpha_num} phi_i(r)^2
  !   dm_b         : sum_{i=1,elec_beta_num}  phi_i(r)^2
  !   two_bod_dens : 2 * dm_a * dm_b
  !   f_mf_ab      : 2 * sum_{i,k <= elec_alpha_num ; j,l <= elec_beta_num}
  !                  two_e_int_mf(l,k,j,i) * phi_i(r) phi_k(r) phi_j(r) phi_l(r)
  END_DOC
  double precision, intent(in)  :: r(3)
  double precision, intent(out) :: f_mf_ab, two_bod_dens, dm_a, dm_b

  double precision, allocatable :: mos_array_r(:), mos_array_a(:), mos_array_b(:)
  integer                       :: i, j, k, l

  ! mos_array_b only ever holds elec_beta_num values, so it is sized accordingly
  ! (same sizing as in get_grad_f_mf_ab).
  allocate(mos_array_r(mo_num), mos_array_a(elec_alpha_num), mos_array_b(elec_beta_num))

  call give_all_mos_at_r(r,mos_array_r)

  ! Keep only the MOs addressed by each spin channel.
  mos_array_a(1:elec_alpha_num) = mos_array_r(1:elec_alpha_num)
  mos_array_b(1:elec_beta_num)  = mos_array_r(1:elec_beta_num)

  dm_a = 0.d0
  do i = 1, elec_alpha_num
    dm_a += mos_array_a(i) * mos_array_a(i)
  end do

  dm_b = 0.d0
  do i = 1, elec_beta_num
    dm_b += mos_array_b(i) * mos_array_b(i)
  end do

  two_bod_dens = dm_a * dm_b

  f_mf_ab = 0.d0
  do i = 1, elec_alpha_num
    do j = 1, elec_beta_num
      do k = 1, elec_alpha_num
        do l = 1, elec_beta_num
          f_mf_ab += two_e_int_mf(l,k,j,i) * mos_array_a(i) * mos_array_a(k) * mos_array_b(j) * mos_array_b(l)
        end do
      end do
    end do
  end do

  ! multiply by two to adapt to the N(N-1) normalization condition of the active two-rdm
  f_mf_ab      *= 2.d0
  two_bod_dens *= 2.d0

  deallocate(mos_array_r, mos_array_a, mos_array_b)

end
|
||||||
|
|
||||||
|
subroutine get_grad_f_mf_ab(r,grad_f_mf_ab, grad_two_bod_dens,f_mf_ab,two_bod_dens, dm_a, dm_b,grad_dm_a, grad_dm_b)
  implicit none
  BEGIN_DOC
  ! Same quantities as get_f_mf_ab, together with their gradients with respect
  ! to the three components of r.
  !
  ! Input :
  !   r(3)                 : point at which MOs and MO gradients are evaluated
  !
  ! Output :
  !   dm_a, dm_b           : sum of phi_i(r)^2 over the first elec_alpha_num /
  !                          elec_beta_num MOs
  !   grad_dm_a, grad_dm_b : 2 * sum_i phi_i(r) * grad phi_i(r)
  !   two_bod_dens         : 2 * dm_a * dm_b
  !   grad_two_bod_dens    : 2 * (dm_a * grad_dm_b + dm_b * grad_dm_a)
  !   f_mf_ab              : 2 * sum two_e_int_mf(l,k,j,i) phi_i phi_j phi_k phi_l
  !   grad_f_mf_ab         : product-rule gradient of f_mf_ab
  END_DOC
  double precision, intent(in)  :: r(3)
  double precision, intent(out) :: f_mf_ab, two_bod_dens
  double precision, intent(out) :: grad_two_bod_dens(3), grad_f_mf_ab(3)
  double precision, intent(out) :: dm_a, dm_b, grad_dm_a(3), grad_dm_b(3)

  double precision, allocatable :: mos_array_r(:), mos_grad_array_r(:,:)
  double precision, allocatable :: mos_array_a(:), mos_array_b(:)
  double precision, allocatable :: mos_grad_array_a(:,:), mos_grad_array_b(:,:)
  double precision              :: mo_i, mo_j, mo_k, mo_l
  double precision              :: grad_mo_i(3), grad_mo_j(3), grad_mo_k(3), grad_mo_l(3)
  integer                       :: i, j, k, l

  allocate(mos_array_r(mo_num), mos_grad_array_r(3,mo_num))
  allocate(mos_array_a(elec_alpha_num), mos_array_b(elec_beta_num))
  allocate(mos_grad_array_a(3,elec_alpha_num), mos_grad_array_b(3,elec_beta_num))

  call give_all_mos_and_grad_at_r(r,mos_array_r,mos_grad_array_r)

  ! Values and gradients of the MOs addressed by each spin channel.
  mos_array_a(1:elec_alpha_num)          = mos_array_r(1:elec_alpha_num)
  mos_grad_array_a(1:3,1:elec_alpha_num) = mos_grad_array_r(1:3,1:elec_alpha_num)
  mos_array_b(1:elec_beta_num)           = mos_array_r(1:elec_beta_num)
  mos_grad_array_b(1:3,1:elec_beta_num)  = mos_grad_array_r(1:3,1:elec_beta_num)

  ! ALPHA DENSITY AND GRADIENT
  dm_a      = 0.d0
  grad_dm_a = 0.d0
  do i = 1, elec_alpha_num
    dm_a           += mos_array_a(i) * mos_array_a(i)
    grad_dm_a(1:3) += 2.d0 * mos_array_a(i) * mos_grad_array_a(1:3,i)
  end do

  ! BETA DENSITY AND GRADIENT
  dm_b      = 0.d0
  grad_dm_b = 0.d0
  do i = 1, elec_beta_num
    dm_b           += mos_array_b(i) * mos_array_b(i)
    grad_dm_b(1:3) += 2.d0 * mos_array_b(i) * mos_grad_array_b(1:3,i)
  end do

  ! TWO-BODY DENSITY AND GRADIENT
  two_bod_dens           = dm_a * dm_b
  grad_two_bod_dens(1:3) = dm_a * grad_dm_b(1:3) + dm_b * grad_dm_a(1:3)

  ! F_MF and GRADIENT
  f_mf_ab      = 0.d0
  grad_f_mf_ab = 0.d0
  do i = 1, elec_alpha_num
    mo_i           = mos_array_a(i)
    grad_mo_i(1:3) = mos_grad_array_a(1:3,i)
    do j = 1, elec_beta_num
      mo_j           = mos_array_b(j)
      grad_mo_j(1:3) = mos_grad_array_b(1:3,j)
      do k = 1, elec_alpha_num
        mo_k           = mos_array_a(k)
        grad_mo_k(1:3) = mos_grad_array_a(1:3,k)
        do l = 1, elec_beta_num
          mo_l           = mos_array_b(l)
          grad_mo_l(1:3) = mos_grad_array_b(1:3,l)
          f_mf_ab += two_e_int_mf(l,k,j,i) * mo_i * mo_j * mo_k * mo_l
          ! Product rule: one term per orbital carrying the derivative.
          grad_f_mf_ab(1:3) += two_e_int_mf(l,k,j,i) * &
           (mo_i * mo_j * mo_k * grad_mo_l(1:3) + mo_i * mo_j * grad_mo_k(1:3) * mo_l &
           +mo_i * grad_mo_j(1:3) * mo_k * mo_l + grad_mo_i(1:3) * mo_j * mo_k * mo_l)
        end do
      end do
    end do
  end do

  ! Same factor of two as applied in get_f_mf_ab, for values and gradients alike.
  f_mf_ab           *= 2.d0
  grad_f_mf_ab      *= 2.d0
  two_bod_dens      *= 2.d0
  grad_two_bod_dens *= 2.d0
end
|
||||||
|
|
||||||
|
subroutine mu_of_r_mean_field(r,mu_mf, dm)
  implicit none
  BEGIN_DOC
  ! Mean-field mu at the point r.
  !
  ! Input :
  !   r(3)  : point of evaluation
  !
  ! Output :
  !   mu_mf : 0.5 * sqpi * f_mf_ab / two_bod_dens, with f_mf_ab and two_bod_dens
  !           taken from get_f_mf_ab. When |two_bod_dens| < 1.d-10 the ratio is
  !           not formed and mu_mf is set to 1.d+10.
  !   dm    : dm_a + dm_b as returned by get_f_mf_ab
  !
  ! NOTE(review): sqpi comes from constants.include.F, presumably sqrt(pi).
  END_DOC
  include 'constants.include.F'
  double precision, intent(in)  :: r(3)
  double precision, intent(out) :: mu_mf, dm
  double precision              :: f_mf_ab, two_bod_dens, dm_a, dm_b

  call get_f_mf_ab(r,f_mf_ab,two_bod_dens, dm_a, dm_b)

  dm = dm_a + dm_b

  if (dabs(two_bod_dens) < 1.d-10) then
    ! Vanishing two-body density: avoid the division.
    mu_mf = 1.d+10
  else
    mu_mf = 0.5d0 * sqpi * f_mf_ab/two_bod_dens
  end if
end
|
||||||
|
|
||||||
|
subroutine grad_mu_of_r_mean_field(r,mu_mf, dm, grad_mu_mf, grad_dm)
  implicit none
  BEGIN_DOC
  ! Mean-field mu at the point r, the density, and their gradients.
  !
  ! Input :
  !   r(3)          : point of evaluation
  !
  ! Output :
  !   mu_mf         : 0.5 * sqpi * f_mf_ab / two_bod_dens
  !                   (1.d+10 when |two_bod_dens| < 1.d-10)
  !   grad_mu_mf(3) : quotient-rule gradient of mu_mf
  !                   (0 when |two_bod_dens| < 1.d-10)
  !   dm            : dm_a + dm_b
  !   grad_dm(3)    : grad_dm_a + grad_dm_b
  !
  ! All intermediate quantities come from get_grad_f_mf_ab.
  END_DOC
  include 'constants.include.F'
  double precision, intent(in)  :: r(3)
  double precision, intent(out) :: grad_mu_mf(3), grad_dm(3)
  double precision, intent(out) :: mu_mf, dm
  double precision              :: grad_f_mf_ab(3), grad_two_bod_dens(3), grad_dm_a(3), grad_dm_b(3)
  double precision              :: f_mf_ab, two_bod_dens, dm_a, dm_b

  call get_grad_f_mf_ab(r,grad_f_mf_ab, grad_two_bod_dens,f_mf_ab,two_bod_dens, dm_a, dm_b,grad_dm_a, grad_dm_b)

  dm           = dm_a + dm_b
  grad_dm(1:3) = grad_dm_a(1:3) + grad_dm_b(1:3)

  if (dabs(two_bod_dens) < 1.d-10) then
    ! Vanishing two-body density: mu is capped and its gradient set to zero.
    mu_mf      = 1.d+10
    grad_mu_mf = 0.d0
  else
    mu_mf = 0.5d0 * sqpi * f_mf_ab/two_bod_dens
    ! d(f/n) = (f' n - f n') / n^2
    grad_mu_mf(1:3) = 0.5d0 * sqpi * (grad_f_mf_ab(1:3) * two_bod_dens - f_mf_ab * grad_two_bod_dens(1:3))&
                      /(two_bod_dens*two_bod_dens)
  end if

end
|
@ -15,7 +15,162 @@ program projected_operators
|
|||||||
! call test_f_HF_valence_ab
|
! call test_f_HF_valence_ab
|
||||||
! call routine_full_mos
|
! call routine_full_mos
|
||||||
! call test_f_ii_valence_ab
|
! call test_f_ii_valence_ab
|
||||||
call test_f_ia_valence_ab
|
! call test_f_ia_valence_ab
|
||||||
call test_f_ii_ia_aa_valence_ab
|
! call test_f_ii_ia_aa_valence_ab
|
||||||
|
! call test
|
||||||
|
! call test_f_mean_field
|
||||||
|
! call test_grad_f_mean_field
|
||||||
|
call test_grad_mu_mf
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
subroutine test
  implicit none
  BEGIN_DOC
  ! Prints the grid-weighted sum of |f_hf_cholesky_sparse - f_hf_cholesky_sparse_bis|
  ! over all points of the final grid.
  END_DOC
  double precision :: ref, new, accu, weight
  integer          :: i_point

  accu = 0.d0
  do i_point = 1, n_points_final_grid
    weight = final_weight_at_r_vector(i_point)
    ref    = f_hf_cholesky_sparse(i_point)
    new    = f_hf_cholesky_sparse_bis(i_point)
    accu  += dabs(ref - new) * weight
  end do
  print*,'accu = ',accu

end
|
||||||
|
|
||||||
|
subroutine test_f_mean_field
  implicit none
  BEGIN_DOC
  ! Compares get_f_mf_ab with f_HF_valence_ab(r,r,...) on the final grid and
  ! prints the grid-weighted absolute differences of f and of the two-body
  ! density.
  END_DOC
  double precision :: weight, r(3)
  double precision :: ref_f, new_f, accu_f
  double precision :: ref_two_dens, new_two_dens, accu_two_dens, dm_a, dm_b
  integer          :: i_point

  accu_f        = 0.d0
  accu_two_dens = 0.d0

  do i_point = 1, n_points_final_grid
    r(1:3) = final_grid_points(1:3,i_point)
    weight = final_weight_at_r_vector(i_point)

    ! new_* : mean-field routine ; ref_* : reference routine at r1 = r2 = r
    call get_f_mf_ab(r,new_f,new_two_dens, dm_a, dm_b)
    call f_HF_valence_ab(r,r,ref_f,ref_two_dens)

    accu_f        += weight * dabs(new_f- ref_f)
    accu_two_dens += weight * dabs(new_two_dens - ref_two_dens)
  end do

  print*,'accu_f = ',accu_f
  print*,'accu_two_dens = ',accu_two_dens

end
|
||||||
|
|
||||||
|
subroutine test_grad_f_mean_field
  implicit none
  BEGIN_DOC
  ! Checks get_grad_f_mf_ab on the final grid:
  !
  ! * values (f_mf_ab, two_bod_dens, dm_a, dm_b) against get_f_mf_ab,
  ! * gradients against central finite differences of get_f_mf_ab with step dr.
  !
  ! Prints the grid-weighted absolute errors; gradient errors are printed per
  ! Cartesian component.
  END_DOC
  integer          :: i_point, k
  double precision :: weight, r(3)
  double precision :: grad_f_mf_ab(3), grad_two_bod_dens(3)
  double precision :: grad_dm_a(3), grad_dm_b(3)
  double precision :: f_mf_ab, two_bod_dens, dm_a, dm_b

  double precision :: num_grad_f_mf_ab(3), num_grad_two_bod_dens(3)
  double precision :: num_grad_dm_a(3), num_grad_dm_b(3)
  double precision :: f_mf_ab_p, f_mf_ab_m
  double precision :: two_bod_dens_p, two_bod_dens_m
  double precision :: dm_a_p, dm_a_m
  double precision :: dm_b_p, dm_b_m
  double precision :: rbis(3), dr
  double precision :: accu_grad_f_mf_ab(3), accu_grad_two_bod_dens(3)
  double precision :: accu_grad_dm_a(3), accu_grad_dm_b(3)
  double precision :: accu_f_mf_ab, accu_two_bod_dens, accu_dm_a, accu_dm_b

  ! Finite-difference step.
  dr = 0.00001d0

  accu_f_mf_ab      = 0.d0
  accu_two_bod_dens = 0.d0
  accu_dm_a         = 0.d0
  accu_dm_b         = 0.d0

  accu_grad_f_mf_ab      = 0.d0
  accu_grad_two_bod_dens = 0.d0
  accu_grad_dm_a         = 0.d0
  accu_grad_dm_b         = 0.d0

  do i_point = 1, n_points_final_grid
    r(1:3) = final_grid_points(1:3,i_point)
    weight = final_weight_at_r_vector(i_point)

    ! Analytical values and gradients, then the values from the plain routine.
    call get_grad_f_mf_ab(r,grad_f_mf_ab, grad_two_bod_dens,f_mf_ab,two_bod_dens, dm_a, dm_b,grad_dm_a, grad_dm_b)
    call get_f_mf_ab(r,f_mf_ab_p,two_bod_dens_p, dm_a_p, dm_b_p)

    accu_f_mf_ab      += weight * dabs(f_mf_ab - f_mf_ab_p)
    accu_two_bod_dens += weight * dabs(two_bod_dens - two_bod_dens_p)
    accu_dm_a         += weight*dabs(dm_a - dm_a_p)
    accu_dm_b         += weight*dabs(dm_b - dm_b_p)

    ! Central finite differences along each Cartesian direction.
    do k = 1, 3
      rbis = r
      rbis(k) += dr
      call get_f_mf_ab(rbis,f_mf_ab_p,two_bod_dens_p, dm_a_p, dm_b_p)
      rbis = r
      rbis(k) -= dr
      call get_f_mf_ab(rbis,f_mf_ab_m,two_bod_dens_m, dm_a_m, dm_b_m)
      num_grad_f_mf_ab(k)      = (f_mf_ab_p - f_mf_ab_m)/(2.d0*dr)
      num_grad_two_bod_dens(k) = (two_bod_dens_p - two_bod_dens_m)/(2.d0*dr)
      num_grad_dm_a(k)         = (dm_a_p - dm_a_m)/(2.d0*dr)
      num_grad_dm_b(k)         = (dm_b_p - dm_b_m)/(2.d0*dr)
    end do

    do k = 1, 3
      accu_grad_f_mf_ab(k)      += weight * dabs(grad_f_mf_ab(k) - num_grad_f_mf_ab(k))
      accu_grad_two_bod_dens(k) += weight * dabs(grad_two_bod_dens(k) - num_grad_two_bod_dens(k))
      accu_grad_dm_a(k)         += weight * dabs(grad_dm_a(k) - num_grad_dm_a(k))
      accu_grad_dm_b(k)         += weight * dabs(grad_dm_b(k) - num_grad_dm_b(k))
    end do
  end do

  print*,'accu_f_mf_ab = ',accu_f_mf_ab
  print*,'accu_two_bod_dens = ',accu_two_bod_dens
  print*,'accu_dm_a = ',accu_dm_a
  print*,'accu_dm_b = ',accu_dm_b
  print*,'accu_grad_f_mf_ab = '
  print*,accu_grad_f_mf_ab
  print*,'accu_grad_two_bod_dens = '
  print*,accu_grad_two_bod_dens
  ! Labels name the gradient errors explicitly, so they cannot be confused
  ! with the accu_dm_a / accu_dm_b value errors printed above.
  print*,'accu_grad_dm_a = '
  print*,accu_grad_dm_a
  print*,'accu_grad_dm_b = '
  print*,accu_grad_dm_b

end
|
||||||
|
|
||||||
|
subroutine test_grad_mu_mf
  implicit none
  BEGIN_DOC
  ! Checks grad_mu_of_r_mean_field on the final grid:
  !
  ! * mu_mf and dm against mu_of_r_mean_field,
  ! * grad_mu_mf and grad_dm against central finite differences of
  !   mu_of_r_mean_field with step dr.
  !
  ! Prints the grid-weighted absolute errors; gradient errors are printed per
  ! Cartesian component.
  END_DOC
  double precision :: weight, r(3), rbis(3)
  double precision :: mu_mf, dm, grad_mu_mf(3), grad_dm(3)
  double precision :: mu_mf_p, mu_mf_m, dm_m, dm_p, num_grad_mu_mf(3), dr, num_grad_dm(3)
  double precision :: accu_mu, accu_dm, accu_grad_dm(3), accu_grad_mu_mf(3)
  integer          :: i_point, k

  ! Finite-difference step.
  dr = 0.00001d0

  accu_mu         = 0.d0
  accu_dm         = 0.d0
  accu_grad_dm    = 0.d0
  accu_grad_mu_mf = 0.d0

  do i_point = 1, n_points_final_grid
    r(1:3) = final_grid_points(1:3,i_point)
    weight = final_weight_at_r_vector(i_point)

    call grad_mu_of_r_mean_field(r,mu_mf, dm, grad_mu_mf, grad_dm)
    call mu_of_r_mean_field(r,mu_mf_p, dm_p)

    accu_mu += weight*dabs(mu_mf_p - mu_mf)
    accu_dm += weight*dabs(dm_p - dm)

    do k = 1, 3
      ! Forward point
      rbis = r
      rbis(k) += dr
      call mu_of_r_mean_field(rbis,mu_mf_p, dm_p)
      ! Backward point
      rbis = r
      rbis(k) -= dr
      call mu_of_r_mean_field(rbis,mu_mf_m, dm_m)

      num_grad_mu_mf(k) = (mu_mf_p - mu_mf_m)/(2.d0*dr)
      num_grad_dm(k)    = (dm_p - dm_m)/(2.d0*dr)

      accu_grad_dm(k)+= weight *dabs(num_grad_dm(k) - grad_dm(k))
      accu_grad_mu_mf(k)+= weight *dabs(num_grad_mu_mf(k) - grad_mu_mf(k))
    end do
  end do

  print*,'accu_mu = ',accu_mu
  print*,'accu_dm = ',accu_dm
  print*,'accu_grad_dm = '
  print*, accu_grad_dm
  print*,'accu_grad_mu_mf = '
  print*, accu_grad_mu_mf

end
|
||||||
|
@ -12,6 +12,9 @@ program four_idx_transform
|
|||||||
!
|
!
|
||||||
END_DOC
|
END_DOC
|
||||||
|
|
||||||
|
if (do_mo_cholesky) then
|
||||||
|
stop 'Not implemented with Cholesky integrals'
|
||||||
|
endif
|
||||||
io_mo_two_e_integrals = 'Write'
|
io_mo_two_e_integrals = 'Write'
|
||||||
SOFT_TOUCH io_mo_two_e_integrals
|
SOFT_TOUCH io_mo_two_e_integrals
|
||||||
if (.true.) then
|
if (.true.) then
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user