9
1
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-09-01 05:33:40 +02:00

Merge branch 'dev-stable' of https://github.com/AbdAmmar/qp2 into dev-stable

This commit is contained in:
Abdallah Ammar 2024-07-29 17:22:53 +02:00
commit eb236e0112
103 changed files with 4182 additions and 1978 deletions

18
configure vendored
View File

@ -40,7 +40,7 @@ Usage:
$(basename $0) -c <file> $(basename $0) -c <file>
$(basename $0) -h $(basename $0) -h
$(basename $0) -i <package> $(basename $0) -i <package>
$(basename $0) -g [nvidia|none] $(basename $0) -g [nvidia|intel|none]
Options: Options:
-c <file> Define a COMPILATION configuration file, -c <file> Define a COMPILATION configuration file,
@ -49,7 +49,7 @@ Options:
-i <package> INSTALL <package>. Use at your OWN RISK: -i <package> INSTALL <package>. Use at your OWN RISK:
no support will be provided for the installation of no support will be provided for the installation of
dependencies. dependencies.
-g [nvidia|none] Choose GPU acceleration (experimental) -g [nvidia|intel|none] Choose GPU acceleration
Example: Example:
./$(basename $0) -c config/gfortran.cfg ./$(basename $0) -c config/gfortran.cfg
@ -115,19 +115,23 @@ while getopts "d:c:i:g:h" c ; do
done done
# Handle GPU acceleration # Handle GPU acceleration
rm -f ${QP_ROOT}/src/gpu rm -f ${QP_ROOT}/src/gpu_arch
case "$GPU" in case "$GPU" in
amd) # Nvidia amd) # AMD
echo "Activating AMD GPU acceleration" echo "Activating AMD GPU acceleration"
ln -s ${QP_ROOT}/src/gpu_amd ${QP_ROOT}/src/gpu ln -s ${QP_ROOT}/plugins/local/gpu_amd ${QP_ROOT}/src/gpu_arch
;;
intel) # Intel
echo "Activating Intel GPU acceleration (EXPERIMENTAL)"
ln -s ${QP_ROOT}/plugins/local/gpu_intel ${QP_ROOT}/src/gpu_arch
;; ;;
nvidia) # Nvidia nvidia) # Nvidia
echo "Activating Nvidia GPU acceleration" echo "Activating Nvidia GPU acceleration"
ln -s ${QP_ROOT}/src/gpu_nvidia ${QP_ROOT}/src/gpu ln -s ${QP_ROOT}/plugins/local/gpu_nvidia ${QP_ROOT}/src/gpu_arch
;; ;;
*) # No Acceleration *) # No Acceleration
echo "Disabling GPU acceleration" echo "Disabling GPU acceleration"
ln -s ${QP_ROOT}/src/gpu_x86 ${QP_ROOT}/src/gpu ln -s ${QP_ROOT}/plugins/local/gpu_x86 ${QP_ROOT}/src/gpu_arch
;; ;;
esac esac

View File

@ -1,4 +1,36 @@
! ---
subroutine run_pouet
BEGIN_DOC
! Selected Full Configuration Interaction with Stochastic selection and PT2.
END_DOC
use selection_types
implicit none
integer :: i, j, k, ndet
integer :: to_select
logical :: has
type(pt2_type) :: pt2_data, pt2_data_err
double precision :: rss
double precision :: correlation_energy_ratio
double precision :: hf_energy_ref
double precision :: relative_error
double precision, allocatable :: zeros(:),E_tc(:), norm(:)
logical, external :: qp_stop
double precision, external :: memory_of_double
PROVIDE mo_l_coef mo_r_coef
PROVIDE H_apply_buffer_allocated distributed_davidson
print*, ' Diagonal elements of the Fock matrix '
do i = 1, mo_num
write(*,*) i, Fock_matrix_tc_mo_tot(i,i)
enddo
end
! --- ! ---
subroutine run_stochastic_cipsi subroutine run_stochastic_cipsi

View File

@ -65,7 +65,15 @@ subroutine run_cipsi_tc()
if (.not. is_zmq_slave) then if (.not. is_zmq_slave) then
if(.True.)then! DO NOT REMOVE THE IF(.TRUE.) !!
! this has to be provided before mo_bi_ortho_tc_two_e to avoid twice the computation of ao_two_e_tc_tot
PROVIDE Fock_matrix_tc_mo_tot
! because Fock_matrix_tc_mo_tot depends on ao_two_e_tc_tot
! and that mo_bi_ortho_tc_two_e erase ao_two_e_tc_tot after being provided
endif
if(.True.)then ! DO NOT REMOVE THE IF(.TRUE.) !!
PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e
endif
if((elec_alpha_num+elec_beta_num) .ge. 3) then if((elec_alpha_num+elec_beta_num) .ge. 3) then
if(three_body_h_tc) then if(three_body_h_tc) then
@ -90,8 +98,16 @@ subroutine run_cipsi_tc()
call json_close call json_close
else else
if(.True.)then! DO NOT REMOVE THE IF(.TRUE.) !!
! this has to be provided before mo_bi_ortho_tc_two_e to avoid twice the computation of ao_two_e_tc_tot
PROVIDE Fock_matrix_tc_mo_tot
! because Fock_matrix_tc_mo_tot depends on ao_two_e_tc_tot
! and that mo_bi_ortho_tc_two_e erase ao_two_e_tc_tot after being provided
endif
if(.True.)then! DO NOT REMOVE THE IF(.TRUE.) !!
PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks
endif
if((elec_alpha_num+elec_beta_num) .ge. 3) then if((elec_alpha_num+elec_beta_num) .ge. 3) then
if(three_body_h_tc) then if(three_body_h_tc) then

View File

@ -0,0 +1,2 @@
-ltbb -lsycl -lmkl_sycl -lgpu -limf -lintlc -lstdc++

View File

@ -0,0 +1,8 @@
=========
gpu_intel
=========
Intel implementation of GPU routines. Uses MKL and SYCL.
```bash
icpx -fsycl gpu.cxx -c -qmkl=sequential
```

View File

@ -0,0 +1,177 @@
#include <CL/sycl.hpp>
#include <cassert>
#include <limits>
#include <oneapi/mkl/blas.hpp>
extern "C" {
/* Generic functions */
int gpu_ndevices() {
return 1;
}
void gpu_set_device(int32_t igpu) {
}
/* Allocation functions */
void gpu_allocate(void** ptr, int64_t size) {
auto queue = sycl::queue(sycl::default_selector_v);
try {
*ptr = sycl::malloc_shared(size, queue);
assert(*ptr != nullptr);
} catch (const sycl::exception& e) {
std::cerr << "SYCL exception caught: " << e.what() << std::endl;
*ptr = nullptr; // If allocation fails, set pointer to nullptr
}
}
void gpu_deallocate(void** ptr) {
assert(*ptr != nullptr);
sycl::free(*ptr, sycl::queue(sycl::default_selector_v));
*ptr = nullptr;
}
/* Upload data from host to device */
void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) {
sycl::queue queue(sycl::default_selector_v);
queue.memcpy(gpu_ptr, cpu_ptr, n).wait();
}
/* Download data from device to host */
void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) {
sycl::queue queue(sycl::default_selector_v);
queue.memcpy(cpu_ptr, gpu_ptr, n).wait();
}
/* Copy data from one GPU memory location to another */
void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) {
sycl::queue queue(sycl::default_selector_v);
queue.memcpy(gpu_ptr_dest, gpu_ptr_src, n).wait();
}
/* Queues */
/* SYCL queue as a replacement for CUDA stream */
void gpu_stream_create(sycl::queue** ptr) {
*ptr = new sycl::queue(sycl::default_selector_v);
}
void gpu_stream_destroy(sycl::queue** ptr) {
assert(*ptr != nullptr);
delete *ptr;
*ptr = nullptr;
}
void gpu_synchronize() {
sycl::queue queue(sycl::default_selector_v);
queue.wait_and_throw();
}
/* BLAS functions */
typedef struct {
sycl::queue* queue;
} blasHandle_t;
void gpu_set_stream(blasHandle_t* handle, sycl::queue* ptr) {
handle->queue = ptr;
}
void gpu_blas_create(blasHandle_t** ptr) {
*ptr = (blasHandle_t*) malloc(sizeof(blasHandle_t));
assert(*ptr != nullptr);
(*ptr)->queue = new sycl::queue(sycl::default_selector_v);
assert((*ptr)->queue != nullptr);
}
void gpu_blas_destroy(blasHandle_t** ptr) {
assert(*ptr != nullptr);
delete (*ptr)->queue;
free(*ptr);
*ptr = nullptr;
}
void gpu_ddot(blasHandle_t* handle, const int64_t n, const double* x, const int64_t incx,
const double* y, const int64_t incy, double* result) {
// Ensure input parameters are valid
assert(handle != nullptr);
assert(handle->queue != nullptr);
assert(n > 0);
assert(incx > 0);
assert(incy > 0);
assert(x != nullptr);
assert(y != nullptr);
assert(result != nullptr);
oneapi::mkl::blas::dot(*handle->queue, n, x, incx, y, incy, result);
}
void gpu_dgemv(blasHandle_t* handle, const char* transa, const int64_t m, const int64_t n, const double* alpha,
const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) {
assert(handle != nullptr);
assert(handle->queue != nullptr);
// Validate matrix dimensions and increments to be positive
assert(m > 0 && n > 0 && lda > 0 && incx > 0 && incy > 0);
assert(a != nullptr && x != nullptr && y != nullptr && alpha != nullptr && beta != nullptr);
// Determine the operation type
oneapi::mkl::transpose transa_ = oneapi::mkl::transpose::nontrans;
if (*transa == 'T' || *transa == 't') {
transa_ = oneapi::mkl::transpose::trans;
}
// Perform DGEMV operation using oneMKL
oneapi::mkl::blas::column_major::gemv(*handle->queue, transa_, m, n, *alpha, a, lda, x, incx, *beta, y, incy);
}
void gpu_dgemm(blasHandle_t* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha,
const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) {
assert(handle != nullptr && handle->queue != nullptr);
assert(m > 0 && n > 0 && k > 0 && lda > 0 && ldb > 0 && ldc > 0);
assert(a != nullptr && b != nullptr && c != nullptr && alpha != nullptr && beta != nullptr);
// Transpose operations
auto transa_ = (*transa == 'T' || *transa == 't') ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans;
auto transb_ = (*transb == 'T' || *transb == 't') ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans;
oneapi::mkl::blas::column_major::gemm(*handle->queue, transa_, transb_, m, n, k,
*alpha, a, lda, b, ldb, *beta, c, ldc);
}
void gpu_dgeam(blasHandle_t* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha,
const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
assert(handle != nullptr && handle->queue != nullptr);
assert(m > 0 && n > 0 && lda > 0 && ldb > 0 && ldc > 0);
assert(a != nullptr && b != nullptr && c != nullptr && alpha != nullptr && beta != nullptr);
// Determine transpose operations
bool transA = (*transa == 'T' || *transa == 't');
bool transB = (*transb == 'T' || *transb == 't');
handle->queue->submit([&](sycl::handler& cgh) {
cgh.parallel_for(sycl::range<2>(m, n), [=](sycl::id<2> idx) {
const int i = idx[0];
const int j = idx[1];
const int ai = transA ? j * lda + i : i * lda + j;
const int bi = transB ? j * ldb + i : i * ldb + j;
const int ci = i * ldc + j;
c[ci] = (*alpha) * a[ai] + (*beta) * b[bi];
});
});
}
} // extern C

View File

@ -0,0 +1 @@
-lcudart -lcublas -lcublasLt

View File

@ -0,0 +1 @@

View File

@ -0,0 +1,5 @@
==========
gpu_nvidia
==========
Nvidia implementation of GPU routines. Uses CUDA and CUBLAS libraries.

View File

@ -0,0 +1,326 @@
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <cublas_v2.h>
#include <cuda_runtime.h>
/* Generic functions */
int gpu_ndevices() {
int ngpus;
cudaGetDeviceCount(&ngpus);
return ngpus;
}
void gpu_set_device(int32_t igpu) {
cudaSetDevice((int) igpu);
}
/* Allocation functions */
void gpu_allocate(void** ptr, const int64_t size) {
size_t free, total;
cudaError_t rc = cudaMemGetInfo( &free, &total );
if (rc != cudaSuccess) {
free = INT64_MAX;
}
rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal);
// /* Use managed memory if it does not fit on the GPU */
// if (size < free && size < total/2) {
// rc= cudaMalloc(ptr, size);
// } else {
// rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal);
// }
assert (rc == cudaSuccess);
}
void gpu_deallocate(void** ptr) {
assert (*ptr != NULL);
cudaFree(*ptr);
*ptr = NULL;
}
/* Memory transfer functions */
void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) {
cudaMemcpy (gpu_ptr, cpu_ptr, n, cudaMemcpyHostToDevice);
}
void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) {
cudaMemcpy (cpu_ptr, gpu_ptr, n, cudaMemcpyDeviceToHost);
}
void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) {
cudaMemcpy (gpu_ptr_dest, gpu_ptr_src, n, cudaMemcpyDeviceToDevice);
}
/* Streams */
void gpu_stream_create(cudaStream_t* ptr) {
cudaError_t rc = cudaStreamCreate(ptr);
assert (rc == cudaSuccess);
}
void gpu_stream_destroy(cudaStream_t* ptr) {
assert (ptr != NULL);
cudaError_t rc = cudaStreamDestroy(*ptr);
assert (rc == cudaSuccess);
*ptr = NULL;
}
void gpu_set_stream(cublasHandle_t handle, cudaStream_t stream) {
cublasSetStream(handle, stream);
}
void gpu_synchronize() {
cudaDeviceSynchronize();
}
/* BLAS functions */
void gpu_blas_create(cublasHandle_t* ptr) {
cublasStatus_t rc = cublasCreate(ptr);
assert (rc == CUBLAS_STATUS_SUCCESS);
}
void gpu_blas_destroy(cublasHandle_t* ptr) {
assert (ptr != NULL);
cublasStatus_t rc = cublasDestroy(*ptr);
assert (rc == CUBLAS_STATUS_SUCCESS);
ptr = NULL;
}
void gpu_ddot(cublasHandle_t handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) {
assert (handle != NULL);
/* Convert to int */
int n_, incx_, incy_;
n_ = (int) n;
incx_ = (int) incx;
incy_ = (int) incy;
assert ( (int64_t) n_ == n );
assert ( (int64_t) incx_ == incx);
assert ( (int64_t) incy_ == incy);
cublasStatus_t rc = cublasDdot(handle, n_, x, incx_, y, incy_, result);
assert (rc == CUBLAS_STATUS_SUCCESS);
}
void gpu_sdot(cublasHandle_t handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) {
assert (handle != NULL);
/* Convert to int */
int n_, incx_, incy_;
n_ = (int) n;
incx_ = (int) incx;
incy_ = (int) incy;
/* Check for integer overflows */
assert ( (int64_t) n_ == n );
assert ( (int64_t) incx_ == incx);
assert ( (int64_t) incy_ == incy);
float result_ = 0.;
cublasStatus_t rc = cublasSdot(handle, n_, x, incx_, y, incy_, &result_);
assert (rc == CUBLAS_STATUS_SUCCESS);
*result = result_;
}
void gpu_dgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const double* alpha,
const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) {
assert (handle != NULL);
/* Convert to int */
int m_, n_, lda_, incx_, incy_;
m_ = (int) m;
n_ = (int) n;
lda_ = (int) lda;
incx_ = (int) incx;
incy_ = (int) incy;
/* Check for integer overflows */
assert ( (int64_t) m_ == m );
assert ( (int64_t) n_ == n );
assert ( (int64_t) lda_ == lda );
assert ( (int64_t) incx_ == incx);
assert ( (int64_t) incy_ == incy);
cublasOperation_t transa_ = CUBLAS_OP_N;
if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T;
cublasDgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_);
}
void gpu_sgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const float* alpha,
const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy) {
assert (handle != NULL);
/* Convert to int */
int m_, n_, lda_, incx_, incy_;
m_ = (int) m;
n_ = (int) n;
lda_ = (int) lda;
incx_ = (int) incx;
incy_ = (int) incy;
/* Check for integer overflows */
assert ( (int64_t) m_ == m );
assert ( (int64_t) n_ == n );
assert ( (int64_t) lda_ == lda );
assert ( (int64_t) incx_ == incx);
assert ( (int64_t) incy_ == incy);
cublasOperation_t transa_ = CUBLAS_OP_N;
if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T;
cublasSgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_);
}
void gpu_dgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha,
const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) {
assert (handle != NULL);
/* Convert to int */
int m_, n_, k_, lda_, ldb_, ldc_;
m_ = (int) m;
n_ = (int) n;
k_ = (int) k;
lda_ = (int) lda;
ldb_ = (int) ldb;
ldc_ = (int) ldc;
/* Check for integer overflows */
assert ( (int64_t) m_ == m );
assert ( (int64_t) n_ == n );
assert ( (int64_t) k_ == k );
assert ( (int64_t) lda_ == lda);
assert ( (int64_t) ldb_ == ldb);
assert ( (int64_t) ldc_ == ldc);
cublasOperation_t transa_ = CUBLAS_OP_N;
cublasOperation_t transb_ = CUBLAS_OP_N;
if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T;
if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T;
cublasDgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_);
}
void gpu_sgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha,
const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc) {
assert (handle != NULL);
/* Convert to int */
int m_, n_, k_, lda_, ldb_, ldc_;
m_ = (int) m;
n_ = (int) n;
k_ = (int) k;
lda_ = (int) lda;
ldb_ = (int) ldb;
ldc_ = (int) ldc;
/* Check for integer overflows */
assert ( (int64_t) m_ == m );
assert ( (int64_t) n_ == n );
assert ( (int64_t) k_ == k );
assert ( (int64_t) lda_ == lda);
assert ( (int64_t) ldb_ == ldb);
assert ( (int64_t) ldc_ == ldc);
cublasOperation_t transa_ = CUBLAS_OP_N;
cublasOperation_t transb_ = CUBLAS_OP_N;
if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T;
if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T;
cublasSgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_);
}
void gpu_dgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha,
const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
assert (handle != NULL);
/* Convert to int */
int m_, n_, lda_, ldb_, ldc_;
m_ = (int) m;
n_ = (int) n;
lda_ = (int) lda;
ldb_ = (int) ldb;
ldc_ = (int) ldc;
/* Check for integer overflows */
assert ( (int64_t) m_ == m );
assert ( (int64_t) n_ == n );
assert ( (int64_t) lda_ == lda);
assert ( (int64_t) ldb_ == ldb);
assert ( (int64_t) ldc_ == ldc);
cublasOperation_t transa_ = CUBLAS_OP_N;
cublasOperation_t transb_ = CUBLAS_OP_N;
if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T;
if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T;
cublasDgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_);
}
void gpu_sgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float* alpha,
const float* a, const int64_t lda, const float* beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) {
assert (handle != NULL);
/* Convert to int */
int m_, n_, lda_, ldb_, ldc_;
m_ = (int) m;
n_ = (int) n;
lda_ = (int) lda;
ldb_ = (int) ldb;
ldc_ = (int) ldc;
/* Check for integer overflows */
assert ( (int64_t) m_ == m );
assert ( (int64_t) n_ == n );
assert ( (int64_t) lda_ == lda);
assert ( (int64_t) ldb_ == ldb);
assert ( (int64_t) ldc_ == ldc);
cublasOperation_t transa_ = CUBLAS_OP_N;
cublasOperation_t transb_ = CUBLAS_OP_N;
if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T;
if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T;
cublasSgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_);
}

View File

@ -0,0 +1 @@

View File

@ -2,13 +2,13 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <stdbool.h>
#include <assert.h> #include <assert.h>
/* Generic functions */ /* Generic functions */
int gpu_ndevices() { int gpu_ndevices() {
return 1; return 0;
} }
void gpu_set_device(int32_t i) { void gpu_set_device(int32_t i) {
@ -25,7 +25,7 @@ void gpu_allocate(void** ptr, const int64_t n) {
} }
} }
void gpu_free(void** ptr) { void gpu_deallocate(void** ptr) {
free(*ptr); free(*ptr);
*ptr = NULL; *ptr = NULL;
} }
@ -49,10 +49,11 @@ void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) {
/* Streams */ /* Streams */
void gpu_stream_create(void** ptr) { void gpu_stream_create(void** ptr) {
*ptr = (void*) 2; *ptr = (void*) malloc(sizeof(char));
} }
void gpu_stream_destroy(void** ptr) { void gpu_stream_destroy(void** ptr) {
free(*ptr);
*ptr = NULL; *ptr = NULL;
} }
@ -68,18 +69,19 @@ void gpu_synchronize() {
/* BLAS functions */ /* BLAS functions */
void gpu_blas_create(void** handle) { void gpu_blas_create(void** handle) {
*handle = (void*) 1; *handle = (void*) malloc(sizeof(char));
} }
void gpu_blas_destroy(void** handle) { void gpu_blas_destroy(void** handle) {
free(*handle);
*handle = NULL; *handle = NULL;
} }
double ddot_(const int32_t* n, const double* x, const int32_t* incx, const double* y, const int32_t* incy); double ddot_(const int32_t* n, const double* x, const int32_t* incx, const double* y, const int32_t* incy);
void gpu_ddot(const void* handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) { void gpu_ddot(void* handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) {
assert (handle != NULL); assert (handle != NULL);
/* Convert to int32_t */ /* Convert to int32_t */
@ -100,7 +102,7 @@ void gpu_ddot(const void* handle, const int64_t n, const double* x, const int64_
float sdot_(const int32_t* n, const float* x, const int32_t* incx, const float* y, const int32_t* incy); float sdot_(const int32_t* n, const float* x, const int32_t* incx, const float* y, const int32_t* incy);
void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) { void gpu_sdot(void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) {
assert (handle != NULL); assert (handle != NULL);
/* Convert to int32_t */ /* Convert to int32_t */
@ -122,8 +124,8 @@ void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t
void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha, void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha,
const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy); const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy);
void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const double alpha, void gpu_dgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const double* alpha,
const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) { const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) {
assert (handle != NULL); assert (handle != NULL);
@ -143,15 +145,15 @@ void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int
assert ( (int64_t) incx_ == incx); assert ( (int64_t) incx_ == incx);
assert ( (int64_t) incy_ == incy); assert ( (int64_t) incy_ == incy);
dgemv_(&transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_); dgemv_(transa, &m_, &n_, alpha, a, &lda_, x, &incx_, beta, y, &incy_);
} }
void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha, void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha,
const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy); const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy);
void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const float alpha, void gpu_sgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const float* alpha,
const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) { const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy) {
assert (handle != NULL); assert (handle != NULL);
@ -171,15 +173,15 @@ void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int
assert ( (int64_t) incx_ == incx); assert ( (int64_t) incx_ == incx);
assert ( (int64_t) incy_ == incy); assert ( (int64_t) incy_ == incy);
sgemv_(&transa, &m_, &n_, &alpha, a, &lda_, x, &incx_, &beta, y, &incy_); sgemv_(transa, &m_, &n_, alpha, a, &lda_, x, &incx_, beta, y, &incy_);
} }
void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha, void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha,
const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc); const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc);
void gpu_dgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, void gpu_dgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha,
const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) { const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) {
assert (handle != NULL); assert (handle != NULL);
@ -201,7 +203,7 @@ void gpu_dgemm(const void* handle, const char transa, const char transb, const i
assert ( (int64_t) ldb_ == ldb); assert ( (int64_t) ldb_ == ldb);
assert ( (int64_t) ldc_ == ldc); assert ( (int64_t) ldc_ == ldc);
dgemm_(&transa, &transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_); dgemm_(transa, transb, &m_, &n_, &k_, alpha, a, &lda_, b, &ldb_, beta, c, &ldc_);
} }
@ -209,8 +211,8 @@ void gpu_dgemm(const void* handle, const char transa, const char transb, const i
void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha, void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha,
const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc); const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc);
void gpu_sgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, void gpu_sgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha,
const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) { const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc) {
assert (handle != NULL); assert (handle != NULL);
@ -232,136 +234,133 @@ void gpu_sgemm(const void* handle, const char transa, const char transb, const i
assert ( (int64_t) ldb_ == ldb); assert ( (int64_t) ldb_ == ldb);
assert ( (int64_t) ldc_ == ldc); assert ( (int64_t) ldc_ == ldc);
sgemm_(&transa, &transb, &m_, &n_, &k_, &alpha, a, &lda_, b, &ldb_, &beta, c, &ldc_); sgemm_(transa, transb, &m_, &n_, &k_, alpha, a, &lda_, b, &ldb_, beta, c, &ldc_);
} }
void gpu_dgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, void gpu_dgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha,
const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
if (handle == NULL) { assert (handle != NULL);
perror("NULL handle");
exit(-1);
}
if ( (transa == 'N' && transb == 'N') || if ( (*transa == 'N' && *transb == 'N') ||
(transa == 'n' && transb == 'N') || (*transa == 'n' && *transb == 'N') ||
(transa == 'N' && transb == 'n') || (*transa == 'N' && *transb == 'n') ||
(transa == 'n' && transb == 'n') ) { (*transa == 'n' && *transb == 'n') ) {
if (alpha == 0.) { if (*alpha == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = beta * b[j*ldb+i]; c[j*ldc+i] = *beta * b[j*ldb+i];
} }
} }
} else if (beta == 0.) { } else if (*beta == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[j*lda+i]; c[j*ldc+i] = *alpha * a[j*lda+i];
} }
} }
} else { } else {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[j*lda+i] + beta * b[j*ldb+i]; c[j*ldc+i] = *alpha * a[j*lda+i] + *beta * b[j*ldb+i];
} }
} }
} }
} else if ( (transa == 'N' && transb == 'T') || } else if ( (*transa == 'N' && *transb == 'T') ||
(transa == 'n' && transb == 'T') || (*transa == 'n' && *transb == 'T') ||
(transa == 'N' && transb == 't') || (*transa == 'N' && *transb == 't') ||
(transa == 'n' && transb == 't') ) { (*transa == 'n' && *transb == 't') ) {
if (alpha == 0.) { if (*alpha == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = beta * b[i*ldb+j]; c[j*ldc+i] = *beta * b[i*ldb+j];
} }
} }
} else if (beta == 0.) { } else if (*beta == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[j*lda+i]; c[j*ldc+i] = *alpha * a[j*lda+i];
} }
} }
} else { } else {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[j*lda+i] + beta * b[i*ldb+j]; c[j*ldc+i] = *alpha * a[j*lda+i] + *beta * b[i*ldb+j];
} }
} }
} }
} else if ( (transa == 'T' && transb == 'N') || } else if ( (*transa == 'T' && *transb == 'N') ||
(transa == 't' && transb == 'N') || (*transa == 't' && *transb == 'N') ||
(transa == 'T' && transb == 'n') || (*transa == 'T' && *transb == 'n') ||
(transa == 't' && transb == 'n') ) { (*transa == 't' && *transb == 'n') ) {
if (alpha == 0.) { if (*alpha == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = beta * b[j*ldb+i]; c[j*ldc+i] = *beta * b[j*ldb+i];
} }
} }
} else if (beta == 0.) { } else if (*beta == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[i*lda+j]; c[j*ldc+i] = *alpha * a[i*lda+j];
} }
} }
} else { } else {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[i*lda+j] + beta * b[j*ldb+i]; c[j*ldc+i] = *alpha * a[i*lda+j] + *beta * b[j*ldb+i];
} }
} }
} }
} else if ( (transa == 'T' && transb == 'T') || } else if ( (*transa == 'T' && *transb == 'T') ||
(transa == 't' && transb == 'T') || (*transa == 't' && *transb == 'T') ||
(transa == 'T' && transb == 't') || (*transa == 'T' && *transb == 't') ||
(transa == 't' && transb == 't') ) { (*transa == 't' && *transb == 't') ) {
if (alpha == 0.) { if (*alpha == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = beta * b[i*ldb+j]; c[j*ldc+i] = *beta * b[i*ldb+j];
} }
} }
} else if (beta == 0.) { } else if (*beta == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[i*lda+j]; c[j*ldc+i] = *alpha * a[i*lda+j];
} }
} }
} else { } else {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[i*lda+j] + beta * b[i*ldb+j]; c[j*ldc+i] = *alpha * a[i*lda+j] + *beta * b[i*ldb+j];
} }
} }
@ -371,132 +370,129 @@ void gpu_dgeam(const void* handle, const char transa, const char transb, const i
} }
void gpu_sgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha, void gpu_sgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float* alpha,
const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) { const float* a, const int64_t lda, const float* beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) {
if (handle == NULL) { assert (handle != NULL);
perror("NULL handle");
exit(-1);
}
if ( (transa == 'N' && transb == 'N') || if ( (*transa == 'N' && *transb == 'N') ||
(transa == 'n' && transb == 'N') || (*transa == 'n' && *transb == 'N') ||
(transa == 'N' && transb == 'n') || (*transa == 'N' && *transb == 'n') ||
(transa == 'n' && transb == 'n') ) { (*transa == 'n' && *transb == 'n') ) {
if (alpha == 0.) { if (*alpha == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = beta * b[j*ldb+i]; c[j*ldc+i] = *beta * b[j*ldb+i];
} }
} }
} else if (beta == 0.) { } else if (*beta == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[j*lda+i]; c[j*ldc+i] = *alpha * a[j*lda+i];
} }
} }
} else { } else {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[j*lda+i] + beta * b[j*ldb+i]; c[j*ldc+i] = *alpha * a[j*lda+i] + *beta * b[j*ldb+i];
} }
} }
} }
} else if ( (transa == 'N' && transb == 'T') || } else if ( (*transa == 'N' && *transb == 'T') ||
(transa == 'n' && transb == 'T') || (*transa == 'n' && *transb == 'T') ||
(transa == 'N' && transb == 't') || (*transa == 'N' && *transb == 't') ||
(transa == 'n' && transb == 't') ) { (*transa == 'n' && *transb == 't') ) {
if (alpha == 0.) { if (*alpha == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = beta * b[i*ldb+j]; c[j*ldc+i] = *beta * b[i*ldb+j];
} }
} }
} else if (beta == 0.) { } else if (*beta == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[j*lda+i]; c[j*ldc+i] = *alpha * a[j*lda+i];
} }
} }
} else { } else {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[j*lda+i] + beta * b[i*ldb+j]; c[j*ldc+i] = *alpha * a[j*lda+i] + *beta * b[i*ldb+j];
} }
} }
} }
} else if ( (transa == 'T' && transb == 'N') || } else if ( (*transa == 'T' && *transb == 'N') ||
(transa == 't' && transb == 'N') || (*transa == 't' && *transb == 'N') ||
(transa == 'T' && transb == 'n') || (*transa == 'T' && *transb == 'n') ||
(transa == 't' && transb == 'n') ) { (*transa == 't' && *transb == 'n') ) {
if (alpha == 0.) { if (*alpha == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = beta * b[j*ldb+i]; c[j*ldc+i] = *beta * b[j*ldb+i];
} }
} }
} else if (beta == 0.) { } else if (*beta == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[i*lda+j]; c[j*ldc+i] = *alpha * a[i*lda+j];
} }
} }
} else { } else {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[i*lda+j] + beta * b[j*ldb+i]; c[j*ldc+i] = *alpha * a[i*lda+j] + *beta * b[j*ldb+i];
} }
} }
} }
} else if ( (transa == 'T' && transb == 'T') || } else if ( (*transa == 'T' && *transb == 'T') ||
(transa == 't' && transb == 'T') || (*transa == 't' && *transb == 'T') ||
(transa == 'T' && transb == 't') || (*transa == 'T' && *transb == 't') ||
(transa == 't' && transb == 't') ) { (*transa == 't' && *transb == 't') ) {
if (alpha == 0.) { if (*alpha == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = beta * b[i*ldb+j]; c[j*ldc+i] = *beta * b[i*ldb+j];
} }
} }
} else if (beta == 0.) { } else if (*beta == 0.) {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[i*lda+j]; c[j*ldc+i] = *alpha * a[i*lda+j];
} }
} }
} else { } else {
for (int64_t j=0 ; j<n ; ++j) { for (int64_t j=0 ; j<n ; ++j) {
for (int64_t i=0 ; i<n ; ++i) { for (int64_t i=0 ; i<m ; ++i) {
c[j*ldc+i] = alpha * a[i*lda+j] + beta * b[i*ldb+j]; c[j*ldc+i] = *alpha * a[i*lda+j] + *beta * b[i*ldb+j];
} }
} }

View File

@ -288,25 +288,31 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
else else
print*, ' ao_integrals_map will be used' ! print*, ' ao_integrals_map will be used'
PROVIDE ao_integrals_map ! PROVIDE ao_integrals_map
print*,'Cholesky vectors will be used '
double precision :: get_ao_integ_chol,eri
eri = get_ao_integ_chol(1,1,1,1) ! FOR OPENMP
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & !!! !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) &
!$OMP PRIVATE(i, j, k, l) !$OMP SHARED(ao_num, ao_two_e_tc_tot) &
!$OMP PRIVATE(i, j, k, l,eri)
!$OMP DO COLLAPSE(3) !$OMP DO COLLAPSE(3)
do j = 1, ao_num do j = 1, ao_num
do l = 1, ao_num do l = 1, ao_num
do i = 1, ao_num do i = 1, ao_num
do k = 1, ao_num do k = 1, ao_num
! < 1:i, 2:j | 1:k, 2:l > ! < 1:i, 2:j | 1:k, 2:l >
ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) ! eri = get_ao_two_e_integral(i, j, k, l, ao_integrals_map)
eri = get_ao_integ_chol(i,k,j,l)
ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + eri
enddo enddo
enddo enddo
enddo enddo
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
FREE ao_integrals_map ! FREE ao_integrals_map
endif endif
if((tc_integ_type .eq. "numeric") .and. (.not. tc_save_mem)) then if((tc_integ_type .eq. "numeric") .and. (.not. tc_save_mem)) then

View File

@ -10,8 +10,6 @@ subroutine provide_all_three_ints_bi_ortho()
implicit none implicit none
double precision :: t1, t2 double precision :: t1, t2
PROVIDE ao_two_e_integrals_in_map
print *, ' start provide_all_three_ints_bi_ortho' print *, ' start provide_all_three_ints_bi_ortho'
call wall_time(t1) call wall_time(t1)

View File

@ -30,7 +30,9 @@ BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho, (N_det,N_det)]
print *, ' PROVIDING htilde_matrix_elmt_bi_ortho ...' print *, ' PROVIDING htilde_matrix_elmt_bi_ortho ...'
call wall_time(t1) call wall_time(t1)
if(three_body_h_tc)then
call provide_all_three_ints_bi_ortho() call provide_all_three_ints_bi_ortho()
endif
i = 1 i = 1
j = 1 j = 1

View File

@ -1,7 +1,7 @@
program spher_harm program spher_harm
implicit none implicit none
! call test_spher_harm call test_spher_harm
! call test_cart ! call test_cart
call test_brutal_spheric ! call test_brutal_spheric
end end

View File

@ -7,6 +7,7 @@ subroutine spher_harm_func_r3(r,l,m,re_ylm, im_ylm)
double precision :: theta, phi,r_abs double precision :: theta, phi,r_abs
call cartesian_to_spherical(r,theta,phi,r_abs) call cartesian_to_spherical(r,theta,phi,r_abs)
call spher_harm_func(l,m,theta,phi,re_ylm, im_ylm) call spher_harm_func(l,m,theta,phi,re_ylm, im_ylm)
! call spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm)
end end
@ -131,6 +132,10 @@ subroutine spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm)
tmp = - inv_sq_pi * dsqrt(3.d0/8.d0) * dsin(theta) tmp = - inv_sq_pi * dsqrt(3.d0/8.d0) * dsin(theta)
re_ylm = tmp * dcos(phi) re_ylm = tmp * dcos(phi)
im_ylm = tmp * dsin(phi) im_ylm = tmp * dsin(phi)
else if (l==1.and.m==-1)then
tmp = - inv_sq_pi * dsqrt(3.d0/8.d0) * dsin(theta)
re_ylm = tmp * dcos(phi)
im_ylm = -tmp * dsin(phi)
else if(l==1.and.m==0)then else if(l==1.and.m==0)then
tmp = inv_sq_pi * dsqrt(3.d0/4.d0) * dcos(theta) tmp = inv_sq_pi * dsqrt(3.d0/4.d0) * dcos(theta)
re_ylm = tmp re_ylm = tmp
@ -139,10 +144,18 @@ subroutine spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm)
tmp = 0.25d0 * inv_sq_pi * dsqrt(0.5d0*15.d0) * dsin(theta)*dsin(theta) tmp = 0.25d0 * inv_sq_pi * dsqrt(0.5d0*15.d0) * dsin(theta)*dsin(theta)
re_ylm = tmp * dcos(2.d0*phi) re_ylm = tmp * dcos(2.d0*phi)
im_ylm = tmp * dsin(2.d0*phi) im_ylm = tmp * dsin(2.d0*phi)
else if(l==2.and.m==-2)then
tmp = 0.25d0 * inv_sq_pi * dsqrt(0.5d0*15.d0) * dsin(theta)*dsin(theta)
re_ylm = tmp * dcos(2.d0*phi)
im_ylm =-tmp * dsin(2.d0*phi)
else if(l==2.and.m==1)then else if(l==2.and.m==1)then
tmp = - inv_sq_pi * dsqrt(15.d0/8.d0) * dsin(theta) * dcos(theta) tmp = - inv_sq_pi * dsqrt(15.d0/8.d0) * dsin(theta) * dcos(theta)
re_ylm = tmp * dcos(phi) re_ylm = tmp * dcos(phi)
im_ylm = tmp * dsin(phi) im_ylm = tmp * dsin(phi)
else if(l==2.and.m==-1)then
tmp = - inv_sq_pi * dsqrt(15.d0/8.d0) * dsin(theta) * dcos(theta)
re_ylm = tmp * dcos(phi)
im_ylm =-tmp * dsin(phi)
else if(l==2.and.m==0)then else if(l==2.and.m==0)then
tmp = dsqrt(5.d0/4.d0) * inv_sq_pi* (1.5d0*dcos(theta)*dcos(theta)-0.5d0) tmp = dsqrt(5.d0/4.d0) * inv_sq_pi* (1.5d0*dcos(theta)*dcos(theta)-0.5d0)
re_ylm = tmp re_ylm = tmp

View File

@ -1,3 +1,4 @@
gpu
tc_keywords tc_keywords
jastrow jastrow
qmckl qmckl

View File

@ -2,7 +2,7 @@
! --- ! ---
subroutine provide_int2_grad1_u12_ao() subroutine provide_int2_grad1_u12_ao()
use gpu
BEGIN_DOC BEGIN_DOC
! !
! int2_grad1_u12_ao(i,j,ipoint,1) = \int dr2 [\grad1 u(r1,r2)]_x1 \chi_i(r2) \chi_j(r2) ! int2_grad1_u12_ao(i,j,ipoint,1) = \int dr2 [\grad1 u(r1,r2)]_x1 \chi_i(r2) \chi_j(r2)
@ -35,8 +35,9 @@ subroutine provide_int2_grad1_u12_ao()
double precision :: weight1, ao_k_r, ao_i_r double precision :: weight1, ao_k_r, ao_i_r
double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq
double precision :: time0, time1, time2, tc1, tc2, tc double precision :: time0, time1, time2, tc1, tc2, tc
double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:), tc_int_2e_ao(:,:,:,:) type(gpu_double4) :: int2_grad1_u12_ao
double precision, allocatable :: tmp(:,:,:), c_mat(:,:,:), tmp_grad1_u12(:,:,:) type(gpu_double3) :: tmp_grad1_u12, tmp_grad1_u12p, tmp
double precision, allocatable :: c_mat(:,:,:), tc_int_2e_ao(:,:,:,:)
double precision, external :: get_ao_two_e_integral double precision, external :: get_ao_two_e_integral
@ -51,6 +52,7 @@ subroutine provide_int2_grad1_u12_ao()
call total_memory(mem) call total_memory(mem)
mem = max(1.d0, qp_max_mem - mem) mem = max(1.d0, qp_max_mem - mem)
mem = 6
n_double = mem * 1.d8 n_double = mem * 1.d8
n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid))
n_rest = int(mod(n_points_final_grid, n_blocks)) n_rest = int(mod(n_points_final_grid, n_blocks))
@ -64,9 +66,9 @@ subroutine provide_int2_grad1_u12_ao()
! --- ! ---
! --- ! ---
allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,4)) call gpu_allocate(int2_grad1_u12_ao, ao_num,ao_num,n_points_final_grid,4)
allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) call gpu_allocate(tmp,n_points_extra_final_grid,ao_num,ao_num)
!$OMP PARALLEL & !$OMP PARALLEL &
!$OMP DEFAULT (NONE) & !$OMP DEFAULT (NONE) &
!$OMP PRIVATE (j, i, jpoint) & !$OMP PRIVATE (j, i, jpoint) &
@ -75,21 +77,28 @@ subroutine provide_int2_grad1_u12_ao()
do j = 1, ao_num do j = 1, ao_num
do i = 1, ao_num do i = 1, ao_num
do jpoint = 1, n_points_extra_final_grid do jpoint = 1, n_points_extra_final_grid
tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) tmp%f(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j)
enddo enddo
enddo enddo
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,4)) call gpu_allocate(tmp_grad1_u12,n_points_extra_final_grid,n_blocks,4)
call gpu_allocate(tmp_grad1_u12p,n_points_extra_final_grid,n_blocks,4)
tc = 0.d0 tc = 0.d0
type(gpu_stream) :: stream(4)
do i=1,4
call gpu_stream_create(stream(i))
enddo
do i_pass = 1, n_pass do i_pass = 1, n_pass
ii = (i_pass-1)*n_blocks + 1 ii = (i_pass-1)*n_blocks + 1
call wall_time(tc1) call wall_time(tc1)
!$OMP PARALLEL & !$OMP PARALLEL &
!$OMP DEFAULT (NONE) & !$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i_blocks, ipoint) & !$OMP PRIVATE (i_blocks, ipoint) &
@ -97,27 +106,26 @@ subroutine provide_int2_grad1_u12_ao()
!$OMP DO !$OMP DO
do i_blocks = 1, n_blocks do i_blocks = 1, n_blocks
ipoint = ii - 1 + i_blocks ! r1 ipoint = ii - 1 + i_blocks ! r1
call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1), tmp_grad1_u12(1,i_blocks,2), tmp_grad1_u12(1,i_blocks,3), tmp_grad1_u12(1,i_blocks,4)) call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12%f(1,i_blocks,1), tmp_grad1_u12%f(1,i_blocks,2), &
tmp_grad1_u12%f(1,i_blocks,3), tmp_grad1_u12%f(1,i_blocks,4))
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
call wall_time(tc2) call wall_time(tc2)
tc = tc + tc2 - tc1 tc = tc + tc2 - tc1
call gpu_synchronize()
call gpu_copy(tmp_grad1_u12,tmp_grad1_u12p)
do m = 1, 4 do m = 1, 4
call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & call gpu_set_stream(blas_handle, stream(m))
, tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & call gpu_dgemm(blas_handle, "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 &
, 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) , tmp%f(1,1,1), n_points_extra_final_grid, tmp_grad1_u12p%f(1,1,m), n_points_extra_final_grid &
, 0.d0, int2_grad1_u12_ao%f(1,1,ii,m), ao_num*ao_num)
enddo enddo
enddo enddo
deallocate(tmp_grad1_u12)
if(n_rest .gt. 0) then if(n_rest .gt. 0) then
allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,4))
ii = n_pass*n_blocks + 1 ii = n_pass*n_blocks + 1
call wall_time(tc1) call wall_time(tc1)
@ -128,7 +136,8 @@ subroutine provide_int2_grad1_u12_ao()
!$OMP DO !$OMP DO
do i_rest = 1, n_rest do i_rest = 1, n_rest
ipoint = ii - 1 + i_rest ! r1 ipoint = ii - 1 + i_rest ! r1
call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1), tmp_grad1_u12(1,i_rest,2), tmp_grad1_u12(1,i_rest,3), tmp_grad1_u12(1,i_rest,4)) call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12%f(1,i_rest,1), tmp_grad1_u12%f(1,i_rest,2), &
tmp_grad1_u12%f(1,i_rest,3), tmp_grad1_u12%f(1,i_rest,4))
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
@ -136,15 +145,23 @@ subroutine provide_int2_grad1_u12_ao()
tc = tc + tc2 - tc1 tc = tc + tc2 - tc1
do m = 1, 4 do m = 1, 4
call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & call gpu_set_stream(blas_handle, stream(m))
, tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & call gpu_dgemm(blas_handle, "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 &
, 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) , tmp%f(1,1,1), n_points_extra_final_grid, tmp_grad1_u12%f(1,1,m), n_points_extra_final_grid &
, 0.d0, int2_grad1_u12_ao%f(1,1,ii,m), ao_num*ao_num)
enddo enddo
deallocate(tmp_grad1_u12)
endif endif
call gpu_synchronize()
call gpu_deallocate(tmp_grad1_u12)
call gpu_deallocate(tmp_grad1_u12p)
deallocate(tmp) do i=1,4
call gpu_stream_destroy(stream(i))
enddo
call gpu_deallocate(tmp)
call wall_time(time1) call wall_time(time1)
@ -152,6 +169,8 @@ subroutine provide_int2_grad1_u12_ao()
print*, ' wall time Jastrow derivatives (min) = ', tc / 60.d0 print*, ' wall time Jastrow derivatives (min) = ', tc / 60.d0
call print_memory_usage() call print_memory_usage()
!TODO
stop
! --- ! ---
! --- ! ---
! --- ! ---
@ -177,7 +196,7 @@ subroutine provide_int2_grad1_u12_ao()
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
, int2_grad1_u12_ao(1,1,1,4), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & , int2_grad1_u12_ao%f(1,1,1,4), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid &
, 0.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num) , 0.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num)
deallocate(c_mat) deallocate(c_mat)
@ -213,7 +232,7 @@ subroutine provide_int2_grad1_u12_ao()
!$OMP END PARALLEL !$OMP END PARALLEL
call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 &
, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & , int2_grad1_u12_ao%f(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid &
, 1.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num) , 1.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num)
enddo enddo
deallocate(c_mat) deallocate(c_mat)
@ -263,7 +282,7 @@ subroutine provide_int2_grad1_u12_ao()
print*, ' Writing int2_grad1_u12_ao in ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao' print*, ' Writing int2_grad1_u12_ao in ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao'
open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write")
call ezfio_set_work_empty(.False.) call ezfio_set_work_empty(.False.)
write(11) int2_grad1_u12_ao(:,:,:,1:3) write(11) int2_grad1_u12_ao%f(:,:,:,1:3)
close(11) close(11)
print*, ' Saving tc_int_2e_ao in ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot' print*, ' Saving tc_int_2e_ao in ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot'
@ -276,7 +295,7 @@ subroutine provide_int2_grad1_u12_ao()
! ---- ! ----
deallocate(int2_grad1_u12_ao) call gpu_deallocate(int2_grad1_u12_ao)
deallocate(tc_int_2e_ao) deallocate(tc_int_2e_ao)
call wall_time(time2) call wall_time(time2)

View File

@ -1,3 +1,15 @@
double precision function get_ao_integ_chol(i,j,k,l)
implicit none
BEGIN_DOC
! CHOLESKY representation of the integral of the AO basis <ik|jl> or (ij|kl)
! i(r1) j(r1) 1/r12 k(r2) l(r2)
END_DOC
integer, intent(in) :: i,j,k,l
double precision, external :: ddot
get_ao_integ_chol = ddot(cholesky_ao_num, cholesky_ao_transp(1,i,j), 1, cholesky_ao_transp(1,k,l), 1)
end
BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ] BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ]
implicit none implicit none
BEGIN_DOC BEGIN_DOC
@ -25,7 +37,10 @@ END_PROVIDER
! Last dimension of cholesky_ao is cholesky_ao_num ! Last dimension of cholesky_ao is cholesky_ao_num
! !
! https://mogp-emulator.readthedocs.io/en/latest/methods/proc/ProcPivotedCholesky.html ! https://mogp-emulator.readthedocs.io/en/latest/methods/proc/ProcPivotedCholesky.html
!
! https://doi.org/10.1016/j.apnum.2011.10.001 : Page 4, Algorithm 1 ! https://doi.org/10.1016/j.apnum.2011.10.001 : Page 4, Algorithm 1
!
! https://www.diva-portal.org/smash/get/diva2:396223/FULLTEXT01.pdf
END_DOC END_DOC
integer*8 :: ndim8 integer*8 :: ndim8
@ -155,11 +170,15 @@ END_PROVIDER
Lset(np8) = p8 Lset(np8) = p8
endif endif
enddo enddo
np = np8 if (np8 > ndim8) stop 'np>ndim8'
np = int(np8,4)
if (np <= 0) stop 'np<=0' if (np <= 0) stop 'np<=0'
if (np > ndim8) stop 'np>ndim8'
rank_max = np
! Avoid too large arrays when there are many electrons
if (elec_num > 10) then
rank_max = min(np,20*elec_num*elec_num) rank_max = min(np,20*elec_num*elec_num)
endif
call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1))
call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /))
@ -428,7 +447,7 @@ END_PROVIDER
Lset(np8) = p8 Lset(np8) = p8
endif endif
enddo enddo
np = np8 np = int(np8,4)
enddo enddo

View File

@ -79,3 +79,9 @@ type: logical
doc: If |true|, the pt2_max value in the CIPSI is set to 10-10 and will not change doc: If |true|, the pt2_max value in the CIPSI is set to 10-10 and will not change
interface: ezfio,provider,ocaml interface: ezfio,provider,ocaml
default: False default: False
[act_mos_opt]
type: logical
doc: If |true|, the active orbitals are also optimized variationally
interface: ezfio,provider,ocaml
default: False

View File

@ -3,3 +3,4 @@ selectors_full
generators_cas generators_cas
two_body_rdm two_body_rdm
dav_general_mat dav_general_mat
mo_optimization_utils

View File

@ -1,18 +1,25 @@
BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] BEGIN_PROVIDER [real*8, bielec_PQxx_array, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)]
BEGIN_DOC BEGIN_DOC
! bielec_PQxx : integral (pq|xx) with p,q arbitrary, x core or active ! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!!
!
! Replaced by the Cholesky-based function bielec_PQxx
!
! bielec_PQxx_array : integral (pq|xx) with p,q arbitrary, x core or active
! indices are unshifted orbital numbers ! indices are unshifted orbital numbers
END_DOC END_DOC
implicit none implicit none
integer :: i,j,ii,jj,p,q,i3,j3,t3,v3 integer :: i,j,ii,jj,p,q,i3,j3,t3,v3
real*8 :: mo_two_e_integral real*8 :: mo_two_e_integral
print*,''
print*,'Providing bielec_PQxx_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!'
print*,''
bielec_PQxx(:,:,:,:) = 0.d0 bielec_PQxx_array(:,:,:,:) = 0.d0
PROVIDE mo_two_e_integrals_in_map PROVIDE mo_two_e_integrals_in_map
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,ii,j,jj,i3,j3) & !$OMP PRIVATE(i,ii,j,jj,i3,j3) &
!$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PQxx, & !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PQxx_array, &
!$OMP n_act_orb,mo_integrals_map,list_act) !$OMP n_act_orb,mo_integrals_map,list_act)
!$OMP DO !$OMP DO
@ -20,14 +27,14 @@ BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core
ii=list_core_inact(i) ii=list_core_inact(i)
do j=i,n_core_inact_orb do j=i,n_core_inact_orb
jj=list_core_inact(j) jj=list_core_inact(j)
call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j),mo_integrals_map) call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx_array(1,1,i,j),mo_integrals_map)
bielec_PQxx(:,:,j,i)=bielec_PQxx(:,:,i,j) bielec_PQxx_array(:,:,j,i)=bielec_PQxx_array(:,:,i,j)
end do end do
do j=1,n_act_orb do j=1,n_act_orb
jj=list_act(j) jj=list_act(j)
j3=j+n_core_inact_orb j3=j+n_core_inact_orb
call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j3),mo_integrals_map) call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx_array(1,1,i,j3),mo_integrals_map)
bielec_PQxx(:,:,j3,i)=bielec_PQxx(:,:,i,j3) bielec_PQxx_array(:,:,j3,i)=bielec_PQxx_array(:,:,i,j3)
end do end do
end do end do
!$OMP END DO !$OMP END DO
@ -40,8 +47,8 @@ BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core
do j=i,n_act_orb do j=i,n_act_orb
jj=list_act(j) jj=list_act(j)
j3=j+n_core_inact_orb j3=j+n_core_inact_orb
call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i3,j3),mo_integrals_map) call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx_array(1,1,i3,j3),mo_integrals_map)
bielec_PQxx(:,:,j3,i3)=bielec_PQxx(:,:,i3,j3) bielec_PQxx_array(:,:,j3,i3)=bielec_PQxx_array(:,:,i3,j3)
end do end do
end do end do
!$OMP END DO !$OMP END DO
@ -52,9 +59,13 @@ END_PROVIDER
BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] BEGIN_PROVIDER [real*8, bielec_PxxQ_array, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)]
BEGIN_DOC BEGIN_DOC
! bielec_PxxQ : integral (px|xq) with p,q arbitrary, x core or active ! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!!
!
! Replaced by the Cholesky-based function bielec_PxxQ
!
! bielec_PxxQ_array : integral (px|xq) with p,q arbitrary, x core or active
! indices are unshifted orbital numbers ! indices are unshifted orbital numbers
END_DOC END_DOC
implicit none implicit none
@ -62,12 +73,15 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_a
double precision, allocatable :: integrals_array(:,:) double precision, allocatable :: integrals_array(:,:)
real*8 :: mo_two_e_integral real*8 :: mo_two_e_integral
print*,''
print*,'Providing bielec_PxxQ_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!'
print*,''
PROVIDE mo_two_e_integrals_in_map PROVIDE mo_two_e_integrals_in_map
bielec_PxxQ = 0.d0 bielec_PxxQ_array = 0.d0
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,ii,j,jj,i3,j3,integrals_array) & !$OMP PRIVATE(i,ii,j,jj,i3,j3,integrals_array) &
!$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PxxQ, & !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PxxQ_array, &
!$OMP n_act_orb,mo_integrals_map,list_act) !$OMP n_act_orb,mo_integrals_map,list_act)
allocate(integrals_array(mo_num,mo_num)) allocate(integrals_array(mo_num,mo_num))
@ -80,8 +94,8 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_a
call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map)
do q=1,mo_num do q=1,mo_num
do p=1,mo_num do p=1,mo_num
bielec_PxxQ(p,i,j,q)=integrals_array(p,q) bielec_PxxQ_array(p,i,j,q)=integrals_array(p,q)
bielec_PxxQ(p,j,i,q)=integrals_array(q,p) bielec_PxxQ_array(p,j,i,q)=integrals_array(q,p)
end do end do
end do end do
end do end do
@ -91,8 +105,8 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_a
call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map)
do q=1,mo_num do q=1,mo_num
do p=1,mo_num do p=1,mo_num
bielec_PxxQ(p,i,j3,q)=integrals_array(p,q) bielec_PxxQ_array(p,i,j3,q)=integrals_array(p,q)
bielec_PxxQ(p,j3,i,q)=integrals_array(q,p) bielec_PxxQ_array(p,j3,i,q)=integrals_array(q,p)
end do end do
end do end do
end do end do
@ -111,8 +125,8 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_a
call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map)
do q=1,mo_num do q=1,mo_num
do p=1,mo_num do p=1,mo_num
bielec_PxxQ(p,i3,j3,q)=integrals_array(p,q) bielec_PxxQ_array(p,i3,j3,q)=integrals_array(p,q)
bielec_PxxQ(p,j3,i3,q)=integrals_array(q,p) bielec_PxxQ_array(p,j3,i3,q)=integrals_array(q,p)
end do end do
end do end do
end do end do
@ -129,10 +143,15 @@ BEGIN_PROVIDER [real*8, bielecCI, (n_act_orb,n_act_orb,n_act_orb, mo_num)]
BEGIN_DOC BEGIN_DOC
! bielecCI : integrals (tu|vp) with p arbitrary, tuv active ! bielecCI : integrals (tu|vp) with p arbitrary, tuv active
! index p runs over the whole basis, t,u,v only over the active orbitals ! index p runs over the whole basis, t,u,v only over the active orbitals
!
! This array can be stored anyway. Ex: 50 active orbitals, 1500 MOs ==> 8x50^3x1500 = 1.5 Gb
END_DOC END_DOC
implicit none implicit none
integer :: i,j,k,p,t,u,v integer :: i,j,k,p,t,u,v
double precision, external :: mo_two_e_integral double precision, external :: mo_two_e_integral
double precision :: wall0, wall1
call wall_time(wall0)
print*,'Providing bielecCI'
PROVIDE mo_two_e_integrals_in_map PROVIDE mo_two_e_integrals_in_map
!$OMP PARALLEL DO DEFAULT(NONE) & !$OMP PARALLEL DO DEFAULT(NONE) &
@ -151,5 +170,7 @@ BEGIN_PROVIDER [real*8, bielecCI, (n_act_orb,n_act_orb,n_act_orb, mo_num)]
end do end do
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
call wall_time(wall1)
print*,'Time to provide bielecCI = ',wall1 - wall0
END_PROVIDER END_PROVIDER

View File

@ -1,30 +1,38 @@
BEGIN_PROVIDER [real*8, bielec_PQxx_no, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] BEGIN_PROVIDER [real*8, bielec_PQxx_no_array, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)]
BEGIN_DOC BEGIN_DOC
! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!!
!
! Replaced by the Cholesky-based function bielec_PQxx_no
!
! integral (pq|xx) in the basis of natural MOs ! integral (pq|xx) in the basis of natural MOs
! indices are unshifted orbital numbers ! indices are unshifted orbital numbers
!
END_DOC END_DOC
implicit none implicit none
integer :: i,j,k,l,t,u,p,q integer :: i,j,k,l,t,u,p,q
double precision, allocatable :: f(:,:,:), d(:,:,:) double precision, allocatable :: f(:,:,:), d(:,:,:)
print*,''
print*,'Providing bielec_PQxx_no_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!'
print*,''
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(j,k,l,p,d,f) & !$OMP PRIVATE(j,k,l,p,d,f) &
!$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, &
!$OMP bielec_PQxx_no,bielec_PQxx,list_act,natorbsCI) !$OMP bielec_PQxx_no_array,bielec_PQxx_array,list_act,natorbsCI)
allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), & allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), &
d(n_act_orb,mo_num,n_core_inact_act_orb)) d(n_act_orb,mo_num,n_core_inact_act_orb))
!$OMP DO !$OMP DO
do l=1,n_core_inact_act_orb do l=1,n_core_inact_act_orb
bielec_PQxx_no(:,:,:,l) = bielec_PQxx(:,:,:,l) bielec_PQxx_no_array(:,:,:,l) = bielec_PQxx_array(:,:,:,l)
do k=1,n_core_inact_act_orb do k=1,n_core_inact_act_orb
do j=1,mo_num do j=1,mo_num
do p=1,n_act_orb do p=1,n_act_orb
f(p,j,k)=bielec_PQxx_no(list_act(p),j,k,l) f(p,j,k)=bielec_PQxx_no_array(list_act(p),j,k,l)
end do end do
end do end do
end do end do
@ -36,13 +44,13 @@
do k=1,n_core_inact_act_orb do k=1,n_core_inact_act_orb
do j=1,mo_num do j=1,mo_num
do p=1,n_act_orb do p=1,n_act_orb
bielec_PQxx_no(list_act(p),j,k,l)=d(p,j,k) bielec_PQxx_no_array(list_act(p),j,k,l)=d(p,j,k)
end do end do
end do end do
do j=1,mo_num do j=1,mo_num
do p=1,n_act_orb do p=1,n_act_orb
f(p,j,k)=bielec_PQxx_no(j,list_act(p),k,l) f(p,j,k)=bielec_PQxx_no_array(j,list_act(p),k,l)
end do end do
end do end do
end do end do
@ -54,7 +62,7 @@
do k=1,n_core_inact_act_orb do k=1,n_core_inact_act_orb
do p=1,n_act_orb do p=1,n_act_orb
do j=1,mo_num do j=1,mo_num
bielec_PQxx_no(j,list_act(p),k,l)=d(p,j,k) bielec_PQxx_no_array(j,list_act(p),k,l)=d(p,j,k)
end do end do
end do end do
end do end do
@ -71,7 +79,7 @@
do p=1,n_act_orb do p=1,n_act_orb
do k=1,mo_num do k=1,mo_num
do j=1,mo_num do j=1,mo_num
f(j,k,p) = bielec_PQxx_no(j,k,n_core_inact_orb+p,l) f(j,k,p) = bielec_PQxx_no_array(j,k,n_core_inact_orb+p,l)
end do end do
end do end do
end do end do
@ -83,7 +91,7 @@
do p=1,n_act_orb do p=1,n_act_orb
do k=1,mo_num do k=1,mo_num
do j=1,mo_num do j=1,mo_num
bielec_PQxx_no(j,k,n_core_inact_orb+p,l)=d(j,k,p) bielec_PQxx_no_array(j,k,n_core_inact_orb+p,l)=d(j,k,p)
end do end do
end do end do
end do end do
@ -97,7 +105,7 @@
do p=1,n_act_orb do p=1,n_act_orb
do k=1,mo_num do k=1,mo_num
do j=1,mo_num do j=1,mo_num
f(j,k,p) = bielec_PQxx_no(j,k,l,n_core_inact_orb+p) f(j,k,p) = bielec_PQxx_no_array(j,k,l,n_core_inact_orb+p)
end do end do
end do end do
end do end do
@ -109,7 +117,7 @@
do p=1,n_act_orb do p=1,n_act_orb
do k=1,mo_num do k=1,mo_num
do j=1,mo_num do j=1,mo_num
bielec_PQxx_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) bielec_PQxx_no_array(j,k,l,n_core_inact_orb+p)=d(j,k,p)
end do end do
end do end do
end do end do
@ -123,8 +131,12 @@ END_PROVIDER
BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] BEGIN_PROVIDER [real*8, bielec_PxxQ_no_array, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)]
BEGIN_DOC BEGIN_DOC
! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!!
!
! Replaced by the Cholesky-based function bielec_PxxQ_no
!
! integral (px|xq) in the basis of natural MOs ! integral (px|xq) in the basis of natural MOs
! indices are unshifted orbital numbers ! indices are unshifted orbital numbers
END_DOC END_DOC
@ -132,10 +144,14 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
integer :: i,j,k,l,t,u,p,q integer :: i,j,k,l,t,u,p,q
double precision, allocatable :: f(:,:,:), d(:,:,:) double precision, allocatable :: f(:,:,:), d(:,:,:)
print*,''
print*,'Providing bielec_PxxQ_no_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!'
print*,''
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(j,k,l,p,d,f) & !$OMP PRIVATE(j,k,l,p,d,f) &
!$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, &
!$OMP bielec_PxxQ_no,bielec_PxxQ,list_act,natorbsCI) !$OMP bielec_PxxQ_no_array,bielec_PxxQ_array,list_act,natorbsCI)
allocate (f(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb), & allocate (f(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb), &
@ -143,11 +159,11 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
!$OMP DO !$OMP DO
do j=1,mo_num do j=1,mo_num
bielec_PxxQ_no(:,:,:,j) = bielec_PxxQ(:,:,:,j) bielec_PxxQ_no_array(:,:,:,j) = bielec_PxxQ_array(:,:,:,j)
do l=1,n_core_inact_act_orb do l=1,n_core_inact_act_orb
do k=1,n_core_inact_act_orb do k=1,n_core_inact_act_orb
do p=1,n_act_orb do p=1,n_act_orb
f(p,k,l) = bielec_PxxQ_no(list_act(p),k,l,j) f(p,k,l) = bielec_PxxQ_no_array(list_act(p),k,l,j)
end do end do
end do end do
end do end do
@ -159,7 +175,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
do l=1,n_core_inact_act_orb do l=1,n_core_inact_act_orb
do k=1,n_core_inact_act_orb do k=1,n_core_inact_act_orb
do p=1,n_act_orb do p=1,n_act_orb
bielec_PxxQ_no(list_act(p),k,l,j)=d(p,k,l) bielec_PxxQ_no_array(list_act(p),k,l,j)=d(p,k,l)
end do end do
end do end do
end do end do
@ -176,7 +192,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
do l=1,n_core_inact_act_orb do l=1,n_core_inact_act_orb
do j=1,mo_num do j=1,mo_num
do p=1,n_act_orb do p=1,n_act_orb
f(p,j,l) = bielec_PxxQ_no(j,n_core_inact_orb+p,l,k) f(p,j,l) = bielec_PxxQ_no_array(j,n_core_inact_orb+p,l,k)
end do end do
end do end do
end do end do
@ -188,7 +204,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
do l=1,n_core_inact_act_orb do l=1,n_core_inact_act_orb
do j=1,mo_num do j=1,mo_num
do p=1,n_act_orb do p=1,n_act_orb
bielec_PxxQ_no(j,n_core_inact_orb+p,l,k)=d(p,j,l) bielec_PxxQ_no_array(j,n_core_inact_orb+p,l,k)=d(p,j,l)
end do end do
end do end do
end do end do
@ -205,7 +221,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
do p=1,n_act_orb do p=1,n_act_orb
do l=1,n_core_inact_act_orb do l=1,n_core_inact_act_orb
do j=1,mo_num do j=1,mo_num
f(j,l,p) = bielec_PxxQ_no(j,l,n_core_inact_orb+p,k) f(j,l,p) = bielec_PxxQ_no_array(j,l,n_core_inact_orb+p,k)
end do end do
end do end do
end do end do
@ -217,7 +233,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
do p=1,n_act_orb do p=1,n_act_orb
do l=1,n_core_inact_act_orb do l=1,n_core_inact_act_orb
do j=1,mo_num do j=1,mo_num
bielec_PxxQ_no(j,l,n_core_inact_orb+p,k)=d(j,l,p) bielec_PxxQ_no_array(j,l,n_core_inact_orb+p,k)=d(j,l,p)
end do end do
end do end do
end do end do
@ -231,7 +247,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
do p=1,n_act_orb do p=1,n_act_orb
do k=1,n_core_inact_act_orb do k=1,n_core_inact_act_orb
do j=1,mo_num do j=1,mo_num
f(j,k,p) = bielec_PxxQ_no(j,k,l,n_core_inact_orb+p) f(j,k,p) = bielec_PxxQ_no_array(j,k,l,n_core_inact_orb+p)
end do end do
end do end do
end do end do
@ -243,7 +259,7 @@ BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inac
do p=1,n_act_orb do p=1,n_act_orb
do k=1,n_core_inact_act_orb do k=1,n_core_inact_act_orb
do j=1,mo_num do j=1,mo_num
bielec_PxxQ_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) bielec_PxxQ_no_array(j,k,l,n_core_inact_orb+p)=d(j,k,p)
end do end do
end do end do
end do end do
@ -259,11 +275,17 @@ BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)]
BEGIN_DOC BEGIN_DOC
! integrals (tu|vp) in the basis of natural MOs ! integrals (tu|vp) in the basis of natural MOs
! index p runs over the whole basis, t,u,v only over the active orbitals ! index p runs over the whole basis, t,u,v only over the active orbitals
!
! This array can be stored anyway. Ex: 50 active orbitals, 1500 MOs ==> 8x50^3x1500 = 1.5 Gb
END_DOC END_DOC
implicit none implicit none
integer :: i,j,k,l,t,u,p,q integer :: i,j,k,l,t,u,p,q
double precision, allocatable :: f(:,:,:), d(:,:,:) double precision, allocatable :: f(:,:,:), d(:,:,:)
double precision :: wall0, wall1
call wall_time(wall0)
print*,'Providing bielecCI_no'
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(j,k,l,p,d,f) & !$OMP PRIVATE(j,k,l,p,d,f) &
!$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, &
@ -363,6 +385,8 @@ BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)]
deallocate(d,f) deallocate(d,f)
!$OMP END PARALLEL !$OMP END PARALLEL
call wall_time(wall1)
print*,'Time to provide bielecCI_no = ',wall1-wall0
END_PROVIDER END_PROVIDER

View File

@ -11,7 +11,7 @@ program casscf
if(small_active_space)then if(small_active_space)then
pt2_relative_error = 0.00001 pt2_relative_error = 0.00001
else else
thresh_scf = 1.d-4 thresh_scf = max(1.d-4,thresh_scf)
pt2_relative_error = 0.04 pt2_relative_error = 0.04
endif endif
touch pt2_relative_error touch pt2_relative_error
@ -46,6 +46,11 @@ subroutine run
do while (.not.converged) do while (.not.converged)
print*,'pt2_max = ',pt2_max print*,'pt2_max = ',pt2_max
call run_stochastic_cipsi(Ev,PT2) call run_stochastic_cipsi(Ev,PT2)
! if(act_mos_opt)then DOES NOT WORK
! call run_orb_opt_trust_v2
! call run_stochastic_cipsi(Ev,PT2)
! endif
if(.True.)then
print*,'Ev,PT2',Ev(1),PT2(1) print*,'Ev,PT2',Ev(1),PT2(1)
E_PT2(1:N_states) = Ev(1:N_states) + PT2(1:N_states) E_PT2(1:N_states) = Ev(1:N_states) + PT2(1:N_states)
energy_old = energy energy_old = energy
@ -55,9 +60,9 @@ subroutine run
call write_time(6) call write_time(6)
call write_int(6,iteration,'CAS-SCF iteration = ') call write_int(6,iteration,'CAS-SCF iteration = ')
call write_double(6,energy,'State-average CAS-SCF energy = ') call write_double(6,energy,'State-average CAS-SCF energy = ')
! if(n_states == 1)then !! if(n_states == 1)then
! call ezfio_get_casscf_cipsi_energy_pt2(E_PT2) !! call ezfio_get_casscf_cipsi_energy_pt2(E_PT2)
! call ezfio_get_casscf_cipsi_energy(PT2) !! call ezfio_get_casscf_cipsi_energy(PT2)
double precision :: delta_E_istate, e_av double precision :: delta_E_istate, e_av
e_av = 0.d0 e_av = 0.d0
do istate=1,N_states do istate=1,N_states
@ -68,12 +73,12 @@ subroutine run
endif endif
write(*,'(A6,I2,A18,F16.10)')'state ',istate,' E + PT2 energy = ',E_PT2(istate) write(*,'(A6,I2,A18,F16.10)')'state ',istate,' E + PT2 energy = ',E_PT2(istate)
write(*,'(A6,I2,A18,F16.10)')'state ',istate,' PT2 energy = ',PT2(istate) write(*,'(A6,I2,A18,F16.10)')'state ',istate,' PT2 energy = ',PT2(istate)
! call write_double(6,E_PT2(istate),'E + PT2 energy = ') !! call write_double(6,E_PT2(istate),'E + PT2 energy = ')
! call write_double(6,PT2(istate),' PT2 = ') !! call write_double(6,PT2(istate),' PT2 = ')
enddo enddo
call write_double(6,e_av,'State-average CAS-SCF energy bis = ') call write_double(6,e_av,'State-average CAS-SCF energy bis = ')
call write_double(6,pt2_max,' PT2_MAX = ') call write_double(6,pt2_max,' PT2_MAX = ')
! endif !! endif
print*,'' print*,''
call write_double(6,norm_grad_vec2,'Norm of gradients = ') call write_double(6,norm_grad_vec2,'Norm of gradients = ')
@ -132,8 +137,10 @@ subroutine run
soft_touch state_following_casscf soft_touch state_following_casscf
endif endif
endif endif
endif
enddo enddo
if(.True.)then
integer :: i integer :: i
print*,'Converged CASSCF ' print*,'Converged CASSCF '
print*,'--------------------------' print*,'--------------------------'
@ -153,6 +160,7 @@ subroutine run
! write(*,*)mcscf_fock_alpha_mo(i,i) ! write(*,*)mcscf_fock_alpha_mo(i,i)
enddo enddo
endif
end end

View File

@ -0,0 +1,248 @@
BEGIN_PROVIDER [double precision, cholesky_no_1_idx_transp, (cholesky_mo_num, n_act_orb, mo_num)]
BEGIN_DOC
! Cholesky vectors with ONE orbital on the active natural orbital basis
END_DOC
implicit none
integer :: i_chol,i_act,i_mo,jj_act
double precision, allocatable :: chol_tmp(:,:)
double precision :: wall0,wall1
call wall_time(wall0)
print*,'Providing cholesky_no_1_idx_transp'
allocate(chol_tmp(cholesky_mo_num,n_act_orb))
cholesky_no_1_idx_transp = 0.D0
do i_mo = 1, mo_num
! Get all the integrals corresponding to the "i_mo"
do i_act = 1, n_act_orb
jj_act = list_act(i_act)
do i_chol = 1, cholesky_mo_num
chol_tmp(i_chol, i_act) = cholesky_mo_transp(i_chol, jj_act, i_mo)
enddo
enddo
call dgemm('N','N',cholesky_mo_num,n_act_orb,n_act_orb,1.d0, &
chol_tmp, size(chol_tmp,1), &
natorbsCI, size(natorbsCI,1), &
0.d0, &
cholesky_no_1_idx_transp(1,1,i_mo), size(cholesky_no_1_idx_transp,1))
enddo
call wall_time(wall1)
print*,'Time to provide cholesky_no_1_idx_transp = ', wall1 - wall0
END_PROVIDER
BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp, (cholesky_mo_num, n_act_orb, n_act_orb)]
BEGIN_DOC
! Cholesky vectors with TWO orbital on the active natural orbital basis
END_DOC
implicit none
integer :: i_chol,i_act,j_act,jj_act
double precision, allocatable :: chol_tmp(:,:),chol_tmp_bis(:,:)
allocate(chol_tmp(cholesky_mo_num,n_act_orb),chol_tmp_bis(cholesky_mo_num,n_act_orb))
double precision :: wall0,wall1
call wall_time(wall0)
print*,'Providing cholesky_no_2_idx_transp'
cholesky_no_2_idx_transp = 0.D0
do i_act = 1, n_act_orb
! Get all the integrals corresponding to the "j_act"
do j_act = 1, n_act_orb
jj_act = list_act(j_act)
do i_chol = 1, cholesky_mo_num
chol_tmp(i_chol, j_act) = cholesky_no_1_idx_transp(i_chol, i_act, jj_act)
enddo
enddo
call dgemm('N','N',cholesky_mo_num,n_act_orb,n_act_orb,1.d0, &
chol_tmp, size(chol_tmp,1), &
natorbsCI, size(natorbsCI,1), &
0.d0, &
cholesky_no_2_idx_transp(1,1,i_act), size(cholesky_no_2_idx_transp,1))
enddo
call wall_time(wall1)
print*,'Time to provide cholesky_no_2_idx_transp = ', wall1 - wall0
END_PROVIDER
BEGIN_PROVIDER [ double precision, cholesky_no_total_transp, (cholesky_mo_num, mo_num, mo_num)]
implicit none
BEGIN_DOC
! Cholesky vectors defined on all basis including the NO basis
END_DOC
integer :: i_chol, i_act, ii_act, j_act, jj_act, i_core_inact, j_core_inact, ii_core_inact, jj_core_inact
integer :: i_virt, ii_virt, j_virt, jj_virt
double precision :: wall0,wall1
call wall_time(wall0)
print*,'Providing cholesky_no_total_transp '
! Block when two orbitals belong to the core/inact
do j_core_inact = 1, n_core_inact_orb
jj_core_inact = list_core_inact(j_core_inact)
do i_core_inact = 1, n_core_inact_orb
ii_core_inact = list_core_inact(i_core_inact)
do i_chol = 1, cholesky_mo_num
cholesky_no_total_transp(i_chol, ii_core_inact, jj_core_inact) = cholesky_mo_transp(i_chol,ii_core_inact,jj_core_inact)
enddo
enddo
enddo
! Block when one orbitals belongs to the core/inact and one belongs to the active
do j_core_inact = 1, n_core_inact_orb
jj_core_inact = list_core_inact(j_core_inact)
do i_act = 1, n_act_orb
ii_act = list_act(i_act)
do i_chol = 1, cholesky_mo_num
cholesky_no_total_transp(i_chol,ii_act,j_core_inact) = cholesky_no_1_idx_transp(i_chol,i_act,jj_core_inact)
enddo
enddo
enddo
do j_core_inact = 1, n_core_inact_orb
jj_core_inact = list_core_inact(j_core_inact)
do i_act = 1, n_act_orb
ii_act = list_act(i_act)
do i_chol = 1, cholesky_mo_num
cholesky_no_total_transp(i_chol,j_core_inact,ii_act) = cholesky_no_1_idx_transp(i_chol,i_act,jj_core_inact)
enddo
enddo
enddo
! Block when two orbitals belong to the active
do j_act = 1, n_act_orb
jj_act = list_act(j_act)
do i_act = 1, n_act_orb
ii_act = list_act(i_act)
do i_chol = 1, cholesky_mo_num
cholesky_no_total_transp(i_chol,ii_act,jj_act) = cholesky_no_2_idx_transp(i_chol,i_act,j_act)
enddo
enddo
enddo
! Block when two orbitals belong to the virtuals
do i_virt = 1, n_virt_orb
ii_virt = list_virt(i_virt)
do j_virt = 1, n_virt_orb
jj_virt = list_virt(j_virt)
do i_chol = 1, cholesky_mo_num
cholesky_no_total_transp(i_chol,jj_virt,ii_virt) = cholesky_mo_transp(i_chol,jj_virt,ii_virt)
enddo
enddo
enddo
! Block when one orbital is in active and the other in the virtuals
do i_virt = 1, n_virt_orb
ii_virt = list_virt(i_virt)
do i_act = 1, n_act_orb
ii_act = list_act(i_act)
do i_chol = 1, cholesky_mo_num
cholesky_no_total_transp(i_chol,ii_act,ii_virt) = cholesky_no_1_idx_transp(i_chol, i_act,ii_virt)
enddo
enddo
enddo
do i_virt = 1, n_virt_orb
ii_virt = list_virt(i_virt)
do i_act = 1, n_act_orb
ii_act = list_act(i_act)
do i_chol = 1, cholesky_mo_num
cholesky_no_total_transp(i_chol,ii_virt,ii_act) = cholesky_no_1_idx_transp(i_chol, i_act,ii_virt)
enddo
enddo
enddo
! Block when one orbital is in the virtual and one in the core-inact
do i_virt = 1, n_virt_orb
ii_virt = list_virt(i_virt)
do i_core_inact = 1, n_core_inact_orb
ii_core_inact = list_core_inact(i_core_inact)
do i_chol = 1, cholesky_mo_num
cholesky_no_total_transp(i_chol, ii_core_inact, ii_virt) = cholesky_mo_transp(i_chol, ii_core_inact, ii_virt)
enddo
enddo
enddo
do i_core_inact = 1, n_core_inact_orb
ii_core_inact = list_core_inact(i_core_inact)
do i_virt = 1, n_virt_orb
ii_virt = list_virt(i_virt)
do i_chol = 1, cholesky_mo_num
cholesky_no_total_transp(i_chol, ii_virt, ii_core_inact) = cholesky_mo_transp(i_chol, ii_virt, ii_core_inact)
enddo
enddo
enddo
call wall_time(wall1)
print*,'Time to provide cholesky_no_total_transp = ', wall1 - wall0
END_PROVIDER
double precision function bielec_no_basis(i_1,j_1,i_2,j_2)
implicit none
integer, intent(in) :: i_1,j_1,i_2,j_2
BEGIN_DOC
! integral (i_1 j_1|i_2 j_2) in the mixed basis of both MOs and natural MOs
!
END_DOC
integer :: i
bielec_no_basis = 0.d0
do i = 1, cholesky_mo_num
bielec_no_basis += cholesky_no_total_transp(i,i_1, j_1) * cholesky_no_total_transp(i,i_2,j_2)
enddo
end
double precision function bielec_PQxx_no(i_mo, j_mo, i_ca, j_ca)
implicit none
BEGIN_DOC
! function that computes (i_mo j_mo| i_ca j_ca) with Cholesky decomposition on the NO basis for active orbitals
!
! where i_ca, j_ca are in [1:n_core_inact_act_orb]
END_DOC
integer, intent(in) :: i_ca, j_ca, i_mo, j_mo
integer :: ii_ca, jj_ca
double precision :: bielec_no_basis
ii_ca = list_core_inact_act(i_ca)
jj_ca = list_core_inact_act(j_ca)
bielec_PQxx_no = bielec_no_basis(i_mo,j_mo,ii_ca,jj_ca)
end
double precision function bielec_PxxQ_no(i_mo, j_ca, i_ca, j_mo)
implicit none
BEGIN_DOC
! function that computes (i_mo j_ca |i_ca j_mo) with Cholesky decomposition on the NO basis for active orbitals
!
! where i_ca, j_ca are in [1:n_core_inact_act_orb]
END_DOC
integer, intent(in) :: i_ca, j_ca, i_mo, j_mo
integer :: ii_ca, jj_ca
double precision :: bielec_no_basis
ii_ca = list_core_inact_act(i_ca)
jj_ca = list_core_inact_act(j_ca)
bielec_PxxQ_no = bielec_no_basis(i_mo, jj_ca, ii_ca, j_mo)
end
double precision function bielec_PQxx(i_mo, j_mo, i_ca, j_ca)
BEGIN_DOC
! function that computes (i_mo j_mo |i_ca j_ca) with Cholesky decomposition
!
! indices are unshifted orbital numbers
!
! where i_ca, j_ca are in [1:n_core_inact_act_orb]
END_DOC
implicit none
integer, intent(in) :: i_ca, j_ca, j_mo, i_mo
double precision :: mo_two_e_integral
integer :: ii_ca, jj_ca
ii_ca = list_core_inact_act(i_ca)
jj_ca = list_core_inact_act(j_ca)
bielec_PQxx = mo_two_e_integral(i_mo,ii_ca,j_mo,jj_ca)
end
double precision function bielec_PxxQ(i_mo, i_ca, j_ca, j_mo)
BEGIN_DOC
! function that computes (i_mo j_mo |i_ca j_ca) with Cholesky decomposition
!
! where i_ca, j_ca are in [1:n_core_inact_act_orb]
END_DOC
implicit none
integer, intent(in) :: i_ca, j_ca, j_mo, i_mo
double precision :: mo_two_e_integral
integer :: ii_ca, jj_ca
ii_ca = list_core_inact_act(i_ca)
jj_ca = list_core_inact_act(j_ca)
bielec_PxxQ = mo_two_e_integral(i_mo,jj_ca,ii_ca,j_mo)
end

View File

@ -0,0 +1,34 @@
!!!!! FUNCTIONS THAT WORK BUT WHICH ARE USELESS AS THE ARRAYS CAN ALWAYS BE STORED
!double precision function bielecCI_chol(i_a, j_a, k_a, i_mo)
! BEGIN_DOC
! ! function that computes (i_a j_a |k_a j_mo) with Cholesky decomposition
! !
! ! where i_a, j_a, k_a are in [1:n_act_orb] !!! ONLY ON ACTIVE
! END_DOC
! implicit none
! integer, intent(in) :: i_a, j_a, k_a, i_mo
! integer :: ii_a, jj_a, kk_a
! double precision :: mo_two_e_integral
! ii_a = list_act(i_a)
! jj_a = list_act(j_a)
! kk_a = list_act(k_a)
! bielecCI_chol = mo_two_e_integral(ii_a,kk_a,jj_a,i_mo)
!end
!double precision function bielecCI_no_chol(i_ca, j_ca, k_ca, i_mo)
! BEGIN_DOC
! ! function that computes (i_ca j_ca |k_ca j_mo) with Cholesky decomposition on the NO basis for active orbitals
! !
! ! where i_ca, j_ca, k_ca are in [1:n_core_inact_act_orb]
! END_DOC
! implicit none
! integer, intent(in) :: i_ca, j_ca, k_ca, i_mo
! integer :: ii_ca, jj_ca, kk_ca
! double precision :: bielec_no_basis_chol
! ii_ca = list_act(i_ca)
! jj_ca = list_act(j_ca)
! kk_ca = list_act(k_ca)
! bielecCI_no_chol = bielec_no_basis_chol(ii_ca, jj_ca, kk_ca, i_mo)
!
!end

View File

@ -157,6 +157,7 @@ real*8 function gradvec_it(i,t)
integer :: ii,tt,v,vv,x,y integer :: ii,tt,v,vv,x,y
integer :: x3,y3 integer :: x3,y3
double precision :: bielec_PQxx_no
ii=list_core_inact(i) ii=list_core_inact(i)
tt=list_act(t) tt=list_act(t)

View File

@ -10,6 +10,7 @@ real*8 function hessmat_itju(i,t,j,u)
implicit none implicit none
integer :: i,t,j,u,ii,tt,uu,v,vv,x,xx,y,jj integer :: i,t,j,u,ii,tt,uu,v,vv,x,xx,y,jj
real*8 :: term,t2 real*8 :: term,t2
double precision :: bielec_pqxx_no,bielec_pxxq_no
ii=list_core_inact(i) ii=list_core_inact(i)
tt=list_act(t) tt=list_act(t)
@ -95,6 +96,7 @@ real*8 function hessmat_itja(i,t,j,a)
implicit none implicit none
integer :: i,t,j,a,ii,tt,jj,aa,v,vv,x,y integer :: i,t,j,a,ii,tt,jj,aa,v,vv,x,y
real*8 :: term real*8 :: term
double precision :: bielec_pqxx_no,bielec_pxxq_no
! it/ja ! it/ja
ii=list_core_inact(i) ii=list_core_inact(i)
@ -128,6 +130,7 @@ real*8 function hessmat_itua(i,t,u,a)
implicit none implicit none
integer :: i,t,u,a,ii,tt,uu,aa,v,vv,x,xx,u3,t3,v3 integer :: i,t,u,a,ii,tt,uu,aa,v,vv,x,xx,u3,t3,v3
real*8 :: term real*8 :: term
double precision :: bielec_pqxx_no,bielec_pxxq_no
ii=list_core_inact(i) ii=list_core_inact(i)
tt=list_act(t) tt=list_act(t)
@ -169,6 +172,7 @@ real*8 function hessmat_iajb(i,a,j,b)
implicit none implicit none
integer :: i,a,j,b,ii,aa,jj,bb integer :: i,a,j,b,ii,aa,jj,bb
real*8 :: term real*8 :: term
double precision :: bielec_pqxx_no,bielec_pxxq_no
ii=list_core_inact(i) ii=list_core_inact(i)
aa=list_virt(a) aa=list_virt(a)
@ -205,6 +209,7 @@ real*8 function hessmat_iatb(i,a,t,b)
implicit none implicit none
integer :: i,a,t,b,ii,aa,tt,bb,v,vv,x,y,v3,t3 integer :: i,a,t,b,ii,aa,tt,bb,v,vv,x,y,v3,t3
real*8 :: term real*8 :: term
double precision :: bielec_pqxx_no,bielec_pxxq_no
ii=list_core_inact(i) ii=list_core_inact(i)
aa=list_virt(a) aa=list_virt(a)
@ -237,6 +242,7 @@ real*8 function hessmat_taub(t,a,u,b)
integer :: t,a,u,b,tt,aa,uu,bb,v,vv,x,xx,y integer :: t,a,u,b,tt,aa,uu,bb,v,vv,x,xx,y
integer :: v3,x3 integer :: v3,x3
real*8 :: term,t1,t2,t3 real*8 :: term,t1,t2,t3
double precision :: bielec_pqxx_no,bielec_pxxq_no
tt=list_act(t) tt=list_act(t)
aa=list_virt(a) aa=list_virt(a)

View File

@ -4,6 +4,7 @@ BEGIN_PROVIDER [real*8, Fipq, (mo_num,mo_num) ]
END_DOC END_DOC
implicit none implicit none
integer :: p,q,k,kk,t,tt,u,uu integer :: p,q,k,kk,t,tt,u,uu
double precision :: bielec_pxxq_no, bielec_pqxx_no
do q=1,mo_num do q=1,mo_num
do p=1,mo_num do p=1,mo_num
@ -44,6 +45,7 @@ BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ]
END_DOC END_DOC
implicit none implicit none
integer :: p,q,k,kk,t,tt,u,uu integer :: p,q,k,kk,t,tt,u,uu
double precision :: bielec_pxxq_no, bielec_pqxx_no
Fapq = 0.d0 Fapq = 0.d0

View File

@ -0,0 +1,116 @@
program test_chol
implicit none
read_wf= .True.
touch read_wf
! call routine_bielec_PxxQ_no
! call routine_bielecCI_no
! call test_bielec_PxxQ_chol
! call test_bielecCI
end
subroutine routine_bielec_PQxx_no
implicit none
integer :: i_chol, i_act, ii_act, j_act, jj_act, i_core_inact, j_core_inact, ii_core_inact, jj_core_inact
integer :: i_virt, ii_virt, j_virt, jj_virt, i_mo, j_mo
double precision :: exact, new, error, accu, bielec_no_basis_chol
double precision :: bielec_PQxx_no
accu = 0.d0
do i_core_inact = 1, n_core_inact_act_orb
ii_core_inact = list_core_inact_act(i_core_inact)
do j_core_inact = 1, n_core_inact_act_orb
jj_core_inact = list_core_inact_act(j_core_inact)
do i_mo = 1, mo_num
do j_mo = 1, mo_num
exact = bielec_PQxx_no_array(j_mo,i_mo, j_core_inact, i_core_inact)
new = bielec_PQxx_no(j_mo,i_mo, j_core_inact, i_core_inact)
error = dabs(exact-new)
if(dabs(exact).gt.1.d-10)then
print*,exact,new,error
endif
accu += error
enddo
enddo
enddo
enddo
print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2))
end
subroutine routine_bielec_PxxQ_no_array
implicit none
integer :: i_chol, i_act, ii_act, j_act, jj_act, i_core_inact, j_core_inact, ii_core_inact, jj_core_inact
integer :: i_virt, ii_virt, j_virt, jj_virt, i_mo, j_mo
double precision :: exact, new, error, accu, bielec_no_basis_chol
double precision :: bielec_PxxQ_no
accu = 0.d0
do i_mo = 1, mo_num
do i_core_inact = 1, n_core_inact_act_orb
ii_core_inact = list_core_inact_act(i_core_inact)
do j_core_inact = 1, n_core_inact_act_orb
jj_core_inact = list_core_inact_act(j_core_inact)
do j_mo = 1, mo_num
exact = bielec_PxxQ_no_array(j_mo, j_core_inact, i_core_inact,i_mo)
! new = bielec_no_basis_chol(j_mo,i_mo, jj_core_inact, ii_core_inact)
new = bielec_PxxQ_no(j_mo, j_core_inact, i_core_inact,i_mo)
error = dabs(exact-new)
accu += error
if(dabs(exact).gt.1.d-10)then
print*,exact,new,error
endif
enddo
enddo
enddo
enddo
print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2))
end
subroutine test_bielec_PQxx(i_mo, j_mo, i_ca, j_ca)
implicit none
integer :: i_mo, j_mo, i_ca, j_ca
double precision :: exact, new, error, accu
double precision :: bielec_PQxx
accu = 0.d0
do j_ca = 1, n_core_inact_act_orb
do i_ca = 1, n_core_inact_act_orb
do j_mo = 1, mo_num
do i_mo = 1, mo_num
exact = bielec_PQxx_array(i_mo, j_mo, i_ca, j_ca)
new = bielec_PQxx(i_mo, j_mo, i_ca, j_ca)
error = dabs(exact-new)
accu += error
if(dabs(exact).gt.1.d-10)then
print*,exact,new,error
endif
enddo
enddo
enddo
enddo
print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2))
end
subroutine test_bielec_PxxQ_chol(i_mo, i_ca, j_ca, j_mo)
implicit none
integer :: i_mo, i_ca, j_ca, j_mo
double precision :: exact, new, error, accu
double precision :: bielec_PxxQ
accu = 0.d0
do j_mo = 1, mo_num
do j_ca = 1, n_core_inact_act_orb
do i_ca =1, n_core_inact_act_orb
do i_mo = 1, mo_num
exact = bielec_PxxQ_array(i_mo, i_ca, j_ca, j_mo)
new = bielec_PxxQ(i_mo, i_ca, j_ca, j_mo)
error = dabs(exact-new)
accu += error
if(dabs(exact).gt.1.d-10)then
print*,exact,new,error
endif
enddo
enddo
enddo
enddo
print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2))
end

View File

@ -8,6 +8,7 @@
implicit none implicit none
integer :: t,u,v,x,i,ii,tt,uu,vv,xx,j,jj,t3,u3,v3,x3 integer :: t,u,v,x,i,ii,tt,uu,vv,xx,j,jj,t3,u3,v3,x3
real*8 :: e_one_all,e_two_all real*8 :: e_one_all,e_two_all
double precision :: bielec_PQxx,bielec_PxxQ
e_one_all=0.D0 e_one_all=0.D0
e_two_all=0.D0 e_two_all=0.D0
do i=1,n_core_inact_orb do i=1,n_core_inact_orb

View File

@ -1,2 +1,3 @@
gpu
hartree_fock hartree_fock
utils_cc utils_cc

View File

@ -1,4 +1,5 @@
subroutine run_ccsd_space_orb subroutine run_ccsd_space_orb
use gpu
implicit none implicit none
@ -9,9 +10,19 @@ subroutine run_ccsd_space_orb
double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb
logical :: not_converged logical :: not_converged
double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:) type(gpu_double4) :: t2, r2, tau, tau_x
double precision, allocatable :: t1(:,:), r1(:,:) type(gpu_double2) :: t1, r1
double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) type(gpu_double2) :: H_oo, H_vv, H_vo
type(gpu_double2) :: d_cc_space_f_oo, d_cc_space_f_vo
type(gpu_double2) :: d_cc_space_f_ov, d_cc_space_f_vv
type(gpu_double3) :: d_cc_space_v_oo_chol, d_cc_space_v_vo_chol
type(gpu_double3) :: d_cc_space_v_ov_chol, d_cc_space_v_vv_chol
type(gpu_double4) :: d_cc_space_v_oovv, d_cc_space_v_voov, d_cc_space_v_ovov
type(gpu_double4) :: d_cc_space_v_oovo, d_cc_space_v_vooo, d_cc_space_v_oooo
type(gpu_double4) :: d_cc_space_v_vvoo, d_cc_space_v_ovvo, d_cc_space_v_ovoo
double precision, allocatable :: all_err(:,:), all_t(:,:) double precision, allocatable :: all_err(:,:), all_t(:,:)
integer, allocatable :: list_occ(:), list_vir(:) integer, allocatable :: list_occ(:), list_vir(:)
@ -20,7 +31,7 @@ subroutine run_ccsd_space_orb
call set_multiple_levels_omp(.False.) call set_multiple_levels_omp(.False.)
if (do_ao_cholesky) then if (do_mo_cholesky) then
PROVIDE cholesky_mo_transp PROVIDE cholesky_mo_transp
FREE cholesky_ao FREE cholesky_ao
else else
@ -51,11 +62,77 @@ subroutine run_ccsd_space_orb
!print*,'occ',list_occ !print*,'occ',list_occ
!print*,'vir',list_vir !print*,'vir',list_vir
allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) ! GPU arrays
allocate(tau(nO,nO,nV,nV)) call gpu_allocate(d_cc_space_f_oo, nO, nO)
allocate(tau_x(nO,nO,nV,nV)) call gpu_allocate(d_cc_space_f_vo, nV, nO)
allocate(t1(nO,nV), r1(nO,nV)) call gpu_allocate(d_cc_space_f_ov, nO, nV)
allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) call gpu_allocate(d_cc_space_f_vv, nV, nV)
call gpu_upload(cc_space_f_oo, d_cc_space_f_oo)
call gpu_upload(cc_space_f_vo, d_cc_space_f_vo)
call gpu_upload(cc_space_f_ov, d_cc_space_f_ov)
call gpu_upload(cc_space_f_vv, d_cc_space_f_vv)
! FREE cc_space_f_oo
! FREE cc_space_f_vo
! FREE cc_space_f_vv
if (do_mo_cholesky) then
call gpu_allocate(d_cc_space_v_oo_chol, cholesky_mo_num, nO, nO)
call gpu_allocate(d_cc_space_v_ov_chol, cholesky_mo_num, nO, nV)
call gpu_allocate(d_cc_space_v_vo_chol, cholesky_mo_num, nV, nO)
call gpu_allocate(d_cc_space_v_vv_chol, cholesky_mo_num, nV, nV)
call gpu_upload(cc_space_v_oo_chol, d_cc_space_v_oo_chol)
call gpu_upload(cc_space_v_ov_chol, d_cc_space_v_ov_chol)
call gpu_upload(cc_space_v_vo_chol, d_cc_space_v_vo_chol)
call gpu_upload(cc_space_v_vv_chol, d_cc_space_v_vv_chol)
! FREE cc_space_v_oo_chol
! FREE cc_space_v_ov_chol
! FREE cc_space_v_vo_chol
! FREE cc_space_v_vv_chol
endif
call gpu_allocate(d_cc_space_v_oovv, nO, nO, nV, nV)
call gpu_allocate(d_cc_space_v_voov, nV, nO, nO, nV)
call gpu_allocate(d_cc_space_v_ovov, nO, nV, nO, nV)
call gpu_allocate(d_cc_space_v_oovo, nO, nO, nV, nO)
call gpu_allocate(d_cc_space_v_ovvo, nO, nV, nV, nO)
call gpu_allocate(d_cc_space_v_vooo, nV, nO, nO, nO)
call gpu_allocate(d_cc_space_v_oooo, nO, nO, nO, nO)
call gpu_allocate(d_cc_space_v_vvoo, nV, nV, nO, nO)
call gpu_allocate(d_cc_space_v_ovoo, nO, nV, nO, nO)
call gpu_upload(cc_space_v_oovv, d_cc_space_v_oovv)
call gpu_upload(cc_space_v_voov, d_cc_space_v_voov)
call gpu_upload(cc_space_v_ovov, d_cc_space_v_ovov)
call gpu_upload(cc_space_v_oovo, d_cc_space_v_oovo)
call gpu_upload(cc_space_v_ovvo, d_cc_space_v_ovvo)
call gpu_upload(cc_space_v_vooo, d_cc_space_v_vooo)
call gpu_upload(cc_space_v_oooo, d_cc_space_v_oooo)
call gpu_upload(cc_space_v_vvoo, d_cc_space_v_vvoo)
call gpu_upload(cc_space_v_ovoo, d_cc_space_v_ovoo)
! FREE cc_space_v_voov
! FREE cc_space_v_ovov
! FREE cc_space_v_oovo
! FREE cc_space_v_oovv
! FREE cc_space_v_vooo
! FREE cc_space_v_oooo
! FREE cc_space_v_vvoo
! FREE cc_space_v_ovvo
! FREE cc_space_v_ovoo
call gpu_allocate(t2, nO,nO,nV,nV)
call gpu_allocate(r2, nO,nO,nV,nV)
call gpu_allocate(tau, nO,nO,nV,nV)
call gpu_allocate(tau_x, nO,nO,nV,nV)
call gpu_allocate(t1, nO,nV)
call gpu_allocate(r1, nO,nV)
call gpu_allocate(H_oo, nO, nO)
call gpu_allocate(H_vo, nV, nO)
call gpu_allocate(H_vv, nV, nV)
if (cc_update_method == 'diis') then if (cc_update_method == 'diis') then
double precision :: rss, diis_mem, extra_mem double precision :: rss, diis_mem, extra_mem
@ -97,14 +174,22 @@ subroutine run_ccsd_space_orb
endif endif
! Init ! Init
call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1) double precision, allocatable :: h_t1(:,:), h_t2(:,:,:,:)
call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2) allocate(h_t1(nO,nV), h_t2(nO,nO,nV,nV))
call update_tau_space(nO,nV,t1,t2,tau)
call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,h_t1)
call gpu_upload(h_t1, t1)
call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,h_t2)
call gpu_upload(h_t2, t2)
call update_tau_space(nO,nV,h_t1,t1,t2,tau)
call update_tau_x_space(nO,nV,tau,tau_x) call update_tau_x_space(nO,nV,tau,tau_x)
!print*,'hf_energy', hf_energy
call det_energy(det,uncorr_energy) call det_energy(det,uncorr_energy)
print*,'Det energy', uncorr_energy print*,'Det energy', uncorr_energy
call ccsd_energy_space_x(nO,nV,tau_x,t1,energy)
call ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy)
print*,'Guess energy', uncorr_energy+energy, energy print*,'Guess energy', uncorr_energy+energy, energy
nb_iter = 0 nb_iter = 0
@ -120,43 +205,45 @@ subroutine run_ccsd_space_orb
do while (not_converged) do while (not_converged)
! Residue ! Residue
if (do_ao_cholesky) then if (do_mo_cholesky) then
! if (.False.) then call compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, d_cc_space_v_ov_chol,d_cc_space_v_vo_chol,H_oo)
call compute_H_oo_chol(nO,nV,tau_x,H_oo) call compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, d_cc_space_v_ov_chol,H_vv)
call compute_H_vv_chol(nO,nV,tau_x,H_vv) call compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, d_cc_space_v_ov_chol,d_cc_space_v_vo_chol, H_vo)
call compute_H_vo_chol(nO,nV,t1,H_vo)
call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1,d_cc_space_f_ov,d_cc_space_f_vo, &
call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) d_cc_space_v_voov, d_cc_space_v_ovov, d_cc_space_v_oovo, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol)
call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, &
d_cc_space_v_oovv, d_cc_space_v_vooo, d_cc_space_v_oooo, d_cc_space_v_oovo, d_cc_space_v_ovvo, d_cc_space_v_ovoo, &
d_cc_space_v_ovov, d_cc_space_v_vvoo, d_cc_space_v_oo_chol, d_cc_space_v_ov_chol, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol, &
d_cc_space_f_vo, &
r2, max_r2)
else else
call compute_H_oo(nO,nV,t1,t2,tau,H_oo) call compute_H_oo(nO,nV,t1%f,t2%f,tau%f,H_oo%f)
call compute_H_vv(nO,nV,t1,t2,tau,H_vv) call compute_H_vv(nO,nV,t1%f,t2%f,tau%f,H_vv%f)
call compute_H_vo(nO,nV,t1,t2,H_vo) call compute_H_vo(nO,nV,t1%f,t2%f,H_vo%f)
call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call compute_r1_space(nO,nV,t1%f,t2%f,tau%f,H_oo%f,H_vv%f,H_vo%f,r1%f,max_r1)
call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) call compute_r2_space(nO,nV,t1%f,t2%f,tau%f,H_oo%f,H_vv%f,H_vo%f,r2%f,max_r2)
endif endif
max_r = max(max_r1,max_r2) max_r = max(max_r1,max_r2)
! Update ! Update
if (cc_update_method == 'diis') then if (cc_update_method == 'diis') then
!call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1%f,r2%f,t1%f,t2%f,all_err,all_t)
!call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2)
call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t)
! Standard update as T = T - Delta ! Standard update as T = T - Delta
elseif (cc_update_method == 'none') then elseif (cc_update_method == 'none') then
call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1) call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1%f,t1%f)
call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2) call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2%f,t2%f)
else else
print*,'Unkown cc_method_method: '//cc_update_method print*,'Unkown cc_method_method: '//cc_update_method
endif endif
call update_tau_space(nO,nV,t1,t2,tau) call update_tau_space(nO,nV,t1%f,t1,t2,tau)
call update_tau_x_space(nO,nV,tau,tau_x) call update_tau_x_space(nO,nV,tau,tau_x)
! Energy ! Energy
call ccsd_energy_space_x(nO,nV,tau_x,t1,energy) call ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy)
write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |'
nb_iter = nb_iter + 1 nb_iter = nb_iter + 1
@ -181,8 +268,8 @@ subroutine run_ccsd_space_orb
print*,'' print*,''
if (write_amplitudes) then if (write_amplitudes) then
call write_t1(nO,nV,t1) call write_t1(nO,nV,t1%f)
call write_t2(nO,nV,t2) call write_t2(nO,nV,t2%f)
call ezfio_set_utils_cc_io_amplitudes('Read') call ezfio_set_utils_cc_io_amplitudes('Read')
endif endif
@ -191,7 +278,14 @@ subroutine run_ccsd_space_orb
deallocate(all_err,all_t) deallocate(all_err,all_t)
endif endif
deallocate(H_vv,H_oo,H_vo,r1,r2,tau) call gpu_deallocate(H_oo)
call gpu_deallocate(H_vv)
call gpu_deallocate(H_vo)
call gpu_deallocate(r1)
call gpu_deallocate(r2)
call gpu_deallocate(tau)
call gpu_deallocate(tau_x)
! CCSD(T) ! CCSD(T)
double precision :: e_t, e_t_err double precision :: e_t, e_t_err
@ -199,28 +293,14 @@ subroutine run_ccsd_space_orb
if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then
! Dumb way
!call wall_time(ta)
!call ccsd_par_t_space(nO,nV,t1,t2,e_t)
!call wall_time(tb)
!print*,'Time: ',tb-ta, ' s'
!print*,''
!write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha'
!write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha'
!write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha'
!print*,''
! New ! New
e_t = uncorr_energy + energy ! For print in (T) call e_t = uncorr_energy + energy ! For print in (T) call
e_t_err = 0.d0 e_t_err = 0.d0
print*,'Computing (T) correction...' print*,'Computing (T) correction...'
call wall_time(ta) call wall_time(ta)
! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v &
! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t)
call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & call ccsd_par_t_space_stoch(nO,nV,t1%f,t2%f,cc_space_f_o,cc_space_f_v &
,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t, e_t_err) ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t, e_t_err)
call wall_time(tb) call wall_time(tb)
@ -235,168 +315,161 @@ subroutine run_ccsd_space_orb
call save_energy(uncorr_energy + energy, e_t) call save_energy(uncorr_energy + energy, e_t)
deallocate(t1,t2) deallocate(h_t1, h_t2)
if (do_mo_cholesky) then
call gpu_deallocate(d_cc_space_v_oo_chol)
call gpu_deallocate(d_cc_space_v_ov_chol)
call gpu_deallocate(d_cc_space_v_vo_chol)
call gpu_deallocate(d_cc_space_v_vv_chol)
endif
call gpu_deallocate(d_cc_space_v_oovv)
call gpu_deallocate(d_cc_space_v_voov)
call gpu_deallocate(d_cc_space_v_ovov)
call gpu_deallocate(d_cc_space_v_oovo)
call gpu_deallocate(d_cc_space_v_ovvo)
call gpu_deallocate(d_cc_space_v_vooo)
call gpu_deallocate(d_cc_space_v_oooo)
call gpu_deallocate(d_cc_space_v_vvoo)
call gpu_deallocate(d_cc_space_v_ovoo)
call gpu_deallocate(d_cc_space_f_oo)
call gpu_deallocate(d_cc_space_f_vo)
call gpu_deallocate(d_cc_space_f_ov)
call gpu_deallocate(d_cc_space_f_vv)
call gpu_deallocate(t1)
call gpu_deallocate(t2)
end end
! Energy ! Energy
subroutine ccsd_energy_space(nO,nV,tau,t1,energy) subroutine ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy)
use gpu
implicit none implicit none
integer, intent(in) :: nO, nV integer, intent(in) :: nO, nV
double precision, intent(in) :: tau(nO,nO,nV,nV) type(gpu_double4), intent(in) :: tau_x, d_cc_space_v_oovv
double precision, intent(in) :: t1(nO,nV) type(gpu_double2), intent(in) :: t1, d_cc_space_f_vo
double precision, intent(out) :: energy double precision, intent(out) :: energy
! internal ! internal
integer :: i,j,a,b integer :: i,j,a,b
double precision :: e double precision :: e
energy = 0d0 type(gpu_stream) :: s1, s2
!$omp parallel & call gpu_stream_create(s1)
!$omp shared(nO,nV,energy,tau,t1,& call gpu_stream_create(s2)
!$omp cc_space_f_vo,cc_space_w_oovv) &
!$omp private(i,j,a,b,e) &
!$omp default(none)
e = 0d0
!$omp do
do a = 1, nV
do i = 1, nO
e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a)
enddo
enddo
!$omp end do nowait
!$omp do
do b = 1, nV
do a = 1, nV
do j = 1, nO
do i = 1, nO
e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b)
enddo
enddo
enddo
enddo
!$omp end do nowait
!$omp critical
energy = energy + e
!$omp end critical
!$omp end parallel
end call gpu_set_stream(blas_handle,s1)
call gpu_ddot(blas_handle, nO*nV, d_cc_space_f_vo%f(1,1), 1, t1%f(1,1), 1, e)
subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy) call gpu_set_stream(blas_handle,s2)
call gpu_ddot_64(blas_handle, nO*nO*nV*nV*1_8, tau_x%f(1,1,1,1), 1_8, d_cc_space_v_oovv%f(1,1,1,1), 1_8, energy)
call gpu_set_stream(blas_handle,gpu_default_stream)
implicit none call gpu_synchronize()
call gpu_stream_destroy(s1)
call gpu_stream_destroy(s2)
integer, intent(in) :: nO, nV energy = energy + 2.d0*e
double precision, intent(in) :: tau_x(nO,nO,nV,nV)
double precision, intent(in) :: t1(nO,nV)
double precision, intent(out) :: energy
! internal
integer :: i,j,a,b
double precision :: e
energy = 0d0
!$omp parallel &
!$omp shared(nO,nV,energy,tau_x,t1,&
!$omp cc_space_f_vo,cc_space_v_oovv) &
!$omp private(i,j,a,b,e) &
!$omp default(none)
e = 0d0
!$omp do
do a = 1, nV
do i = 1, nO
e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a)
enddo
enddo
!$omp end do nowait
!$omp do
do b = 1, nV
do a = 1, nV
do j = 1, nO
do i = 1, nO
e = e + tau_x(i,j,a,b) * cc_space_v_oovv(i,j,a,b)
enddo
enddo
enddo
enddo
!$omp end do nowait
!$omp critical
energy = energy + e
!$omp end critical
!$omp end parallel
end end
! Tau ! Tau
subroutine update_tau_space(nO,nV,t1,t2,tau) subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau)
use gpu
implicit none implicit none
! in ! in
integer, intent(in) :: nO, nV integer, intent(in) :: nO, nV
double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) double precision, intent(in) :: h_t1(nO,nV)
type(gpu_double2), intent(in) :: t1
type(gpu_double4), intent(in) :: t2
! out ! out
double precision, intent(out) :: tau(nO,nO,nV,nV) type(gpu_double4) :: tau
! internal ! internal
integer :: i,j,a,b integer :: i,j,a,b
type(gpu_stream) :: stream(nV)
!$OMP PARALLEL & !$OMP PARALLEL &
!$OMP SHARED(nO,nV,tau,t2,t1) & !$OMP SHARED(nO,nV,tau,t2,t1,h_t1,stream,blas_handle) &
!$OMP PRIVATE(i,j,a,b) & !$OMP PRIVATE(i,j,a,b) &
!$OMP DEFAULT(NONE) !$OMP DEFAULT(NONE)
!$OMP DO !$OMP DO
do b = 1, nV do b=1,nV
do a = 1, nV call gpu_stream_create(stream(b))
do j = 1, nO call gpu_set_stream(blas_handle,stream(b))
do i = 1, nO do j=1,nO
tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, &
enddo 1.d0, t2%f(1,j,1,b), nO*nO, &
enddo h_t1(j,b), t1%f(1,1), nO, &
tau%f(1,j,1,b), nO*nO)
enddo enddo
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
call gpu_synchronize()
do b=1,nV
call gpu_stream_destroy(stream(b))
enddo
call gpu_set_stream(blas_handle,gpu_default_stream)
end end
subroutine update_tau_x_space(nO,nV,tau,tau_x) subroutine update_tau_x_space(nO,nV,tau,tau_x)
use gpu
implicit none implicit none
! in ! in
integer, intent(in) :: nO, nV integer, intent(in) :: nO, nV
double precision, intent(in) :: tau(nO,nO,nV,nV) type(gpu_double4), intent(in) :: tau
! out ! out
double precision, intent(out) :: tau_x(nO,nO,nV,nV) type(gpu_double4) :: tau_x
! internal ! internal
integer :: i,j,a,b integer :: i,j,a,b
type(gpu_stream) :: stream(nV)
do a=1,nV
call gpu_stream_create(stream(a))
enddo
!$OMP PARALLEL & !$OMP PARALLEL &
!$OMP SHARED(nO,nV,tau,tau_x) & !$OMP SHARED(nO,nV,tau,tau_x,stream,blas_handle) &
!$OMP PRIVATE(i,j,a,b) & !$OMP PRIVATE(a,b) &
!$OMP DEFAULT(NONE) !$OMP DEFAULT(NONE)
!$OMP DO !$OMP DO
do b = 1, nV do b=1,nV
do a = 1, nV do a=1,nV
do j = 1, nO call gpu_set_stream(blas_handle,stream(a))
do i = 1, nO call gpu_dgeam(blas_handle, 'N', 'N', nO, nO, &
tau_x(i,j,a,b) = 2.d0*tau(i,j,a,b) - tau(i,j,b,a) 2.d0, tau%f(1,1,a,b), nO, &
enddo -1.d0, tau%f(1,1,b,a), nO, &
enddo tau_x%f(1,1,a,b), nO)
enddo enddo
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
call gpu_set_stream(blas_handle,gpu_default_stream)
call gpu_synchronize()
do b=1,nV
call gpu_stream_destroy(stream(b))
enddo
end end
! R1 ! R1

File diff suppressed because it is too large Load Diff

1
src/gpu/NEED Normal file
View File

@ -0,0 +1 @@
gpu_arch

6
src/gpu/README.rst Normal file
View File

@ -0,0 +1,6 @@
===
gpu
===
Bindings for GPU routines (architecture independent).
Architecture-dependent files are in gpu_arch.

View File

@ -22,20 +22,20 @@ void gpu_ddot(const void* handle, const int64_t n, const double* x, const int64_
void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result); void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result);
void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const double alpha, void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const double* alpha,
const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy); const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy);
void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const float alpha, void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const float* alpha,
const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy); const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy);
void gpu_dgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha, void gpu_dgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha,
const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc); const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc);
void gpu_sgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha, void gpu_sgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha,
const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc); const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc);
void gpu_dgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha, void gpu_dgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double* alpha,
const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc); const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc);
void gpu_sgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha, void gpu_sgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const float* alpha,
const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc); const float* a, const int64_t lda, const float* beta, const float* b, const int64_t ldb, float* c, const int64_t ldc);

26
src/gpu/gpu.irp.f Normal file
View File

@ -0,0 +1,26 @@
use gpu
BEGIN_PROVIDER [ type(gpu_blas), blas_handle ]
implicit none
BEGIN_DOC
! Handle for cuBLAS or RocBLAS
END_DOC
call gpu_blas_create(blas_handle)
END_PROVIDER
BEGIN_PROVIDER [ type(gpu_stream), gpu_default_stream ]
implicit none
BEGIN_DOC
! Default stream
END_DOC
gpu_default_stream%c = C_NULL_PTR
END_PROVIDER
BEGIN_PROVIDER [ integer, gpu_num ]
implicit none
BEGIN_DOC
! Number of usable GPUs
END_DOC
gpu_num = gpu_ndevices()
END_PROVIDER

707
src/gpu/gpu_module.F90 Normal file
View File

@ -0,0 +1,707 @@
module gpu
use, intrinsic :: iso_c_binding
implicit none
! Data types
! ----------
type gpu_double1
type(c_ptr) :: c
double precision, pointer :: f(:)
end type
type gpu_double2
type(c_ptr) :: c
double precision, pointer :: f(:,:)
end type
type gpu_double3
type(c_ptr) :: c
double precision, pointer :: f(:,:,:)
end type
type gpu_double4
type(c_ptr) :: c
double precision, pointer :: f(:,:,:,:)
end type
type gpu_double5
type(c_ptr) :: c
double precision, pointer :: f(:,:,:,:,:)
end type
type gpu_double6
type(c_ptr) :: c
double precision, pointer :: f(:,:,:,:,:,:)
end type
type gpu_blas
type(c_ptr) :: c
end type
type gpu_stream
type(c_ptr) :: c
end type
! C interfaces
! ------------
interface
logical(c_bool) function no_gpu() bind(C)
import
end function
integer function gpu_ndevices() bind(C)
import
end function
subroutine gpu_set_device(id) bind(C)
import
integer(c_int32_t), value :: id
end subroutine
subroutine gpu_allocate_c(ptr, n) bind(C, name='gpu_allocate')
import
type(c_ptr) :: ptr
integer(c_int64_t), value :: n
end subroutine
subroutine gpu_deallocate_c(ptr) bind(C, name='gpu_deallocate')
import
type(c_ptr) :: ptr
end subroutine
subroutine gpu_upload_c(cpu_ptr, gpu_ptr, n) bind(C, name='gpu_upload')
import
type(c_ptr), value :: cpu_ptr
type(c_ptr), value :: gpu_ptr
integer(c_int64_t), value :: n
end subroutine
subroutine gpu_download_c(gpu_ptr, cpu_ptr, n) bind(C, name='gpu_download')
import
type(c_ptr), value :: gpu_ptr
type(c_ptr), value :: cpu_ptr
integer(c_int64_t), value :: n
end subroutine
subroutine gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, n) bind(C, name='gpu_copy')
import
type(c_ptr), value :: gpu_ptr_src
type(c_ptr), value :: gpu_ptr_dest
integer(c_int64_t), value :: n
end subroutine
subroutine gpu_stream_create_c(stream) bind(C, name='gpu_stream_create')
import
type(c_ptr) :: stream
end subroutine
subroutine gpu_stream_destroy_c(stream) bind(C, name='gpu_stream_destroy')
import
type(c_ptr) :: stream
end subroutine
subroutine gpu_set_stream_c(handle, stream) bind(C, name='gpu_set_stream')
import
type(c_ptr), value :: handle, stream
end subroutine
subroutine gpu_synchronize() bind(C)
import
end subroutine
subroutine gpu_blas_create_c(handle) bind(C, name='gpu_blas_create')
import
type(c_ptr) :: handle
end subroutine
subroutine gpu_blas_destroy_c(handle) bind(C, name='gpu_blas_destroy')
import
type(c_ptr) :: handle
end subroutine
subroutine gpu_ddot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_ddot')
import
type(c_ptr), value, intent(in) :: handle
integer(c_int64_t), value :: n, incx, incy
type(c_ptr), value :: dx, dy
real(c_double), intent(out) :: res
end subroutine
subroutine gpu_sdot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_sdot')
import
type(c_ptr), value, intent(in) :: handle
integer(c_int64_t), value :: n, incx, incy
type(c_ptr), intent(in), value :: dx, dy
real(c_float), intent(out) :: res
end subroutine
subroutine gpu_dgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, &
b, ldb, c, ldc) bind(C, name='gpu_dgeam')
import
type(c_ptr), value, intent(in) :: handle
character(c_char), intent(in), value :: transa, transb
integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc
real(c_double), intent(in) :: alpha, beta
type(c_ptr), value :: a, b, c
end subroutine
subroutine gpu_sgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, &
b, ldb, c, ldc) bind(C, name='gpu_sgeam')
import
type(c_ptr), value, intent(in) :: handle
character(c_char), intent(in), value :: transa, transb
integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc
real(c_float), intent(in) :: alpha, beta
real(c_float) :: a, b, c
end subroutine
subroutine gpu_dgemv_c(handle, transa, m, n, alpha, a, lda, &
x, incx, beta, y, incy) bind(C, name='gpu_dgemv')
import
type(c_ptr), value, intent(in) :: handle
character(c_char), intent(in) :: transa
integer(c_int64_t), intent(in), value :: m, n, lda, incx, incy
real(c_double), intent(in) :: alpha, beta
real(c_double) :: a, x, y
end subroutine
subroutine gpu_sgemv_c(handle, transa, m, n, alpha, a, lda, &
x, incx, beta, y, incy) bind(C, name='gpu_sgemv')
import
type(c_ptr), value, intent(in) :: handle
character(c_char), intent(in) :: transa
integer(c_int64_t), intent(in), value :: m, n, lda, incx, incy
real(c_float), intent(in) :: alpha, beta
real(c_float) :: a, x, y
end subroutine
subroutine gpu_dgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, &
b, ldb, beta, c, ldc) bind(C, name='gpu_dgemm')
import
type(c_ptr), value, intent(in) :: handle
character(c_char), intent(in) :: transa, transb
integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc
real(c_double), intent(in) :: alpha, beta
real(c_double) :: a, b, c
end subroutine
subroutine gpu_sgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, &
b, ldb, beta, c, ldc) bind(C, name='gpu_sgemm')
import
type(c_ptr), value, intent(in) :: handle
character(c_char), intent(in), value :: transa, transb
integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc
real(c_float), intent(in) :: alpha, beta
real(c_float) :: a, b, c
end subroutine
end interface
! Polymorphic interfaces
! ----------------------
interface gpu_allocate
procedure gpu_allocate_double1 &
,gpu_allocate_double2 &
,gpu_allocate_double3 &
,gpu_allocate_double4 &
,gpu_allocate_double5 &
,gpu_allocate_double6 &
,gpu_allocate_double1_64 &
,gpu_allocate_double2_64 &
,gpu_allocate_double3_64 &
,gpu_allocate_double4_64 &
,gpu_allocate_double5_64 &
,gpu_allocate_double6_64
end interface gpu_allocate
interface gpu_deallocate
procedure gpu_deallocate_double1 &
,gpu_deallocate_double2 &
,gpu_deallocate_double3 &
,gpu_deallocate_double4 &
,gpu_deallocate_double5 &
,gpu_deallocate_double6
end interface gpu_deallocate
interface gpu_upload
procedure gpu_upload_double1 &
,gpu_upload_double2 &
,gpu_upload_double3 &
,gpu_upload_double4 &
,gpu_upload_double5 &
,gpu_upload_double6
end interface gpu_upload
interface gpu_download
procedure gpu_download_double1 &
,gpu_download_double2 &
,gpu_download_double3 &
,gpu_download_double4 &
,gpu_download_double5 &
,gpu_download_double6
end interface gpu_download
interface gpu_copy
procedure gpu_copy_double1 &
,gpu_copy_double2 &
,gpu_copy_double3 &
,gpu_copy_double4 &
,gpu_copy_double5 &
,gpu_copy_double6
end interface gpu_copy
contains
! gpu_allocate
! ------------
subroutine gpu_allocate_double1(ptr, s)
implicit none
type(gpu_double1), intent(inout) :: ptr
integer, intent(in) :: s
call gpu_allocate_c(ptr%c, s*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s /))
end subroutine
subroutine gpu_allocate_double2(ptr, s1, s2)
implicit none
type(gpu_double2), intent(inout) :: ptr
integer, intent(in) :: s1, s2
call gpu_allocate_c(ptr%c, s1*s2*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2 /))
end subroutine
subroutine gpu_allocate_double3(ptr, s1, s2, s3)
implicit none
type(gpu_double3), intent(inout) :: ptr
integer, intent(in) :: s1, s2, s3
call gpu_allocate_c(ptr%c, s1*s2*s3*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3 /))
end subroutine
subroutine gpu_allocate_double4(ptr, s1, s2, s3, s4)
implicit none
type(gpu_double4), intent(inout) :: ptr
integer, intent(in) :: s1, s2, s3, s4
call gpu_allocate_c(ptr%c, s1*s2*s3*s4*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4 /))
end subroutine
subroutine gpu_allocate_double5(ptr, s1, s2, s3, s4, s5)
implicit none
type(gpu_double5), intent(inout) :: ptr
integer, intent(in) :: s1, s2, s3, s4, s5
call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5 /))
end subroutine
subroutine gpu_allocate_double6(ptr, s1, s2, s3, s4, s5, s6)
implicit none
type(gpu_double6), intent(inout) :: ptr
integer, intent(in) :: s1, s2, s3, s4, s5, s6
call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*s6*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5, s6 /))
end subroutine
subroutine gpu_allocate_double1_64(ptr, s)
implicit none
type(gpu_double1), intent(inout) :: ptr
integer*8, intent(in) :: s
call gpu_allocate_c(ptr%c, s)
call c_f_pointer(ptr%c, ptr%f, (/ s /))
end subroutine
subroutine gpu_allocate_double2_64(ptr, s1, s2)
implicit none
type(gpu_double2), intent(inout) :: ptr
integer*8, intent(in) :: s1, s2
call gpu_allocate_c(ptr%c, s1*s2*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2 /))
end subroutine
subroutine gpu_allocate_double3_64(ptr, s1, s2, s3)
implicit none
type(gpu_double3), intent(inout) :: ptr
integer*8, intent(in) :: s1, s2, s3
call gpu_allocate_c(ptr%c, s1*s2*s3*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3 /))
end subroutine
subroutine gpu_allocate_double4_64(ptr, s1, s2, s3, s4)
implicit none
type(gpu_double4), intent(inout) :: ptr
integer*8, intent(in) :: s1, s2, s3, s4
call gpu_allocate_c(ptr%c, s1*s2*s3*s4*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4 /))
end subroutine
subroutine gpu_allocate_double5_64(ptr, s1, s2, s3, s4, s5)
implicit none
type(gpu_double5), intent(inout) :: ptr
integer*8, intent(in) :: s1, s2, s3, s4, s5
call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5 /))
end subroutine
subroutine gpu_allocate_double6_64(ptr, s1, s2, s3, s4, s5, s6)
implicit none
type(gpu_double6), intent(inout) :: ptr
integer*8, intent(in) :: s1, s2, s3, s4, s5, s6
call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*s6*8_8)
call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5, s6 /))
end subroutine
! gpu_deallocate
! --------------
subroutine gpu_deallocate_double1(ptr)
implicit none
type(gpu_double1), intent(inout) :: ptr
call gpu_deallocate_c(ptr%c)
NULLIFY(ptr%f)
end subroutine
subroutine gpu_deallocate_double2(ptr)
implicit none
type(gpu_double2), intent(inout) :: ptr
call gpu_deallocate_c(ptr%c)
NULLIFY(ptr%f)
end subroutine
subroutine gpu_deallocate_double3(ptr)
implicit none
type(gpu_double3), intent(inout) :: ptr
call gpu_deallocate_c(ptr%c)
NULLIFY(ptr%f)
end subroutine
subroutine gpu_deallocate_double4(ptr)
implicit none
type(gpu_double4), intent(inout) :: ptr
call gpu_deallocate_c(ptr%c)
NULLIFY(ptr%f)
end subroutine
subroutine gpu_deallocate_double5(ptr)
implicit none
type(gpu_double5), intent(inout) :: ptr
call gpu_deallocate_c(ptr%c)
NULLIFY(ptr%f)
end subroutine
subroutine gpu_deallocate_double6(ptr)
implicit none
type(gpu_double6), intent(inout) :: ptr
call gpu_deallocate_c(ptr%c)
NULLIFY(ptr%f)
end subroutine
! gpu_upload
! ----------
subroutine gpu_upload_double1(cpu_ptr, gpu_ptr)
implicit none
double precision, target, intent(in) :: cpu_ptr(*)
type(gpu_double1), intent(in) :: gpu_ptr
call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, 8_8*size(gpu_ptr%f))
end subroutine
subroutine gpu_upload_double2(cpu_ptr, gpu_ptr)
implicit none
double precision, target, intent(in) :: cpu_ptr(:,:)
type(gpu_double2), intent(in) :: gpu_ptr
call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
end subroutine
subroutine gpu_upload_double3(cpu_ptr, gpu_ptr)
implicit none
double precision, target, intent(in) :: cpu_ptr(:,:,:)
type(gpu_double3), intent(in) :: gpu_ptr
call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
end subroutine
subroutine gpu_upload_double4(cpu_ptr, gpu_ptr)
implicit none
double precision, target, intent(in) :: cpu_ptr(:,:,:,:)
type(gpu_double4), intent(in) :: gpu_ptr
call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
end subroutine
subroutine gpu_upload_double5(cpu_ptr, gpu_ptr)
implicit none
double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:)
type(gpu_double5), intent(in) :: gpu_ptr
call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
end subroutine
subroutine gpu_upload_double6(cpu_ptr, gpu_ptr)
implicit none
double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:,:)
type(gpu_double6), intent(in) :: gpu_ptr
call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8)
end subroutine
! gpu_download
! ------------
subroutine gpu_download_double1(gpu_ptr, cpu_ptr)
implicit none
type(gpu_double1), intent(in) :: gpu_ptr
double precision, target, intent(in) :: cpu_ptr(:)
call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*size(gpu_ptr%f))
end subroutine
subroutine gpu_download_double2(gpu_ptr, cpu_ptr)
implicit none
type(gpu_double2), intent(in) :: gpu_ptr
double precision, target, intent(in) :: cpu_ptr(:,:)
call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine
subroutine gpu_download_double3(gpu_ptr, cpu_ptr)
implicit none
type(gpu_double3), intent(in) :: gpu_ptr
double precision, target, intent(in) :: cpu_ptr(:,:,:)
call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine
subroutine gpu_download_double4(gpu_ptr, cpu_ptr)
implicit none
type(gpu_double4), intent(in) :: gpu_ptr
double precision, target, intent(in) :: cpu_ptr(:,:,:,:)
call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine
subroutine gpu_download_double5(gpu_ptr, cpu_ptr)
implicit none
type(gpu_double5), intent(in) :: gpu_ptr
double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:)
call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine
subroutine gpu_download_double6(gpu_ptr, cpu_ptr)
implicit none
type(gpu_double6), intent(in) :: gpu_ptr
double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:,:)
call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8))
end subroutine
! gpu_copy
! --------
subroutine gpu_copy_double1(gpu_ptr_src, gpu_ptr_dest)
implicit none
type(gpu_double1), intent(in) :: gpu_ptr_src
type(gpu_double1), intent(in) :: gpu_ptr_dest
call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*size(gpu_ptr_dest%f))
end subroutine
subroutine gpu_copy_double2(gpu_ptr_src, gpu_ptr_dest)
implicit none
type(gpu_double2), intent(in) :: gpu_ptr_src
type(gpu_double2), intent(in) :: gpu_ptr_dest
call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
end subroutine
subroutine gpu_copy_double3(gpu_ptr_src, gpu_ptr_dest)
implicit none
type(gpu_double3), intent(in) :: gpu_ptr_src
type(gpu_double3), intent(in) :: gpu_ptr_dest
call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
end subroutine
subroutine gpu_copy_double4(gpu_ptr_src, gpu_ptr_dest)
implicit none
type(gpu_double4), intent(in) :: gpu_ptr_src
type(gpu_double4), intent(in) :: gpu_ptr_dest
call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
end subroutine
subroutine gpu_copy_double5(gpu_ptr_src, gpu_ptr_dest)
implicit none
type(gpu_double5), intent(in) :: gpu_ptr_src
type(gpu_double5), intent(in) :: gpu_ptr_dest
call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
end subroutine
subroutine gpu_copy_double6(gpu_ptr_src, gpu_ptr_dest)
implicit none
type(gpu_double6), intent(in) :: gpu_ptr_src
type(gpu_double6), intent(in) :: gpu_ptr_dest
call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8))
end subroutine
! gpu_stream
! ----------
subroutine gpu_stream_create(stream)
type(gpu_stream) :: stream
call gpu_stream_create_c(stream%c)
end subroutine
subroutine gpu_stream_destroy(stream)
type(gpu_stream) :: stream
call gpu_stream_destroy_c(stream%c)
end subroutine
subroutine gpu_set_stream(handle, stream)
type(gpu_blas) :: handle
type(gpu_stream) :: stream
call gpu_set_stream_c(handle%c, stream%c)
end subroutine
! gpu_blas
! --------
subroutine gpu_blas_create(handle)
type(gpu_blas) :: handle
call gpu_blas_create_c(handle%c)
end subroutine
subroutine gpu_blas_destroy(handle)
type(gpu_blas) :: handle
call gpu_blas_destroy_c(handle%c)
end subroutine
! dot
! ---
subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res)
! use gpu
type(gpu_blas), intent(in) :: handle
integer*4 :: n, incx, incy
double precision, target :: dx, dy
double precision, intent(out) :: res
call gpu_ddot_c(handle%c, int(n,c_int64_t), c_loc(dx), int(incx,c_int64_t), c_loc(dy), int(incy,c_int64_t), res)
end subroutine
subroutine gpu_ddot_64(handle, n, dx, incx, dy, incy, res)
! use gpu
type(gpu_blas), intent(in) :: handle
integer*8 :: n, incx, incy
double precision, target :: dx, dy
double precision, intent(out) :: res
call gpu_ddot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res)
end subroutine
! geam
! ----
subroutine gpu_dgeam(handle, transa, transb, m, n, alpha, a, lda, beta, &
b, ldb, c, ldc)
! use gpu
type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa, transb
integer*4, intent(in) :: m, n, lda, ldb, ldc
double precision, intent(in) :: alpha, beta
double precision, target :: a, b, c
call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, c_loc(a), int(lda,c_int64_t), beta, &
c_loc(b), int(ldb,c_int64_t), c_loc(c), int(ldc,c_int64_t))
end subroutine
subroutine gpu_dgeam_64(handle, transa, transb, m, n, alpha, a, lda, beta, &
b, ldb, c, ldc)
! use gpu
type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa, transb
integer*8, intent(in) :: m, n, lda, ldb, ldc
double precision, intent(in) :: alpha, beta
double precision, target :: a, b, c
call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, c_loc(a), int(lda,c_int64_t), beta, &
c_loc(b), int(ldb,c_int64_t), c_loc(c), int(ldc,c_int64_t))
end subroutine
! gemv
! ----
subroutine gpu_dgemv(handle, transa, m, n, alpha, a, lda, &
x, incx, beta, y, incy)
! use gpu
type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa
integer*4, intent(in) :: m, n, lda, incx, incy
double precision, intent(in) :: alpha, beta
double precision :: a, x, y
call gpu_dgemv_c(handle%c, transa, int(m,c_int64_t), int(n,c_int64_t), &
alpha, a, int(lda,c_int64_t), &
x, int(incx,c_int64_t), beta, y, int(incy,c_int64_t))
end subroutine
subroutine gpu_dgemv_64(handle, transa, m, n, alpha, a, lda, &
x, incx, beta, y, incy)
! use gpu
type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa
integer*8, intent(in) :: m, n, lda, incx, incy
double precision, intent(in) :: alpha, beta
double precision :: a, x, y
call gpu_dgemv_c(handle%c, transa, int(m,c_int64_t), int(n,c_int64_t), &
alpha, a, int(lda,c_int64_t), &
x, int(incx,c_int64_t), beta, y, int(incy,c_int64_t))
end subroutine
! gemm
! ----
subroutine gpu_dgemm(handle, transa, transb, m, n, k, alpha, a, lda, &
b, ldb, beta, c, ldc)
! use gpu
type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa, transb
integer*4, intent(in) :: m, n, k, lda, ldb, ldc
double precision, intent(in) :: alpha, beta
double precision :: a, b, c
call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), &
alpha, a, int(lda,c_int64_t), &
b, int(ldb,c_int64_t), beta, c, int(ldc,c_int64_t))
end subroutine
subroutine gpu_dgemm_64(handle, transa, transb, m, n, k, alpha, a, lda, &
b, ldb, beta, c, ldc)
! use gpu
type(gpu_blas), intent(in) :: handle
character, intent(in) :: transa, transb
integer*8, intent(in) :: m, n, k, lda, ldb, ldc
double precision, intent(in) :: alpha, beta
double precision :: a, b, c
call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), &
alpha, a, int(lda,c_int64_t), b, int(ldb,c_int64_t), beta, c, int(ldc,c_int64_t))
end subroutine
end module

View File

@ -1,141 +0,0 @@
module gpu
use, intrinsic :: iso_c_binding, only : c_int32_t, c_int64_t, c_double, c_size_t, c_char
implicit none
interface
integer function gpu_ndevices() bind(C)
end function
subroutine gpu_set_device(id) bind(C)
import
integer(c_int32_t), value :: id
end subroutine
subroutine gpu_allocate_c(ptr, n) bind(C, name='gpu_allocate')
import
type(c_ptr) :: ptr
integer(c_int64_t), value :: n
end subroutine
subroutine gpu_free_c(ptr) bind(C, name='gpu_free')
import
type(c_ptr) :: ptr
end subroutine
subroutine gpu_upload_c(cpu_ptr, gpu_ptr, n) bind(C, name='gpu_upload')
import
type(c_ptr), value :: cpu_ptr
type(c_ptr), value :: gpu_ptr
integer(c_int64_t), value :: n
end subroutine
subroutine gpu_download_c(gpu_ptr, cpu_ptr, n) bind(C, name='gpu_download')
import
type(c_ptr), value :: gpu_ptr
type(c_ptr), value :: cpu_ptr
integer(c_int64_t), value :: n
end subroutine
subroutine gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, n) bind(C, name='gpu_copy')
import
type(c_ptr), value :: gpu_ptr_src
type(c_ptr), value :: gpu_ptr_dest
integer(c_int64_t), value :: n
end subroutine
subroutine gpu_stream_create(stream) bind(C)
import
type(c_ptr) :: stream
end subroutine
subroutine gpu_stream_destroy(stream) bind(C)
import
type(c_ptr) :: stream
end subroutine
subroutine gpu_set_stream(handle, stream) bind(C)
import
type(c_ptr) :: handle, stream
end subroutine
subroutine gpu_synchronize()
end subroutine
subroutine gpu_blas_create(handle) bind(C)
import
type(c_ptr) :: handle
end subroutine
subroutine gpu_blas_destroy(handle) bind(C)
import
type(c_ptr) :: handle
end subroutine
subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res) bind(C)
import
type(c_ptr), intent(in) :: handle
integer(c_int64_t), value :: n, incx, incy
real(c_double), intent(in) :: dx(*), dy(*)
real(c_double), intent(out) :: res
end subroutine
subroutine gpu_sdot(handle, n, dx, incx, dy, incy, res) bind(C)
import
type(c_ptr), intent(in) :: handle
integer(c_int64_t), value :: n, incx, incy
real(c_float), intent(in) :: dx(*), dy(*)
real(c_float), intent(out) :: res
end subroutine
end interface
end module
subroutine gpu_allocate_double(ptr, s)
use gpu
implicit none
double precision, pointer, intent(inout) :: ptr
integer*8, intent(in) :: s(*)
type(c_ptr) :: cptr
call gpu_allocate_c(cptr, sum(s)*8_8)
call c_f_pointer(cptr, ptr, s)
end subroutine
subroutine gpu_free_double(ptr)
use gpu
implicit none
double precision, pointer, intent(inout) :: ptr
type(c_ptr) :: cptr
cptr = cloc(ptr)
call gpu_free(cptr)
NULLIFY(ptr)
end subroutine
subroutine gpu_upload_double(cpu_ptr, gpu_ptr, n)
use gpu
implicit none
double precision, intent(in) :: cpu_ptr(*)
double precision, intent(out) :: gpu_ptr(*)
integer(c_int64_t), intent(in) :: n
call gpu_upload_c(cpu_ptr, gpu_ptr, 8_8*n)
end subroutine
subroutine gpu_download_double(gpu_ptr, cpu_ptr, n)
use gpu
implicit none
double precision, intent(in) :: gpu_ptr(*)
double precision, intent(out) :: cpu_ptr(*)
integer(c_int64_t), intent(in) :: n
call gpu_download_c(gpu_ptr, cpu_ptr, 8_8*n)
end subroutine
subroutine gpu_copy_double(gpu_ptr_src, gpu_ptr_dest, n)
use gpu
implicit none
double precision, intent(in) :: gpu_ptr_src(*)
double precision, intent(out) :: gpu_ptr_dest(*)
integer(c_int64_t), intent(in) :: n
call gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, 8_8*n)
end subroutine

View File

@ -1,7 +1,3 @@
two_body_rdm mo_optimization_utils
hartree_fock
cipsi
davidson_undressed
selectors_full selectors_full
generators_full generators_full
utils_trust_region

View File

@ -2,87 +2,7 @@ program optimization
read_wf = .true. ! must be True for the orbital optimization !!! read_wf = .true. ! must be True for the orbital optimization !!!
TOUCH read_wf TOUCH read_wf
call run_optimization call run_optimization_mos_CIPSI
end end
subroutine run_optimization
implicit none
double precision :: e_cipsi, e_opt, delta_e
double precision, allocatable :: Ev(:),PT2(:)
integer :: nb_iter,i
logical :: not_converged
character (len=100) :: filename
PROVIDE psi_det psi_coef mo_two_e_integrals_in_map ao_pseudo_integrals
allocate(Ev(N_states),PT2(N_states))
not_converged = .True.
nb_iter = 0
! To start from the wf
N_det_max = max(n_det,5)
TOUCH N_det_max
open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt')
write(10,*) " Ndet E_cipsi E_opt Delta_e"
call state_average_energy(e_cipsi)
write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0
close(10)
do while (not_converged)
print*,''
print*,'======================'
print*,' Cipsi step:', nb_iter
print*,'======================'
print*,''
print*,'********** cipsi step **********'
! cispi calculation
call run_stochastic_cipsi(Ev,PT2)
! State average energy after the cipsi step
call state_average_energy(e_cipsi)
print*,''
print*,'********** optimization step **********'
! orbital optimization
call run_orb_opt_trust_v2
! State average energy after the orbital optimization
call state_average_energy(e_opt)
print*,''
print*,'********** diff step **********'
! Gain in energy
delta_e = e_opt - e_cipsi
print*, 'Gain in energy during the orbital optimization:', delta_e
open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append')
write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e
close(10)
! Exit
if (delta_e > 1d-12) then
print*, 'WARNING, something wrong happened'
print*, 'The gain (delta_e) in energy during the optimization process'
print*, 'is > 0, but it must be < 0'
print*, 'The program will exit'
exit
endif
if (n_det > n_det_max_opt) then
print*, 'The number of determinants in the wf > n_det_max_opt'
print*, 'The program will exit'
exit
endif
! To double the number of determinants in the wf
N_det_max = int(dble(n_det * 2)*0.9)
TOUCH N_det_max
nb_iter = nb_iter + 1
enddo
end

View File

@ -0,0 +1,5 @@
two_body_rdm
hartree_fock
cipsi
davidson_undressed
utils_trust_region

View File

@ -0,0 +1,74 @@
# Orbital optimization
## Methods
Different methods are available:
- full hessian
```
qp set orbital_optimization optimization_method full
```
- diagonal hessian
```
qp set orbital_optimization optimization_method diag
```
- identity matrix
```
qp set orbital_optimization optimization_method none
```
After the optimization the ezfio contains the optimized orbitals
## For a fixed number of determinants
To optimize the MOs for the actual determinants:
```
qp run orb_opt
```
## For a complete optimization, i.e, with a larger and larger wave function
To optimize the MOs with a larger and larger wave function:
```
qp run optimization
```
The results are stored in the EZFIO in "mo_optimization/result_opt",
with the following format:
(1) (2) (3) (4)
1: Number of determinants in the wf,
2: Cispi energy before the optimization,
3: Cipsi energy after the optimization,
4: Energy difference between (2) and (3).
The optimization process if the following:
- we do a first cipsi step to obtain a small number of determinants in the wf
- we run an orbital optimization for this wf
- we do a new cipsi step to double the number of determinants in the wf
- we run an orbital optimization for this wf
- ...
- we do that until the energy difference between (2) and (3) is
smaller than the targeted accuracy for the cispi (targeted_accuracy_cipsi in qp edit)
or the wf is larger than a given size (n_det_max_opt in qp_edit)
- after that you can reset your determinants (qp reset -d) and run a clean Cispi calculation
### End of the optimization
You can choos the number of determinants after what the
optimization will stop:
```
qp set orbital_optimization n_det_max_opt 1e5 # or any number
```
## Weight of the states
You can change the weights of the differents states directly in qp edit.
It will affect ths weights used in the orbital optimization.
# Tests
To run the tests:
```
qp test
```
# Org files
The org files are stored in the directory org in order to avoid overwriting on user changes.
The org files can be modified, to export the change to the source code, run
```
./TANGLE_org_mode.sh
mv *.irp.f ../.
```

View File

@ -0,0 +1,81 @@
subroutine run_optimization_mos_CIPSI
implicit none
double precision :: e_cipsi, e_opt, delta_e
double precision, allocatable :: Ev(:),PT2(:)
integer :: nb_iter,i
logical :: not_converged
character (len=100) :: filename
PROVIDE psi_det psi_coef mo_two_e_integrals_in_map ao_pseudo_integrals
allocate(Ev(N_states),PT2(N_states))
not_converged = .True.
nb_iter = 0
! To start from the wf
N_det_max = max(n_det,5)
TOUCH N_det_max
open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt')
write(10,*) " Ndet E_cipsi E_opt Delta_e"
call state_average_energy(e_cipsi)
write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0
close(10)
do while (not_converged)
print*,''
print*,'======================'
print*,' Cipsi step:', nb_iter
print*,'======================'
print*,''
print*,'********** cipsi step **********'
! cispi calculation
call run_stochastic_cipsi(Ev,PT2)
! State average energy after the cipsi step
call state_average_energy(e_cipsi)
print*,''
print*,'********** optimization step **********'
! orbital optimization
call run_orb_opt_trust_v2
! State average energy after the orbital optimization
call state_average_energy(e_opt)
print*,''
print*,'********** diff step **********'
! Gain in energy
delta_e = e_opt - e_cipsi
print*, 'Gain in energy during the orbital optimization:', delta_e
open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append')
write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e
close(10)
! Exit
if (delta_e > 1d-12) then
print*, 'WARNING, something wrong happened'
print*, 'The gain (delta_e) in energy during the optimization process'
print*, 'is > 0, but it must be < 0'
print*, 'The program will exit'
exit
endif
if (n_det > n_det_max_opt) then
print*, 'The number of determinants in the wf > n_det_max_opt'
print*, 'The program will exit'
exit
endif
! To double the number of determinants in the wf
N_det_max = int(dble(n_det * 2)*0.9)
TOUCH N_det_max
nb_iter = nb_iter + 1
enddo
end

View File

@ -101,3 +101,34 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num,
END_PROVIDER END_PROVIDER
BEGIN_PROVIDER [ double precision, cholesky_semi_mo_transp_simple, (cholesky_mo_num, ao_num, mo_num) ]
implicit none
BEGIN_DOC
! Cholesky vectors in MO basis
END_DOC
double precision, allocatable :: X(:,:,:)
double precision :: wall0, wall1
integer :: ierr
print *, 'Semi AO->MO Transformation of Cholesky vectors'
call wall_time(wall0)
allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr)
if (ierr /= 0) then
print *, irp_here, ': Allocation failed'
endif
integer :: i_chol, i_mo, j_mo, i_ao
cholesky_semi_mo_transp_simple = 0.d0
do i_mo = 1, mo_num
do i_ao = 1, ao_num
do j_mo = 1, mo_num
do i_chol = 1, cholesky_mo_num
cholesky_semi_mo_transp_simple(i_chol, i_ao,i_mo) += cholesky_mo_transp(i_chol,j_mo,i_mo) * mo_coef_transp(j_mo,i_ao)
enddo
enddo
enddo
enddo
END_PROVIDER

View File

@ -40,7 +40,7 @@ end
! Min and max values of the MOs for which the integrals are in the cache ! Min and max values of the MOs for which the integrals are in the cache
END_DOC END_DOC
mo_integrals_cache_size = 2_8**mo_integrals_cache_shift mo_integrals_cache_size = 2**mo_integrals_cache_shift
mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) ) mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) )
mo_integrals_cache_max = min(mo_num, mo_integrals_cache_min + mo_integrals_cache_size - 1) mo_integrals_cache_max = min(mo_num, mo_integrals_cache_min + mo_integrals_cache_size - 1)

View File

@ -289,6 +289,106 @@ BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)]
endif endif
END_PROVIDER END_PROVIDER
BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse_bis, (n_points_final_grid)]
implicit none
integer :: ipoint,m,mm,i,ii,p
!!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ
!! = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ
!! = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R)
!! = \sum_A V_AR G_AR
!! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI
double precision :: u_dot_v,wall0,wall1,accu_1, accu_2,mo_i_r1,mo_b_r1
double precision :: thresh_1,thresh_2
double precision, allocatable :: accu_vec(:),delta_vec(:)
thresh_2 = ao_cholesky_threshold * 100.d0
thresh_1 = dsqrt(thresh_2)
provide cholesky_mo_transp
if(elec_alpha_num == elec_beta_num)then
call wall_time(wall0)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE (accu_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) &
!$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,mos_in_r_array_omp,aos_in_r_array,thresh_1,thresh_2) &
!$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse_bis,n_points_final_grid,cholesky_semi_mo_transp_simple,ao_num)
allocate(accu_vec(cholesky_mo_num))
!$OMP DO
do ipoint = 1, n_points_final_grid
f_hf_cholesky_sparse_bis(ipoint) = 0.d0
accu_vec = 0.d0
do ii = 1, n_occ_val_orb_for_hf(1)
i = list_valence_orb_for_hf(ii,1)
mo_i_r1 = mos_in_r_array_omp(i,ipoint)
if(dabs(mo_i_r1).lt.thresh_1)cycle
do mm = 1, ao_num ! electron 1
mo_b_r1 = aos_in_r_array(mm,ipoint)*mo_i_r1
if(dabs(mo_b_r1).lt.thresh_2)cycle
do p = 1, cholesky_mo_num
accu_vec(p) = accu_vec(p) + mo_b_r1 * cholesky_semi_mo_transp_simple(p,mm,i)
enddo
enddo
enddo
do p = 1, cholesky_mo_num
f_hf_cholesky_sparse_bis(ipoint) = f_hf_cholesky_sparse_bis(ipoint) + accu_vec(p) * accu_vec(p)
enddo
f_hf_cholesky_sparse_bis(ipoint) *= 2.D0
enddo
!$OMP END DO
deallocate(accu_vec)
!$OMP END PARALLEL
call wall_time(wall1)
print*,'Time to provide f_hf_cholesky_sparse_bis = ',wall1-wall0
else
call wall_time(wall0)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE (accu_vec,delta_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) &
!$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp,thresh_1,thresh_2) &
!$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse_bis,n_points_final_grid,cholesky_mo_transp,n_basis_orb)
allocate(accu_vec(cholesky_mo_num),delta_vec(cholesky_mo_num))
!$OMP DO
do ipoint = 1, n_points_final_grid
f_hf_cholesky_sparse_bis(ipoint) = 0.d0
accu_vec = 0.d0
do ii = 1, n_occ_val_orb_for_hf(2)
i = list_valence_orb_for_hf(ii,2)
mo_i_r1 = mos_in_r_array_omp(i,ipoint)
if(dabs(mo_i_r1).lt.thresh_1)cycle
do mm = 1, n_basis_orb ! electron 1
m = list_basis(mm)
mo_b_r1 = mos_in_r_array_omp(m,ipoint)
if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle
do p = 1, cholesky_mo_num
accu_vec(p) = accu_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i)
enddo
enddo
enddo
delta_vec = 0.d0
do ii = n_occ_val_orb_for_hf(2)+1,n_occ_val_orb_for_hf(1)
i = list_valence_orb_for_hf(ii,1)
mo_i_r1 = mos_in_r_array_omp(i,ipoint)
if(dabs(mo_i_r1).lt.thresh_1)cycle
do mm = 1, n_basis_orb ! electron 1
m = list_basis(mm)
mo_b_r1 = mos_in_r_array_omp(m,ipoint)
if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle
do p = 1, cholesky_mo_num
delta_vec(p) = delta_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i)
enddo
enddo
enddo
do p = 1, cholesky_mo_num
f_hf_cholesky_sparse_bis(ipoint) = f_hf_cholesky_sparse_bis(ipoint) + accu_vec(p) * accu_vec(p) + accu_vec(p) * delta_vec(p)
enddo
f_hf_cholesky_sparse_bis(ipoint) *= 2.D0
enddo
!$OMP END DO
deallocate(accu_vec)
!$OMP END PARALLEL
call wall_time(wall1)
print*,'Time to provide f_hf_cholesky_sparse_bis = ',wall1-wall0
endif
END_PROVIDER
BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)] BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)]
implicit none implicit none
integer :: ipoint,i,ii integer :: ipoint,i,ii

View File

@ -0,0 +1,171 @@
BEGIN_PROVIDER [ double precision, two_e_int_mf, (elec_beta_num,elec_alpha_num,elec_beta_num,elec_alpha_num)]
implicit none
integer :: i,j,k,l
double precision :: get_two_e_integral
do i = 1, elec_alpha_num
do j = 1, elec_beta_num
do k = 1, elec_alpha_num
do l = 1, elec_beta_num
two_e_int_mf(l,k,j,i) = get_two_e_integral(l,k,j,i,mo_integrals_map)
enddo
enddo
enddo
enddo
END_PROVIDER
subroutine get_f_mf_ab(r,f_mf_ab,two_bod_dens, dm_a, dm_b)
implicit none
double precision, intent(in) :: r(3)
double precision, intent(out):: f_mf_ab,two_bod_dens, dm_a, dm_b
double precision, allocatable :: mos_array_r(:),mos_array_a(:), mos_array_b(:)
integer :: i,j,k,l
allocate(mos_array_r(mo_num), mos_array_a(elec_alpha_num), mos_array_b(elec_alpha_num))
call give_all_mos_at_r(r,mos_array_r)
do i = 1, elec_alpha_num
mos_array_a(i) = mos_array_r(i)
enddo
do i = 1, elec_beta_num
mos_array_b(i) = mos_array_r(i)
enddo
dm_a = 0.d0
do i = 1, elec_alpha_num
dm_a += mos_array_a(i) * mos_array_a(i)
enddo
dm_b = 0.d0
do i = 1, elec_beta_num
dm_b += mos_array_b(i) * mos_array_b(i)
enddo
two_bod_dens = dm_a * dm_b
f_mf_ab = 0.d0
do i = 1, elec_alpha_num
do j = 1, elec_beta_num
do k = 1, elec_alpha_num
do l = 1, elec_beta_num
f_mf_ab += two_e_int_mf(l,k,j,i) * mos_array_a(i) * mos_array_a(k) * mos_array_b(j) * mos_array_b(l)
enddo
enddo
enddo
enddo
! multiply by two to adapt to the N(N-1) normalization condition of the active two-rdm
f_mf_ab *= 2.d0
two_bod_dens *= 2.d0
end
subroutine get_grad_f_mf_ab(r,grad_f_mf_ab, grad_two_bod_dens,f_mf_ab,two_bod_dens, dm_a, dm_b,grad_dm_a, grad_dm_b)
implicit none
double precision, intent(in) :: r(3)
double precision, intent(out) :: f_mf_ab, two_bod_dens
double precision, intent(out) :: grad_two_bod_dens(3), grad_f_mf_ab(3)
double precision, intent(out) :: dm_a, dm_b, grad_dm_a(3), grad_dm_b(3)
double precision, allocatable :: mos_array_r(:), mos_grad_array_r(:,:)
double precision, allocatable :: mos_array_a(:), mos_array_b(:)
double precision, allocatable :: mos_grad_array_a(:,:), mos_grad_array_b(:,:)
double precision :: mo_i, mo_j, mo_k, mo_l
double precision :: grad_mo_i(3), grad_mo_j(3), grad_mo_k(3), grad_mo_l(3)
integer :: i,j,k,l
allocate(mos_array_r(mo_num),mos_grad_array_r(3,mo_num))
allocate(mos_array_a(elec_alpha_num), mos_array_b(elec_beta_num))
allocate(mos_grad_array_a(3,elec_alpha_num), mos_grad_array_b(3,elec_beta_num))
call give_all_mos_and_grad_at_r(r,mos_array_r,mos_grad_array_r)
do i = 1, elec_alpha_num
mos_array_a(i) = mos_array_r(i)
mos_grad_array_a(1:3,i) = mos_grad_array_r(1:3,i)
enddo
do i = 1, elec_beta_num
mos_array_b(i) = mos_array_r(i)
mos_grad_array_b(1:3,i) = mos_grad_array_r(1:3,i)
enddo
! ALPHA DENSITY AND GRADIENT
dm_a = 0.d0
grad_dm_a = 0.d0
do i = 1, elec_alpha_num
dm_a += mos_array_a(i) * mos_array_a(i)
grad_dm_a(1:3) += 2.d0 * mos_array_a(i) * mos_grad_array_a(1:3,i)
enddo
! BETA DENSITY AND GRADIENT
dm_b = 0.d0
grad_dm_b = 0.d0
do i = 1, elec_beta_num
dm_b += mos_array_b(i) * mos_array_b(i)
grad_dm_b(1:3) += 2.d0 * mos_array_b(i) * mos_grad_array_b(1:3,i)
enddo
! TWO-BODY DENSITY AND GRADIENT
two_bod_dens = dm_a * dm_b
grad_two_bod_dens(1:3) = dm_a * grad_dm_b(1:3) + dm_b * grad_dm_a(1:3)
! F_MF and GRADIENT
grad_f_mf_ab = 0.d0
f_mf_ab = 0.d0
do i = 1, elec_alpha_num
mo_i = mos_array_a(i)
grad_mo_i(1:3) = mos_grad_array_a(1:3,i)
do j = 1, elec_beta_num
mo_j = mos_array_b(j)
grad_mo_j(1:3) = mos_grad_array_b(1:3,j)
do k = 1, elec_alpha_num
mo_k = mos_array_a(k)
grad_mo_k(1:3) = mos_grad_array_a(1:3,k)
do l = 1, elec_beta_num
mo_l = mos_array_b(l)
grad_mo_l(1:3) = mos_grad_array_b(1:3,l)
f_mf_ab += two_e_int_mf(l,k,j,i) * mo_i * mo_j * mo_k * mo_l
grad_f_mf_ab(1:3) += two_e_int_mf(l,k,j,i) * &
(mo_i * mo_j * mo_k * grad_mo_l(1:3) + mo_i * mo_j * grad_mo_k(1:3) * mo_l &
+mo_i * grad_mo_j(1:3) * mo_k * mo_l + grad_mo_i(1:3) * mo_j * mo_k * mo_l)
enddo
enddo
enddo
enddo
f_mf_ab *= 2.d0
two_bod_dens *= 2.d0
grad_f_mf_ab *= 2.D0
grad_two_bod_dens *= 2.d0
end
subroutine mu_of_r_mean_field(r,mu_mf, dm)
implicit none
include 'constants.include.F'
double precision, intent(in) :: r(3)
double precision, intent(out):: mu_mf, dm
double precision :: f_mf_ab,two_bod_dens, dm_a, dm_b
call get_f_mf_ab(r,f_mf_ab,two_bod_dens, dm_a, dm_b)
dm = dm_a + dm_b
if(dabs(two_bod_dens).lt.1.d-10)then
mu_mf = 1.d+10
else
mu_mf = 0.5d0 * sqpi * f_mf_ab/two_bod_dens
endif
end
subroutine grad_mu_of_r_mean_field(r,mu_mf, dm, grad_mu_mf, grad_dm)
implicit none
include 'constants.include.F'
double precision, intent(in) :: r(3)
double precision, intent(out):: grad_mu_mf(3), grad_dm(3)
double precision, intent(out):: mu_mf, dm
double precision :: grad_f_mf_ab(3), grad_two_bod_dens(3),grad_dm_a(3), grad_dm_b(3)
double precision :: f_mf_ab,two_bod_dens, dm_a, dm_b
call get_grad_f_mf_ab(r,grad_f_mf_ab, grad_two_bod_dens,f_mf_ab,two_bod_dens, dm_a, dm_b,grad_dm_a, grad_dm_b)
dm = dm_a + dm_b
grad_dm(1:3) = grad_dm_a(1:3) + grad_dm_b(1:3)
if(dabs(two_bod_dens).lt.1.d-10)then
mu_mf = 1.d+10
grad_mu_mf = 0.d0
else
mu_mf = 0.5d0 * sqpi * f_mf_ab/two_bod_dens
grad_mu_mf(1:3) = 0.5d0 * sqpi * (grad_f_mf_ab(1:3) * two_bod_dens - f_mf_ab * grad_two_bod_dens(1:3))&
/(two_bod_dens*two_bod_dens)
endif
end

View File

@ -15,7 +15,162 @@ program projected_operators
! call test_f_HF_valence_ab ! call test_f_HF_valence_ab
! call routine_full_mos ! call routine_full_mos
! call test_f_ii_valence_ab ! call test_f_ii_valence_ab
call test_f_ia_valence_ab ! call test_f_ia_valence_ab
call test_f_ii_ia_aa_valence_ab ! call test_f_ii_ia_aa_valence_ab
! call test
! call test_f_mean_field
! call test_grad_f_mean_field
call test_grad_mu_mf
end end
subroutine test
implicit none
integer :: i_point
double precision :: ref, new, accu, weight
accu = 0.d0
do i_point = 1, n_points_final_grid
ref = f_hf_cholesky_sparse(i_point)
new = f_hf_cholesky_sparse_bis(i_point)
weight = final_weight_at_r_vector(i_point)
accu += dabs(ref - new) * weight
enddo
print*,'accu = ',accu
end
subroutine test_f_mean_field
implicit none
integer :: i_point
double precision :: weight,r(3)
double precision :: ref_f, new_f, accu_f
double precision :: ref_two_dens, new_two_dens, accu_two_dens, dm_a, dm_b
accu_f = 0.d0
accu_two_dens = 0.d0
do i_point = 1, n_points_final_grid
r(1:3) = final_grid_points(1:3,i_point)
weight = final_weight_at_r_vector(i_point)
call get_f_mf_ab(r,new_f,new_two_dens, dm_a, dm_b)
call f_HF_valence_ab(r,r,ref_f,ref_two_dens)
accu_f += weight * dabs(new_f- ref_f)
accu_two_dens += weight * dabs(new_two_dens - ref_two_dens)
enddo
print*,'accu_f = ',accu_f
print*,'accu_two_dens = ',accu_two_dens
end
subroutine test_grad_f_mean_field
implicit none
integer :: i_point,k
double precision :: weight,r(3)
double precision :: grad_f_mf_ab(3), grad_two_bod_dens(3)
double precision :: grad_dm_a(3), grad_dm_b(3)
double precision :: f_mf_ab,two_bod_dens, dm_a, dm_b
double precision :: num_grad_f_mf_ab(3), num_grad_two_bod_dens(3)
double precision :: num_grad_dm_a(3), num_grad_dm_b(3)
double precision :: f_mf_ab_p,f_mf_ab_m
double precision :: two_bod_dens_p, two_bod_dens_m
double precision :: dm_a_p, dm_a_m
double precision :: dm_b_p, dm_b_m
double precision :: rbis(3), dr
double precision :: accu_grad_f_mf_ab(3),accu_grad_two_bod_dens(3)
double precision :: accu_grad_dm_a(3),accu_grad_dm_b(3)
double precision :: accu_f_mf_ab, accu_two_bod_dens, accu_dm_a, accu_dm_b
dr = 0.00001d0
accu_f_mf_ab = 0.d0
accu_two_bod_dens = 0.d0
accu_dm_a = 0.d0
accu_dm_b = 0.d0
accu_grad_f_mf_ab = 0.d0
accu_grad_two_bod_dens = 0.d0
accu_grad_dm_a = 0.d0
accu_grad_dm_b = 0.d0
do i_point = 1, n_points_final_grid
r(1:3) = final_grid_points(1:3,i_point)
weight = final_weight_at_r_vector(i_point)
call get_grad_f_mf_ab(r,grad_f_mf_ab, grad_two_bod_dens,f_mf_ab,two_bod_dens, dm_a, dm_b,grad_dm_a, grad_dm_b)
call get_f_mf_ab(r,f_mf_ab_p,two_bod_dens_p, dm_a_p, dm_b_p)
accu_f_mf_ab += weight * dabs(f_mf_ab - f_mf_ab_p)
accu_two_bod_dens += weight * dabs(two_bod_dens - two_bod_dens_p)
accu_dm_a += weight*dabs(dm_a - dm_a_p)
accu_dm_b += weight*dabs(dm_b - dm_b_p)
do k = 1, 3
rbis = r
rbis(k) += dr
call get_f_mf_ab(rbis,f_mf_ab_p,two_bod_dens_p, dm_a_p, dm_b_p)
rbis = r
rbis(k) -= dr
call get_f_mf_ab(rbis,f_mf_ab_m,two_bod_dens_m, dm_a_m, dm_b_m)
num_grad_f_mf_ab(k) = (f_mf_ab_p - f_mf_ab_m)/(2.d0*dr)
num_grad_two_bod_dens(k) = (two_bod_dens_p - two_bod_dens_m)/(2.d0*dr)
num_grad_dm_a(k) = (dm_a_p - dm_a_m)/(2.d0*dr)
num_grad_dm_b(k) = (dm_b_p - dm_b_m)/(2.d0*dr)
enddo
do k = 1, 3
accu_grad_f_mf_ab(k) += weight * dabs(grad_f_mf_ab(k) - num_grad_f_mf_ab(k))
accu_grad_two_bod_dens(k) += weight * dabs(grad_two_bod_dens(k) - num_grad_two_bod_dens(k))
accu_grad_dm_a(k) += weight * dabs(grad_dm_a(k) - num_grad_dm_a(k))
accu_grad_dm_b(k) += weight * dabs(grad_dm_b(k) - num_grad_dm_b(k))
enddo
enddo
print*,'accu_f_mf_ab = ',accu_f_mf_ab
print*,'accu_two_bod_dens = ',accu_two_bod_dens
print*,'accu_dm_a = ',accu_dm_a
print*,'accu_dm_b = ',accu_dm_b
print*,'accu_grad_f_mf_ab = '
print*,accu_grad_f_mf_ab
print*,'accu_grad_two_bod_dens = '
print*,accu_grad_two_bod_dens
print*,'accu_dm_a = '
print*,accu_grad_dm_a
print*,'accu_dm_b = '
print*,accu_grad_dm_b
end
subroutine test_grad_mu_mf
implicit none
integer :: i_point,k
double precision :: weight,r(3),rbis(3)
double precision :: mu_mf, dm,grad_mu_mf(3), grad_dm(3)
double precision :: mu_mf_p, mu_mf_m, dm_m, dm_p, num_grad_mu_mf(3),dr, num_grad_dm(3)
double precision :: accu_mu, accu_dm, accu_grad_dm(3), accu_grad_mu_mf(3)
dr = 0.00001d0
accu_grad_mu_mf = 0.d0
accu_mu = 0.d0
accu_grad_dm = 0.d0
accu_dm = 0.d0
do i_point = 1, n_points_final_grid
r(1:3) = final_grid_points(1:3,i_point)
weight = final_weight_at_r_vector(i_point)
call grad_mu_of_r_mean_field(r,mu_mf, dm, grad_mu_mf, grad_dm)
call mu_of_r_mean_field(r,mu_mf_p, dm_p)
accu_mu += weight*dabs(mu_mf_p - mu_mf)
accu_dm += weight*dabs(dm_p - dm)
do k = 1, 3
rbis = r
rbis(k) += dr
call mu_of_r_mean_field(rbis,mu_mf_p, dm_p)
rbis = r
rbis(k) -= dr
call mu_of_r_mean_field(rbis,mu_mf_m, dm_m)
num_grad_mu_mf(k) = (mu_mf_p - mu_mf_m)/(2.d0*dr)
num_grad_dm(k) = (dm_p - dm_m)/(2.d0*dr)
enddo
do k = 1, 3
accu_grad_dm(k)+= weight *dabs(num_grad_dm(k) - grad_dm(k))
accu_grad_mu_mf(k)+= weight *dabs(num_grad_mu_mf(k) - grad_mu_mf(k))
enddo
enddo
print*,'accu_mu = ',accu_mu
print*,'accu_dm = ',accu_dm
print*,'accu_grad_dm = '
print*, accu_grad_dm
print*,'accu_grad_mu_mf = '
print*, accu_grad_mu_mf
end

View File

@ -12,6 +12,9 @@ program four_idx_transform
! !
END_DOC END_DOC
if (do_mo_cholesky) then
stop 'Not implemented with Cholesky integrals'
endif
io_mo_two_e_integrals = 'Write' io_mo_two_e_integrals = 'Write'
SOFT_TOUCH io_mo_two_e_integrals SOFT_TOUCH io_mo_two_e_integrals
if (.true.) then if (.true.) then

Some files were not shown because too many files have changed in this diff Show More