#pragma once #include #include //#define USE_OMP //#define USE_OMP_OFFLOAD_CUDA #ifdef USE_OMP_OFFLOAD_CUDA #include #include #include #include #include #endif lapack_int inverse(double *A, uint64_t Dim, uint64_t Lds); int min(int a, int b); uint32_t qmckl_sherman_morrison( const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates, const double *__restrict __attribute__((aligned(8))) Updates, const uint64_t *__restrict Updates_index, const double breakdown, double *__restrict __attribute__((aligned(8))) Slater_inv, double *__restrict determinant); uint32_t qmckl_sherman_morrison_splitting( const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates, const double *__restrict __attribute__((aligned(8))) Updates, const uint64_t *__restrict Updates_index, const double breakdown, double *__restrict __attribute__((aligned(8))) Slater_inv, double *__restrict determinant); uint32_t qmckl_sherman_morrison_smw32s( const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates, const double *__restrict __attribute__((aligned(8))) Updates, const uint64_t *__restrict Updates_index, const double breakdown, double *__restrict __attribute__((aligned(8))) Slater_inv, double *__restrict determinant); uint32_t qmckl_woodbury_3( const uint64_t vLDS, const uint64_t vDim, const double *__restrict __attribute__((aligned(8))) Updates, const uint64_t *__restrict Updates_index, const double breakdown, double *__restrict __attribute__((aligned(8))) Slater_inv, double *__restrict determinant); uint32_t qmckl_woodbury_k( const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates, const double *__restrict __attribute__((aligned(8))) Updates, const uint64_t *__restrict Updates_index, const double breakdown, double *__restrict __attribute__((aligned(8))) Slater_inv, double *__restrict determinant); #ifdef USE_OMP uint32_t qmckl_woodbury_k_omp( const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates, const double *__restrict __attribute__((aligned(8))) Updates, const uint64_t *__restrict Updates_index, const double breakdown, double *__restrict __attribute__((aligned(8))) Slater_inv, double *__restrict determinant); #endif #ifdef USE_OMP_OFFLOAD_CUDA uint32_t qmckl_woodbury_k_ompol_cuda_async( cublasHandle_t b_handle, cusolverDnHandle_t s_handle, const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates, const double *__restrict __attribute__((aligned(8))) Updates, const uint64_t *__restrict Updates_index, const double breakdown, double *__restrict __attribute__((aligned(8))) Slater_inv, double *__restrict determinant); uint32_t qmckl_woodbury_k_ompol_cuda_sync( cublasHandle_t b_handle, cusolverDnHandle_t s_handle, const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates, const double *__restrict __attribute__((aligned(8))) Updates, const uint64_t *__restrict Updates_index, const double breakdown, double *__restrict __attribute__((aligned(8))) Slater_inv, double *__restrict determinant); #endif uint32_t qmckl_woodbury_2( const uint64_t vLDS, const uint64_t vDim, const double *__restrict __attribute__((aligned(8))) Updates, const uint64_t *__restrict Updates_index, const double breakdown, double *__restrict __attribute__((aligned(8))) Slater_inv, double *__restrict determinant); void detupd( const uint64_t Dim, const uint64_t Lds, const double *__restrict __attribute__((aligned(8))) Updates, const uint64_t *__restrict Updates_index, double *__restrict __attribute__((aligned(8))) Slater_inv, double *__restrict determinant); uint32_t qmckl_sherman_morrison_later( const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates, const double *__restrict __attribute__((aligned(8))) Updates, const uint64_t *__restrict Updates_index, const double breakdown, double *__restrict __attribute__((aligned(8))) Slater_inv, double *__restrict determinant);