2022-10-17 14:56:32 +02:00
|
|
|
#pragma once
|
|
|
|
|
2022-07-11 14:48:59 +02:00
|
|
|
#include <mkl_lapacke.h>
|
2022-07-20 19:09:55 +02:00
|
|
|
#include <mkl.h>
|
2022-07-11 14:48:59 +02:00
|
|
|
|
2022-11-08 15:35:25 +01:00
|
|
|
//#define USE_OMP
|
|
|
|
//#define USE_OMP_OFFLOAD_CUDA
|
2022-07-21 12:21:51 +02:00
|
|
|
|
2022-11-08 15:35:25 +01:00
|
|
|
#ifdef USE_OMP_OFFLOAD_CUDA
|
2022-10-10 11:01:53 +02:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <cublas_v2.h>
|
|
|
|
#include <cusolverDn.h>
|
|
|
|
#include <cusolver_common.h>
|
|
|
|
#include <cuda_runtime_api.h>
|
2022-07-21 12:21:51 +02:00
|
|
|
#endif
|
|
|
|
|
2022-07-22 11:34:29 +02:00
|
|
|
// Prototype for inverse(); no function body is visible here.
//
// Takes a writable double buffer A and two unsigned 64-bit sizes, Dim and
// Lds, and hands back a lapack_int.
//
// NOTE(review): presumably inverts the Dim x Dim matrix stored in A in place
// (leading dimension Lds) and returns the LAPACKE status code -- confirm
// against the definition.
lapack_int inverse(
    double *A,
    uint64_t Dim,
    uint64_t Lds);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
2022-07-20 19:09:55 +02:00
|
|
|
// Prototype for an integer min() helper; no function body is visible here.
//
// NOTE(review): presumably returns the smaller of a and b -- confirm against
// the definition.
int min(
    int a,
    int b);
|
|
|
|
|
2022-07-11 14:48:59 +02:00
|
|
|
// Prototype for qmckl_sherman_morrison(); no function body is visible here.
//
// NOTE(review): the name suggests a Sherman-Morrison update of an inverse
// Slater matrix -- confirm against the definition. The parameter notes below
// marked "presumably" are inferred from names only and need the same check.
//
//   vLDS, vDim     presumably leading dimension and dimension of Slater_inv
//   N_updates      presumably the number of updates stored in Updates
//   Updates        input buffer, read-only through this pointer
//   Updates_index  input buffer, read-only through this pointer
//   breakdown      presumably a numerical threshold -- TODO confirm
//   Slater_inv     non-const pointer, so the callee may modify the buffer
//   determinant    non-const pointer, so the callee may modify the value
//
// Every pointer parameter is __restrict-qualified, so the declaration
// promises that the buffers do not alias one another. Updates and Slater_inv
// additionally carry an aligned(8) attribute.
//
// Returns a uint32_t; its meaning is not visible from the prototype.
uint32_t qmckl_sherman_morrison(
    const uint64_t vLDS,
    const uint64_t vDim,
    const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index,
    const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// Prototype for qmckl_sherman_morrison_splitting(); no function body is
// visible here.
//
// NOTE(review): the name suggests a Sherman-Morrison variant that splits
// problematic updates -- confirm against the definition. Parameter notes
// marked "presumably" are inferred from names only.
//
//   vLDS, vDim     presumably leading dimension and dimension of Slater_inv
//   N_updates      presumably the number of updates stored in Updates
//   Updates        input buffer, read-only through this pointer
//   Updates_index  input buffer, read-only through this pointer
//   breakdown      presumably a numerical threshold -- TODO confirm
//   Slater_inv     non-const pointer, so the callee may modify the buffer
//   determinant    non-const pointer, so the callee may modify the value
//
// Every pointer parameter is __restrict-qualified, so the declaration
// promises that the buffers do not alias one another. Updates and Slater_inv
// additionally carry an aligned(8) attribute.
//
// Returns a uint32_t; its meaning is not visible from the prototype.
uint32_t qmckl_sherman_morrison_splitting(
    const uint64_t vLDS,
    const uint64_t vDim,
    const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index,
    const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// Prototype for qmckl_sherman_morrison_smw32s(); no function body is visible
// here.
//
// NOTE(review): the "smw32s" suffix is not explained by the declaration;
// presumably it names a combined Sherman-Morrison/Woodbury strategy --
// confirm against the definition. Parameter notes marked "presumably" are
// inferred from names only.
//
//   vLDS, vDim     presumably leading dimension and dimension of Slater_inv
//   N_updates      presumably the number of updates stored in Updates
//   Updates        input buffer, read-only through this pointer
//   Updates_index  input buffer, read-only through this pointer
//   breakdown      presumably a numerical threshold -- TODO confirm
//   Slater_inv     non-const pointer, so the callee may modify the buffer
//   determinant    non-const pointer, so the callee may modify the value
//
// Every pointer parameter is __restrict-qualified, so the declaration
// promises that the buffers do not alias one another. Updates and Slater_inv
// additionally carry an aligned(8) attribute.
//
// Returns a uint32_t; its meaning is not visible from the prototype.
uint32_t qmckl_sherman_morrison_smw32s(
    const uint64_t vLDS,
    const uint64_t vDim,
    const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index,
    const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);
|
|
|
|
|
|
|
|
// Prototype for qmckl_woodbury_3(); no function body is visible here.
//
// Unlike the neighbouring prototypes that take an N_updates argument, this
// one has no update-count parameter.
// NOTE(review): presumably a Woodbury update specialised for exactly three
// updates -- confirm against the definition. Parameter notes marked
// "presumably" are inferred from names only.
//
//   vLDS, vDim     presumably leading dimension and dimension of Slater_inv
//   Updates        input buffer, read-only through this pointer
//   Updates_index  input buffer, read-only through this pointer
//   breakdown      presumably a numerical threshold -- TODO confirm
//   Slater_inv     non-const pointer, so the callee may modify the buffer
//   determinant    non-const pointer, so the callee may modify the value
//
// Every pointer parameter is __restrict-qualified, so the declaration
// promises that the buffers do not alias one another. Updates and Slater_inv
// additionally carry an aligned(8) attribute.
//
// Returns a uint32_t; its meaning is not visible from the prototype.
uint32_t qmckl_woodbury_3(
    const uint64_t vLDS,
    const uint64_t vDim,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index,
    const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);
|
|
|
|
|
|
|
|
// Prototype for qmckl_woodbury_k(); no function body is visible here.
//
// NOTE(review): presumably a Woodbury update for a general number of
// updates (N_updates) -- confirm against the definition. Parameter notes
// marked "presumably" are inferred from names only.
//
//   vLDS, vDim     presumably leading dimension and dimension of Slater_inv
//   N_updates      presumably the number of updates stored in Updates
//   Updates        input buffer, read-only through this pointer
//   Updates_index  input buffer, read-only through this pointer
//   breakdown      presumably a numerical threshold -- TODO confirm
//   Slater_inv     non-const pointer, so the callee may modify the buffer
//   determinant    non-const pointer, so the callee may modify the value
//
// Every pointer parameter is __restrict-qualified, so the declaration
// promises that the buffers do not alias one another. Updates and Slater_inv
// additionally carry an aligned(8) attribute.
//
// Returns a uint32_t; its meaning is not visible from the prototype.
uint32_t qmckl_woodbury_k(
    const uint64_t vLDS,
    const uint64_t vDim,
    const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index,
    const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);
|
|
|
|
|
|
|
|
#ifdef USE_OMP
|
|
|
|
// Prototype for qmckl_woodbury_k_omp(); no function body is visible here.
// Only declared when USE_OMP is defined (see the enclosing #ifdef).
//
// The parameter list is identical to qmckl_woodbury_k().
// NOTE(review): presumably the OpenMP-parallel counterpart of
// qmckl_woodbury_k() -- confirm against the definition. Parameter notes
// marked "presumably" are inferred from names only.
//
//   vLDS, vDim     presumably leading dimension and dimension of Slater_inv
//   N_updates      presumably the number of updates stored in Updates
//   Updates        input buffer, read-only through this pointer
//   Updates_index  input buffer, read-only through this pointer
//   breakdown      presumably a numerical threshold -- TODO confirm
//   Slater_inv     non-const pointer, so the callee may modify the buffer
//   determinant    non-const pointer, so the callee may modify the value
//
// Every pointer parameter is __restrict-qualified, so the declaration
// promises that the buffers do not alias one another. Updates and Slater_inv
// additionally carry an aligned(8) attribute.
//
// Returns a uint32_t; its meaning is not visible from the prototype.
uint32_t qmckl_woodbury_k_omp(
    const uint64_t vLDS,
    const uint64_t vDim,
    const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index,
    const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);
|
2022-07-21 12:21:51 +02:00
|
|
|
#endif
|
|
|
|
|
2022-11-08 15:35:25 +01:00
|
|
|
#ifdef USE_OMP_OFFLOAD_CUDA
|
|
|
|
uint32_t qmckl_woodbury_k_ompol_cuda_async(
|
|
|
|
cublasHandle_t b_handle,
|
|
|
|
cusolverDnHandle_t s_handle,
|
|
|
|
const uint64_t vLDS,
|
|
|
|
const uint64_t vDim,
|
|
|
|
const uint64_t N_updates,
|
|
|
|
const double *__restrict __attribute__((aligned(8))) Updates,
|
|
|
|
const uint64_t *__restrict Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double *__restrict __attribute__((aligned(8))) Slater_inv,
|
|
|
|
double *__restrict determinant);
|
|
|
|
|
|
|
|
uint32_t qmckl_woodbury_k_ompol_cuda_sync(
|
|
|
|
cublasHandle_t b_handle,
|
|
|
|
cusolverDnHandle_t s_handle,
|
|
|
|
const uint64_t vLDS,
|
|
|
|
const uint64_t vDim,
|
|
|
|
const uint64_t N_updates,
|
|
|
|
const double *__restrict __attribute__((aligned(8))) Updates,
|
|
|
|
const uint64_t *__restrict Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double *__restrict __attribute__((aligned(8))) Slater_inv,
|
|
|
|
double *__restrict determinant);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// Prototype for qmckl_woodbury_2(); no function body is visible here.
//
// Like qmckl_woodbury_3(), this prototype has no update-count parameter.
// NOTE(review): presumably a Woodbury update specialised for exactly two
// updates -- confirm against the definition. Parameter notes marked
// "presumably" are inferred from names only.
//
//   vLDS, vDim     presumably leading dimension and dimension of Slater_inv
//   Updates        input buffer, read-only through this pointer
//   Updates_index  input buffer, read-only through this pointer
//   breakdown      presumably a numerical threshold -- TODO confirm
//   Slater_inv     non-const pointer, so the callee may modify the buffer
//   determinant    non-const pointer, so the callee may modify the value
//
// Every pointer parameter is __restrict-qualified, so the declaration
// promises that the buffers do not alias one another. Updates and Slater_inv
// additionally carry an aligned(8) attribute.
//
// Returns a uint32_t; its meaning is not visible from the prototype.
uint32_t qmckl_woodbury_2(
    const uint64_t vLDS,
    const uint64_t vDim,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index,
    const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);
|
|
|
|
|
|
|
|
// Prototype for detupd(); no function body is visible here.
//
// Differs from the qmckl_* prototypes in this header: the size arguments are
// named Dim and Lds (in that order), there is no breakdown or update-count
// parameter, and nothing is returned.
// NOTE(review): presumably updates *determinant for a single update of the
// inverse matrix -- confirm against the definition. Parameter notes marked
// "presumably" are inferred from names only.
//
//   Dim, Lds       presumably dimension and leading dimension of Slater_inv
//   Updates        input buffer, read-only through this pointer
//   Updates_index  input buffer, read-only through this pointer
//   Slater_inv     non-const pointer, so the callee may modify the buffer
//   determinant    non-const pointer, so the callee may modify the value
//
// Every pointer parameter is __restrict-qualified, so the declaration
// promises that the buffers do not alias one another. Updates and Slater_inv
// additionally carry an aligned(8) attribute.
void detupd(
    const uint64_t Dim,
    const uint64_t Lds,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// Prototype for qmckl_sherman_morrison_later(); no function body is visible
// here.
//
// NOTE(review): the name suggests a Sherman-Morrison variant that postpones
// problematic updates until later -- confirm against the definition.
// Parameter notes marked "presumably" are inferred from names only.
//
//   vLDS, vDim     presumably leading dimension and dimension of Slater_inv
//   N_updates      presumably the number of updates stored in Updates
//   Updates        input buffer, read-only through this pointer
//   Updates_index  input buffer, read-only through this pointer
//   breakdown      presumably a numerical threshold -- TODO confirm
//   Slater_inv     non-const pointer, so the callee may modify the buffer
//   determinant    non-const pointer, so the callee may modify the value
//
// Every pointer parameter is __restrict-qualified, so the declaration
// promises that the buffers do not alias one another. Updates and Slater_inv
// additionally carry an aligned(8) attribute.
//
// Returns a uint32_t; its meaning is not visible from the prototype.
uint32_t qmckl_sherman_morrison_later(
    const uint64_t vLDS,
    const uint64_t vDim,
    const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index,
    const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);
|