60 KiB
BLAS functions
Data types
Vector
Variable | Type | Description |
---|---|---|
size |
int64_t |
Dimension of the vector |
data |
double* |
Elements |
typedef struct qmckl_vector {
int64_t size;
double* data;
} qmckl_vector;
qmckl_vector
qmckl_vector_alloc( qmckl_context context,
const int64_t size);
Allocates a new vector. If the allocation failed the size is zero.
qmckl_vector
qmckl_vector_alloc( qmckl_context context,
const int64_t size)
{
/* Should always be true by contruction */
assert (size > (int64_t) 0);
qmckl_vector result;
result.size = size;
qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
mem_info.size = size * sizeof(double);
result.data = (double*) qmckl_malloc (context, mem_info);
if (result.data == NULL) {
result.size = (int64_t) 0;
}
return result;
}
qmckl_exit_code
qmckl_vector_free( qmckl_context context,
qmckl_vector vector);
qmckl_exit_code
qmckl_vector_free( qmckl_context context,
qmckl_vector vector)
{
/* Always true */
assert (vector.data != NULL);
qmckl_exit_code rc;
rc = qmckl_free(context, vector.data);
if (rc != QMCKL_SUCCESS) {
return rc;
}
vector.size = (int64_t) 0;
vector.data = NULL;
return QMCKL_SUCCESS;
}
Matrix
Variable | Type | Description |
---|---|---|
size |
int64_t[2] |
Dimension of each component |
data |
double* |
Elements |
The dimensions use Fortran ordering: two elements differing by one in the first dimension are consecutive in memory.
typedef struct qmckl_matrix {
int64_t size[2];
double* data;
} qmckl_matrix;
qmckl_matrix
qmckl_matrix_alloc( qmckl_context context,
const int64_t size1,
const int64_t size2);
Allocates a new matrix. If the allocation failed the sizes are zero.
qmckl_matrix
qmckl_matrix_alloc( qmckl_context context,
const int64_t size1,
const int64_t size2)
{
/* Should always be true by contruction */
assert (size1 * size2 > (int64_t) 0);
qmckl_matrix result;
result.size[0] = size1;
result.size[1] = size2;
qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
mem_info.size = size1 * size2 * sizeof(double);
result.data = (double*) qmckl_malloc (context, mem_info);
if (result.data == NULL) {
result.size[0] = (int64_t) 0;
result.size[1] = (int64_t) 0;
}
return result;
}
qmckl_exit_code
qmckl_matrix_free( qmckl_context context,
qmckl_matrix matrix);
qmckl_exit_code
qmckl_matrix_free( qmckl_context context,
qmckl_matrix matrix)
{
/* Always true */
assert (matrix.data != NULL);
qmckl_exit_code rc;
rc = qmckl_free(context, matrix.data);
if (rc != QMCKL_SUCCESS) {
return rc;
}
matrix.data = NULL;
matrix.size[0] = (int64_t) 0;
matrix.size[1] = (int64_t) 0;
return QMCKL_SUCCESS;
}
Tensor
Variable | Type | Description |
---|---|---|
order |
int64_t |
Order of the tensor |
size |
int64_t[QMCKL_TENSOR_ORDER_MAX] |
Dimension of each component |
data |
double* |
Elements |
The dimensions use Fortran ordering: two elements differing by one in the first dimension are consecutive in memory.
#define QMCKL_TENSOR_ORDER_MAX 16
typedef struct qmckl_tensor {
int64_t order;
int64_t size[QMCKL_TENSOR_ORDER_MAX];
double* data;
} qmckl_tensor;
qmckl_tensor
qmckl_tensor_alloc( qmckl_context context,
const int64_t order,
const int64_t* size);
Allocates memory for a tensor. If the allocation failed, the size is zero.
qmckl_tensor
qmckl_tensor_alloc( qmckl_context context,
const int64_t order,
const int64_t* size)
{
/* Should always be true by contruction */
assert (order > 0);
assert (order <= QMCKL_TENSOR_ORDER_MAX);
assert (size != NULL);
qmckl_tensor result;
result.order = order;
int64_t prod_size = (int64_t) 1;
for (int64_t i=0 ; i<order ; ++i) {
assert (size[i] > (int64_t) 0);
result.size[i] = size[i];
prod_size *= size[i];
}
qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
mem_info.size = prod_size * sizeof(double);
result.data = (double*) qmckl_malloc (context, mem_info);
if (result.data == NULL) {
memset(&result, 0, sizeof(qmckl_tensor));
}
return result;
}
qmckl_exit_code
qmckl_tensor_free( qmckl_context context,
qmckl_tensor tensor);
qmckl_exit_code
qmckl_tensor_free( qmckl_context context,
qmckl_tensor tensor)
{
/* Always true */
assert (tensor.data != NULL);
qmckl_exit_code rc;
rc = qmckl_free(context, tensor.data);
if (rc != QMCKL_SUCCESS) {
return rc;
}
memset(&tensor, 0, sizeof(qmckl_tensor));
return QMCKL_SUCCESS;
}
Reshaping
Reshaping occurs in-place and the pointer to the data is copied.
Vector -> Matrix
qmckl_matrix
qmckl_matrix_of_vector(const qmckl_vector vector,
const int64_t size1,
const int64_t size2);
Reshapes a vector into a matrix.
qmckl_matrix
qmckl_matrix_of_vector(const qmckl_vector vector,
const int64_t size1,
const int64_t size2)
{
/* Always true */
assert (size1 * size2 == vector.size);
qmckl_matrix result;
result.size[0] = size1;
result.size[1] = size2;
result.data = vector.data;
return result;
}
Vector -> Tensor
qmckl_tensor
qmckl_tensor_of_vector(const qmckl_vector vector,
const int64_t order,
const int64_t* size);
Reshapes a vector into a tensor.
qmckl_tensor
qmckl_tensor_of_vector(const qmckl_vector vector,
const int64_t order,
const int64_t* size)
{
qmckl_tensor result;
int64_t prod_size = 1;
for (int64_t i=0 ; i<order ; ++i) {
result.size[i] = size[i];
prod_size *= size[i];
}
assert (prod_size == vector.size);
result.data = vector.data;
return result;
}
Matrix -> Vector
qmckl_vector
qmckl_vector_of_matrix(const qmckl_matrix matrix,
const int64_t size);
Reshapes a matrix into a vector.
qmckl_vector
qmckl_vector_of_matrix(const qmckl_matrix matrix,
const int64_t size)
{
/* Always true */
assert (matrix.size[0] * matrix.size[1] == size);
qmckl_vector result;
result.size = size;
result.data = matrix.data;
return result;
}
Matrix -> Tensor
qmckl_tensor
qmckl_tensor_of_matrix(const qmckl_matrix matrix,
const int64_t order,
const int64_t* size);
Reshapes a matrix into a tensor.
qmckl_tensor
qmckl_tensor_of_matrix(const qmckl_matrix matrix,
const int64_t order,
const int64_t* size)
{
qmckl_tensor result;
int64_t prod_size = 1;
for (int64_t i=0 ; i<order ; ++i) {
result.size[i] = size[i];
prod_size *= size[i];
}
assert (prod_size == matrix.size[0] * matrix.size[1]);
result.data = matrix.data;
return result;
}
Tensor -> Vector
qmckl_vector
qmckl_vector_of_tensor(const qmckl_tensor tensor,
const int64_t size);
Reshapes a tensor into a vector.
qmckl_vector
qmckl_vector_of_tensor(const qmckl_tensor tensor,
const int64_t size)
{
/* Always true */
int64_t prod_size = (int64_t) 1;
for (int64_t i=0 ; i<tensor.order ; i++) {
prod_size *= tensor.size[i];
}
assert (prod_size == size);
qmckl_vector result;
result.size = size;
result.data = tensor.data;
return result;
}
Tensor -> Matrix
qmckl_matrix
qmckl_matrix_of_tensor(const qmckl_tensor tensor,
const int64_t size1,
const int64_t size2);
Reshapes a tensor into a vector.
qmckl_matrix
qmckl_matrix_of_tensor(const qmckl_tensor tensor,
const int64_t size1,
const int64_t size2)
{
/* Always true */
int64_t prod_size = (int64_t) 1;
for (int64_t i=0 ; i<tensor.order ; i++) {
prod_size *= tensor.size[i];
}
assert (prod_size == size1 * size2);
qmckl_matrix result;
result.size[0] = size1;
result.size[1] = size2;
result.data = tensor.data;
return result;
}
Access macros
#define qmckl_vec(v, i) v.data[i]
#define qmckl_mat(m, i, j) m.data[(i) + (j)*m.size[0]]
#define qmckl_ten3(t, i, j, k) t.data[(i) + m.size[0]*((j) + size[1]*(k))]
#define qmckl_ten4(t, i, j, k, l) t.data[(i) + m.size[0]*((j) + size[1]*((k) + size[2]*(l)))]
#define qmckl_ten5(t, i, j, k, l, m) t.data[(i) + m.size[0]*((j) + size[1]*((k) + size[2]*((l) + size[3]*(m))))]
Tests
{
int64_t m = 3;
int64_t n = 4;
int64_t p = m*n;
qmckl_vector vec = qmckl_vector_alloc(context, p);
for (int64_t i=0 ; i<p ; ++i)
qmckl_vec(vec, i) = (double) i;
for (int64_t i=0 ; i<p ; ++i)
assert( vec.data[i] == (double) i );
qmckl_matrix mat = qmckl_matrix_of_vector(vec, m, n);
assert (mat.size[0] == m);
assert (mat.size[1] == n);
assert (mat.data == vec.data);
for (int64_t j=0 ; j<n ; ++j)
for (int64_t i=0 ; i<m ; ++i)
assert ( qmckl_mat(mat, i, j) == qmckl_vec(vec, i+j*m)) ;
qmckl_vector vec2 = qmckl_vector_of_matrix(mat, p);
assert (vec2.size == p);
assert (vec2.data == vec.data);
for (int64_t i=0 ; i<p ; ++i)
assert ( qmckl_vec(vec2, i) == qmckl_vec(vec, i) ) ;
qmckl_vector_free(context, vec);
}
Matrix operations
qmckl_dgemm
Matrix multiplication:
\[ C_{ij} = \beta C_{ij} + \alpha \sum_{k} A_{ik} \cdot B_{kj} \]
Variable | Type | In/Out | Description |
---|---|---|---|
context |
qmckl_context |
in | Global state |
TransA |
char |
in | 'T' is transposed |
TransB |
char |
in | 'T' is transposed |
m |
int64_t |
in | Number of rows of the input matrix |
n |
int64_t |
in | Number of columns of the input matrix |
k |
int64_t |
in | Number of columns of the input matrix |
alpha |
double |
in | Number of columns of the input matrix |
A |
double[][lda] |
in | Array containing the matrix $A$ |
lda |
int64_t |
in | Leading dimension of array A |
B |
double[][ldb] |
in | Array containing the matrix $B$ |
ldb |
int64_t |
in | Leading dimension of array B |
beta |
double |
in | Array containing the matrix $B$ |
C |
double[][ldc] |
out | Array containing the matrix $B$ |
ldc |
int64_t |
in | Leading dimension of array B |
Requirements:
context
is notQMCKL_NULL_CONTEXT
m > 0
n > 0
k > 0
lda >= m
ldb >= n
ldc >= n
A
is allocated with at least $m \times k \times 8$ bytesB
is allocated with at least $k \times n \times 8$ bytesC
is allocated with at least $m \times n \times 8$ bytes
qmckl_exit_code qmckl_dgemm (
const qmckl_context context,
const char TransA,
const char TransB,
const int64_t m,
const int64_t n,
const int64_t k,
const double alpha,
const double* A,
const int64_t lda,
const double* B,
const int64_t ldb,
const double beta,
double* const C,
const int64_t ldc );
integer function qmckl_dgemm_f(context, TransA, TransB, &
m, n, k, alpha, A, LDA, B, LDB, beta, C, LDC) &
result(info)
use qmckl
implicit none
integer(qmckl_context), intent(in) :: context
character , intent(in) :: TransA, TransB
integer*8 , intent(in) :: m, n, k
double precision , intent(in) :: alpha, beta
integer*8 , intent(in) :: lda
double precision , intent(in) :: A(lda,*)
integer*8 , intent(in) :: ldb
double precision , intent(in) :: B(ldb,*)
integer*8 , intent(in) :: ldc
double precision , intent(out) :: C(ldc,*)
info = QMCKL_SUCCESS
if (context == QMCKL_NULL_CONTEXT) then
info = QMCKL_INVALID_CONTEXT
return
endif
if (m <= 0_8) then
info = QMCKL_INVALID_ARG_4
return
endif
if (n <= 0_8) then
info = QMCKL_INVALID_ARG_5
return
endif
if (k <= 0_8) then
info = QMCKL_INVALID_ARG_6
return
endif
if (LDA <= 0) then
info = QMCKL_INVALID_ARG_9
return
endif
if (LDB <= 0) then
info = QMCKL_INVALID_ARG_11
return
endif
if (LDC <= 0) then
info = QMCKL_INVALID_ARG_14
return
endif
call dgemm(transA, transB, int(m,4), int(n,4), int(k,4), &
alpha, A, int(LDA,4), B, int(LDB,4), beta, C, int(LDC,4))
end function qmckl_dgemm_f
qmckl_adjugate
Given a matrix $\mathbf{A}$, the adjugate matrix $\text{adj}(\mathbf{A})$ is the transpose of the cofactors matrix of $\mathbf{A}$.
\[ \mathbf{B} = \text{adj}(\mathbf{A}) = \text{det}(\mathbf{A}) \, \mathbf{A}^{-1} \]
See also: https://en.wikipedia.org/wiki/Adjugate_matrix
Variable | Type | In/Out | Description |
---|---|---|---|
context |
qmckl_context |
in | Global state |
n |
int64_t |
in | Number of rows and columns of the input matrix |
A |
double[][lda] |
in | Array containing the $n \times n$ matrix $A$ |
lda |
int64_t |
in | Leading dimension of array A |
B |
double[][ldb] |
out | Adjugate of $A$ |
ldb |
int64_t |
in | Leading dimension of array B |
det_l |
double |
inout | determinant of $A$ |
Requirements:
context
is notQMCKL_NULL_CONTEXT
n > 0
lda >= m
A
is allocated with at least $m \times m \times 8$ bytesldb >= m
B
is allocated with at least $m \times m \times 8$ bytes
qmckl_exit_code qmckl_adjugate (
const qmckl_context context,
const int64_t n,
const double* A,
const int64_t lda,
double* const B,
const int64_t ldb,
double* det_l );
For small matrices (≤ 5× 5), we use specialized routines for performance motivations. For larger sizes, we rely on the LAPACK library.
integer function qmckl_adjugate_f(context, na, A, LDA, B, ldb, det_l) &
result(info)
use qmckl
implicit none
integer(qmckl_context) , intent(in) :: context
double precision, intent(in) :: A (LDA,*)
integer*8, intent(in) :: LDA
double precision, intent(out) :: B (LDB,*)
integer*8, intent(in) :: LDB
integer*8, intent(in) :: na
double precision, intent(inout) :: det_l
integer :: i,j
info = QMCKL_SUCCESS
if (context == QMCKL_NULL_CONTEXT) then
info = QMCKL_INVALID_CONTEXT
return
endif
if (na <= 0_8) then
info = QMCKL_INVALID_ARG_2
return
endif
if (LDA <= 0_8) then
info = QMCKL_INVALID_ARG_4
return
endif
if (LDA < na) then
info = QMCKL_INVALID_ARG_4
return
endif
select case (na)
case (5)
call adjugate5(A,LDA,B,LDB,na,det_l)
case (4)
call adjugate4(A,LDA,B,LDB,na,det_l)
case (3)
call adjugate3(A,LDA,B,LDB,na,det_l)
case (2)
call adjugate2(A,LDA,B,LDB,na,det_l)
case (1)
det_l = a(1,1)
b(1,1) = 1.d0
case default
call adjugate_general(context, na, A, LDA, B, LDB, det_l)
end select
end function qmckl_adjugate_f
subroutine adjugate_general(context, na, A, LDA, B, LDB, det_l)
use qmckl
implicit none
integer(qmckl_context), intent(in) :: context
double precision, intent(in) :: A (LDA,na)
integer*8, intent(in) :: LDA
double precision, intent(out) :: B (LDB,na)
integer*8, intent(in) :: LDB
integer*8, intent(in) :: na
double precision, intent(inout) :: det_l
double precision :: work(LDA*max(na,64))
integer :: inf
integer :: ipiv(LDA)
integer :: lwork
integer(8) :: i, j
We first copy the array A
into array B
.
B(1:na,1:na) = A(1:na,1:na)
Then, we compute the LU factorization $LU=A$
using the dgetrf
routine.
call dgetrf(na, na, B, LDB, ipiv, inf )
By convention, the determinant of $L$ is equal to one, so the determinant of $A$ is equal to the determinant of $U$, which is simply computed as the product of its diagonal elements.
det_l = 1.d0
j=0_8
do i=1,na
j = j+min(abs(ipiv(i)-i),1)
det_l = det_l*B(i,i)
enddo
As dgetrf
returns $PLU=A$ where $P$ is a permutation matrix, the
sign of the determinant is computed as $-1^m$ where $m$ is the
number of permutations.
if (iand(j,1_8) /= 0_8) then
det_l = -det_l
endif
Then, the inverse of $A$ is computed using dgetri
:
lwork = SIZE(work)
call dgetri(na, B, LDB, ipiv, work, lwork, inf )
and the adjugate matrix is computed as the product of the determinant with the inverse:
B(:,:) = B(:,:)*det_l
end subroutine adjugate_general