/*******************************************************************************
*
* TRIQS: a Toolbox for Research in Interacting Quantum Systems
*
* Copyright (C) 2012 by O. Parcollet
*
* TRIQS is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* TRIQS is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* TRIQS. If not, see .
*
******************************************************************************/
#ifndef TRIQS_ARRAYS_BLAS_LAPACK_GEMM_H
#define TRIQS_ARRAYS_BLAS_LAPACK_GEMM_H
#include
#include "./tools.hpp"
#include "./qcache.hpp"
namespace triqs { namespace arrays { namespace blas {
using namespace blas_lapack_tools;
namespace f77 { // overload
extern "C" {
void TRIQS_FORTRAN_MANGLING(dgemm) (char *, char *, const int & , const int & , const int & , const double &,
const double[], const int &, const double[], const int &, const double &, double[], const int & );
void TRIQS_FORTRAN_MANGLING(zgemm) (char *, char *, const int & , const int & , const int & , const std::complex &,
const std::complex[], const int &, const std::complex[], const int &, const std::complex &, std::complex[], const int & );
}
inline void gemm (char trans_a, char trans_b, const int & M, const int & N, const int & K, const double & alpha,
const double* A, const int & LDA, const double* B, const int & LDB, const double & beta, double* C, const int & LDC) {
TRIQS_FORTRAN_MANGLING(dgemm)(&trans_a,&trans_b,M,N,K,alpha, A, LDA, B, LDB, beta, C, LDC);
}
typedef std::complex dcomplex;
inline void gemm (char trans_a, char trans_b, const int & M, const int & N, const int & K, const dcomplex & alpha,
const dcomplex* A, const int & LDA, const dcomplex* B, const int & LDB, const dcomplex & beta, dcomplex* C, const int & LDC) {
TRIQS_FORTRAN_MANGLING(zgemm)(&trans_a,&trans_b,M,N,K,alpha, A, LDA, B, LDB, beta, C, LDC);
}
}
template
struct use_blas_gemm {
static_assert(is_amv_value_or_view_class::value, "output of matrix product must be a matrix or matrix_view");
//static constexpr bool are_both_value_view = is_amv_value_or_view_class::value && is_amv_value_or_view_class::value;
//static constexpr bool value = are_both_value_view && is_blas_lapack_type::value && have_same_value_type< MT1, MT2, MTOut>::value;
static constexpr bool value = is_blas_lapack_type::value && have_same_value_type< MT1, MT2, MTOut>::value;
// if inverse_lazy e.g. it is ok, we will use a cache anyway....
};
/**
* Calls gemm on a matrix or view
* Takes care of making temporary copies if necessary
*/
template
typename std::enable_if< use_blas_gemm::value >::type
gemm (typename MT1::value_type alpha, MT1 const & A, MT2 const & B, typename MT1::value_type beta, MTOut & C) {
//std::cerr << "gemm: blas call "<< std::endl ;
// first resize if necessary and possible
resize_or_check_if_view(C,make_shape(first_dim(A),second_dim(B)));
// now we use qcache instead of the matrix to make a copy if necessary ...
// not optimal : if stride == 1, N ---> use LDA parameters
// change the condition in the qcache construction....
reflexive_qcache Cc(C);
if (C.memory_layout_is_c()) {
// then tC = tB tA !
const_qcache Cb(A); // note the inversion A <-> B
const_qcache Ca(B); // note the inversion A <-> B
if (!(first_dim(Ca()) == second_dim(Cb()))) TRIQS_RUNTIME_ERROR << "Dimension mismatch in gemm : A : "<< get_shape(Ca()) <<" while B : "< Ca(A);
const_qcache Cb(B);
if (!(second_dim(Ca()) == first_dim(Cb()))) TRIQS_RUNTIME_ERROR << "Dimension mismatch in gemm : A : "<< get_shape(Ca()) <<" while B : "<
void gemm_generic (typename MT1::value_type alpha, MT1 const & A, MT2 const & B, typename MT1::value_type beta, MTOut & C) {
//std::cerr << "gemm: generic call "<< std::endl ;
// first resize if necessary and possible
resize_or_check_if_view(C,make_shape(first_dim(A),second_dim(B)));
if (second_dim(A) != first_dim(B)) TRIQS_RUNTIME_ERROR << "gemm generic : dimension mismatch "<< get_shape(A) << get_shape(B);
C() = 0;
for (int i=0; i
typename std::enable_if< !use_blas_gemm::value >::type
gemm (typename MT1::value_type alpha, MT1 const & A, MT2 const & B, typename MT1::value_type beta, MTOut & C) {
gemm_generic(alpha,A,B,beta,C);
}
// to allow gemm (alpha, a, b, beta, M(..., ...)) i.e. a temporary view, which is not matched by previos templates
// which require an lvalue. This is the only version which takes an && as last argument
// indeed, in the routine, c is a *lvalue*, since it has a name, and hence we call *other* overload of the function
template
void gemm (A alpha, MT1 const & a, MT2 const & b, B beta, matrix_view && c) { gemm(alpha,a,b,beta,c);}
}}}// namespace
#endif