/******************************************************************************* * * TRIQS: a Toolbox for Research in Interacting Quantum Systems * * Copyright (C) 2012 by O. Parcollet * * TRIQS is free software: you can redistribute it and/or modify it under the * terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * TRIQS is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License along with * TRIQS. If not, see . * ******************************************************************************/ #ifndef TRIQS_ARRAYS_BLAS_LAPACK_GEMM_H #define TRIQS_ARRAYS_BLAS_LAPACK_GEMM_H #include #include "./tools.hpp" #include "./qcache.hpp" namespace triqs { namespace arrays { namespace blas { using namespace blas_lapack_tools; /* NOTE(review): this copy of the header is collapsed onto a few physical lines and text between angle brackets appears to have been stripped (the first #include has no header name, std::complex has no template argument, template headers have no parameter list) - restore from the upstream file before compiling. */ /* f77: declares, inside extern "C", the Fortran routines named by TRIQS_FORTRAN_MANGLING(dgemm) and TRIQS_FORTRAN_MANGLING(zgemm), and defines two inline gemm overloads (double and dcomplex) that forward to them. */ namespace f77 { // overload extern "C" { void TRIQS_FORTRAN_MANGLING(dgemm) (char *, char *, const int & , const int & , const int & , const double &, const double[], const int &, const double[], const int &, const double &, double[], const int & ); void TRIQS_FORTRAN_MANGLING(zgemm) (char *, char *, const int & , const int & , const int & , const std::complex &, const std::complex[], const int &, const std::complex[], const int &, const std::complex &, std::complex[], const int & ); } /* double overload: trans_a and trans_b are taken by value so that their addresses can be passed to the char* parameters; every other argument is forwarded unchanged to dgemm. */ inline void gemm (char trans_a, char trans_b, const int & M, const int & N, const int & K, const double & alpha, const double* A, const int & LDA, const double* B, const int & LDB, const double & beta, double* C, const int & LDC) { TRIQS_FORTRAN_MANGLING(dgemm)(&trans_a,&trans_b,M,N,K,alpha, A, LDA, B, LDB, beta, C, LDC); } typedef std::complex dcomplex; /* dcomplex overload: same forwarding, to zgemm. */ inline void gemm (char trans_a, char trans_b, const int & M, const int & N, 
const int & K, const dcomplex & alpha, const dcomplex* A, const int & LDA, const dcomplex* B, const int & LDB, const dcomplex & beta, dcomplex* C, const int & LDC) { TRIQS_FORTRAN_MANGLING(zgemm)(&trans_a,&trans_b,M,N,K,alpha, A, LDA, B, LDB, beta, C, LDC); } } /* use_blas_gemm: compile-time switch between the two gemm overloads defined after it; value is is_blas_lapack_type::value AND have_same_value_type::value, and the static_assert rejects an output type for which is_amv_value_or_view_class::value is false. */ template struct use_blas_gemm { static_assert(is_amv_value_or_view_class::value, "output of matrix product must be a matrix or matrix_view"); //static constexpr bool are_both_value_view = is_amv_value_or_view_class::value && is_amv_value_or_view_class::value; //static constexpr bool value = are_both_value_view && is_blas_lapack_type::value && have_same_value_type< MT1, MT2, MTOut>::value; static constexpr bool value = is_blas_lapack_type::value && have_same_value_type< MT1, MT2, MTOut>::value; // if inverse_lazy e.g. it is ok, we will use a cache anyway.... }; /** * Calls gemm on a matrix or view * Takes care of making temporary copies if necessary * Selected by enable_if when use_blas_gemm::value is true. * C is first passed to resize_or_check_if_view with shape (first_dim(A), second_dim(B)). * A, B and C are then wrapped in qcache objects; when C.memory_layout_is_c() is true the wrappers of A and B are exchanged (Cb wraps A, Ca wraps B). */ template typename std::enable_if< use_blas_gemm::value >::type gemm (typename MT1::value_type alpha, MT1 const & A, MT2 const & B, typename MT1::value_type beta, MTOut & C) { //std::cerr << "gemm: blas call "<< std::endl ; // first resize if necessary and possible resize_or_check_if_view(C,make_shape(first_dim(A),second_dim(B))); // now we use qcache instead of the matrix to make a copy if necessary ... // not optimal : if stride == 1, N ---> use LDA parameters // change the condition in the qcache construction.... reflexive_qcache Cc(C); if (C.memory_layout_is_c()) { // then tC = tB tA ! 
const_qcache Cb(A); // note the inversion A <-> B const_qcache Ca(B); // note the inversion A <-> B if (!(first_dim(Ca()) == second_dim(Cb()))) TRIQS_RUNTIME_ERROR << "Dimension mismatch in gemm : A : "<< get_shape(Ca()) <<" while B : "< /* NOTE(review): text is missing from this copy at this point - presumably the rest of the C-layout branch and the opening of the other branch, whose non-swapped wrappers follow; restore from upstream. */ Ca(A); const_qcache Cb(B); if (!(second_dim(Ca()) == first_dim(Cb()))) TRIQS_RUNTIME_ERROR << "Dimension mismatch in gemm : A : "<< get_shape(Ca()) <<" while B : "< /* NOTE(review): text is missing from this copy at this point as well - the end of the BLAS-backed gemm body and the template header of gemm_generic; restore from upstream. */ /* gemm_generic: fallback that does not go through f77::gemm. It passes C to resize_or_check_if_view with shape (first_dim(A), second_dim(B)), reports a mismatch between second_dim(A) and first_dim(B) through TRIQS_RUNTIME_ERROR, sets C() = 0 and then starts a loop over i. NOTE(review): in the visible part alpha and beta are not used - confirm against upstream whether the missing loop body applies them. */ void gemm_generic (typename MT1::value_type alpha, MT1 const & A, MT2 const & B, typename MT1::value_type beta, MTOut & C) { //std::cerr << "gemm: generic call "<< std::endl ; // first resize if necessary and possible resize_or_check_if_view(C,make_shape(first_dim(A),second_dim(B))); if (second_dim(A) != first_dim(B)) TRIQS_RUNTIME_ERROR << "gemm generic : dimension mismatch "<< get_shape(A) << get_shape(B); C() = 0; for (int i=0; i /* NOTE(review): the rest of this loop, the end of gemm_generic and the template header of the next overload are missing from this copy. */ /* Overload selected by enable_if when use_blas_gemm::value is false: forwards its arguments to gemm_generic. */ typename std::enable_if< !use_blas_gemm::value >::type gemm (typename MT1::value_type alpha, MT1 const & A, MT2 const & B, typename MT1::value_type beta, MTOut & C) { gemm_generic(alpha,A,B,beta,C); } // to allow gemm (alpha, a, b, beta, M(..., ...)) i.e. a temporary view, which is not matched by previous templates // which require an lvalue. This is the only version which takes an && as last argument // indeed, in the routine, c is a *lvalue*, since it has a name, and hence we call *other* overload of the function template void gemm (A alpha, MT1 const & a, MT2 const & b, B beta, matrix_view && c) { gemm(alpha,a,b,beta,c);} }}}// namespace #endif