mirror of
https://github.com/triqs/dft_tools
synced 2025-01-12 14:08:24 +01:00
b534936589
- The previous version of the * operator for matrices was too clever. It returned a lazy object and then rewrote C = A * B into gemm (a,A,B,0,C). The problem was aliasing: when e.g. C = A, or C is a part of A, gemm is not correct, and as a result generic code like a = a * b may not be correct in the matrix case, which is unacceptable. - So we revert to a simple * operator for matrices that does an immediate computation. Same thing for matrix * vector. - We also suppress the a_x_ty class. -> For M = a * b, when M is a matrix, there is no overhead thanks to move assignment. -> However, when M is a view, there is an additional copy. - Correctness comes first, hence the fix. However, if one wants more speed and can guarantee that no aliasing is possible, then one has to write a direct gemm call. -> The det_manip class was adapted: in that case we can show there is no aliasing and we want the speed gain, so the * ops were replaced by direct blas calls (using the array blas interface). -> Also, gemm, gemv and ger were overloaded for the case where the returned matrix/vector (i.e. the last parameter of the function) is not an lvalue, but a temporary view created on the fly.
143 lines
7.3 KiB
C++
143 lines
7.3 KiB
C++
/*******************************************************************************
|
|
*
|
|
* TRIQS: a Toolbox for Research in Interacting Quantum Systems
|
|
*
|
|
* Copyright (C) 2012 by O. Parcollet
|
|
*
|
|
* TRIQS is free software: you can redistribute it and/or modify it under the
|
|
* terms of the GNU General Public License as published by the Free Software
|
|
* Foundation, either version 3 of the License, or (at your option) any later
|
|
* version.
|
|
*
|
|
* TRIQS is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
|
* details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along with
|
|
* TRIQS. If not, see <http://www.gnu.org/licenses/>.
|
|
*
|
|
******************************************************************************/
|
|
#ifndef TRIQS_ARRAYS_BLAS_LAPACK_GEMM_H
|
|
#define TRIQS_ARRAYS_BLAS_LAPACK_GEMM_H
|
|
#include <complex>
|
|
#include "./tools.hpp"
|
|
#include "./qcache.hpp"
|
|
|
|
namespace triqs { namespace arrays { namespace blas {
|
|
|
|
using namespace blas_lapack_tools;
|
|
namespace f77 { // overload
|
|
|
|
extern "C" {
|
|
void TRIQS_FORTRAN_MANGLING(dgemm) (char *, char *, const int & , const int & , const int & , const double &,
|
|
const double[], const int &, const double[], const int &, const double &, double[], const int & );
|
|
void TRIQS_FORTRAN_MANGLING(zgemm) (char *, char *, const int & , const int & , const int & , const std::complex<double> &,
|
|
const std::complex<double>[], const int &, const std::complex<double>[], const int &, const std::complex<double> &, std::complex<double>[], const int & );
|
|
}
|
|
|
|
inline void gemm (char trans_a, char trans_b, const int & M, const int & N, const int & K, const double & alpha,
|
|
const double* A, const int & LDA, const double* B, const int & LDB, const double & beta, double* C, const int & LDC) {
|
|
TRIQS_FORTRAN_MANGLING(dgemm)(&trans_a,&trans_b,M,N,K,alpha, A, LDA, B, LDB, beta, C, LDC);
|
|
}
|
|
|
|
typedef std::complex<double> dcomplex;
|
|
inline void gemm (char trans_a, char trans_b, const int & M, const int & N, const int & K, const dcomplex & alpha,
|
|
const dcomplex* A, const int & LDA, const dcomplex* B, const int & LDB, const dcomplex & beta, dcomplex* C, const int & LDC) {
|
|
TRIQS_FORTRAN_MANGLING(zgemm)(&trans_a,&trans_b,M,N,K,alpha, A, LDA, B, LDB, beta, C, LDC);
|
|
}
|
|
}
|
|
|
|
template<typename MT1, typename MT2, typename MTOut>
|
|
struct use_blas_gemm {
|
|
static_assert(is_amv_value_or_view_class<MTOut>::value, "output of matrix product must be a matrix or matrix_view");
|
|
//static constexpr bool are_both_value_view = is_amv_value_or_view_class<MT1>::value && is_amv_value_or_view_class<MT2>::value;
|
|
//static constexpr bool value = are_both_value_view && is_blas_lapack_type<typename MT1::value_type>::value && have_same_value_type< MT1, MT2, MTOut>::value;
|
|
static constexpr bool value = is_blas_lapack_type<typename MT1::value_type>::value && have_same_value_type< MT1, MT2, MTOut>::value;
|
|
// if inverse_lazy e.g. it is ok, we will use a cache anyway....
|
|
};
|
|
|
|
/**
|
|
* Calls gemm on a matrix or view
|
|
* Takes care of making temporary copies if necessary
|
|
*/
|
|
template<typename MT1, typename MT2, typename MTOut>
|
|
typename std::enable_if< use_blas_gemm<MT1,MT2,MTOut>::value >::type
|
|
gemm (typename MT1::value_type alpha, MT1 const & A, MT2 const & B, typename MT1::value_type beta, MTOut & C) {
|
|
//std::cerr << "gemm: blas call "<< std::endl ;
|
|
// first resize if necessary and possible
|
|
resize_or_check_if_view(C,make_shape(first_dim(A),second_dim(B)));
|
|
|
|
// now we use qcache instead of the matrix to make a copy if necessary ...
|
|
// not optimal : if stride == 1, N ---> use LDA parameters
|
|
// change the condition in the qcache construction....
|
|
reflexive_qcache<MTOut> Cc(C);
|
|
|
|
if (C.memory_layout_is_c()) {
|
|
// then tC = tB tA !
|
|
const_qcache<MT1> Cb(A); // note the inversion A <-> B
|
|
const_qcache<MT2> Ca(B); // note the inversion A <-> B
|
|
if (!(first_dim(Ca()) == second_dim(Cb()))) TRIQS_RUNTIME_ERROR << "Dimension mismatch in gemm : A : "<< get_shape(Ca()) <<" while B : "<<get_shape(Cb());
|
|
char trans_a= get_trans(Ca(), true);
|
|
char trans_b= get_trans(Cb(), true);
|
|
int m = (trans_a == 'N' ? get_n_rows(Ca()) : get_n_cols(Ca()));
|
|
int n = (trans_b == 'N' ? get_n_cols(Cb()) : get_n_rows(Cb()));
|
|
int k = (trans_a == 'N' ? get_n_cols(Ca()) : get_n_rows(Ca()));
|
|
//std::cerr<< " about to call GEMM"<< std::endl ;
|
|
//std::cerr<< "A = "<< get_shape(Ca())<< Ca()<< std::endl;
|
|
//std::cerr<< "B = "<< get_shape(Cb())<< Cb()<< std::endl;
|
|
//std::cerr<< "C c" << get_shape(Cc()) << Cc().indexmap().strides() << std::endl;
|
|
//std::cerr<<Ca().memory_layout_is_c() <<Ca().memory_layout_is_fortran()<<std::endl;
|
|
//std::cerr<< get_n_rows(Ca())<<get_n_cols(Cb())<<get_n_cols(Ca()) << std::endl ;
|
|
f77::gemm(trans_a,trans_b,m,n,k,
|
|
alpha, Ca().data_start(), get_ld(Ca()) , Cb().data_start(), get_ld(Cb()), beta, Cc().data_start(), get_ld(Cc()));
|
|
//std::cerr << " gemm ok "<< std::endl ;
|
|
}
|
|
else {
|
|
const_qcache<MT1> Ca(A);
|
|
const_qcache<MT2> Cb(B);
|
|
if (!(second_dim(Ca()) == first_dim(Cb()))) TRIQS_RUNTIME_ERROR << "Dimension mismatch in gemm : A : "<< get_shape(Ca()) <<" while B : "<<get_shape(Cb());
|
|
char trans_a= get_trans(Ca(), false);
|
|
char trans_b= get_trans(Cb(), false);
|
|
int m = (trans_a == 'N' ? get_n_rows(Ca()) : get_n_cols(Ca()));
|
|
int n = (trans_b == 'N' ? get_n_cols(Cb()) : get_n_rows(Cb()));
|
|
int k = (trans_a == 'N' ? get_n_cols(Ca()) : get_n_rows(Ca()));
|
|
f77::gemm(trans_a,trans_b,m,n,k,
|
|
alpha, Ca().data_start(), get_ld(Ca()) , Cb().data_start(), get_ld(Cb()), beta, Cc().data_start(), get_ld(Cc()));
|
|
}
|
|
}
|
|
|
|
// make the generic version for non lapack types or more complex types
|
|
// largely suboptimal
|
|
template<typename MT1, typename MT2, typename MTOut>
|
|
void gemm_generic (typename MT1::value_type alpha, MT1 const & A, MT2 const & B, typename MT1::value_type beta, MTOut & C) {
|
|
//std::cerr << "gemm: generic call "<< std::endl ;
|
|
// first resize if necessary and possible
|
|
resize_or_check_if_view(C,make_shape(first_dim(A),second_dim(B)));
|
|
if (second_dim(A) != first_dim(B)) TRIQS_RUNTIME_ERROR << "gemm generic : dimension mismatch "<< get_shape(A) << get_shape(B);
|
|
C() = 0;
|
|
for (int i=0; i<first_dim(A); ++i)
|
|
for (int k=0; k<second_dim(A); ++k)
|
|
for (int j=0; j<second_dim(B); ++j)
|
|
C(i,j) += A(i,k)*B(k,j);
|
|
}
|
|
|
|
// generic version for non lapack
|
|
template<typename MT1, typename MT2, typename MTOut>
|
|
typename std::enable_if< !use_blas_gemm<MT1,MT2,MTOut>::value >::type
|
|
gemm (typename MT1::value_type alpha, MT1 const & A, MT2 const & B, typename MT1::value_type beta, MTOut & C) {
|
|
gemm_generic(alpha,A,B,beta,C);
|
|
}
|
|
|
|
// to allow gemm (alpha, a, b, beta, M(..., ...)) i.e. a temporary view, which is not matched by previos templates
|
|
// which require an lvalue. This is the only version which takes an && as last argument
|
|
// indeed, in the routine, c is a *lvalue*, since it has a name, and hence we call *other* overload of the function
|
|
template<typename A, typename MT1, typename MT2, typename B, typename V, ull_t Opt, ull_t To, bool W>
|
|
void gemm (A alpha, MT1 const & a, MT2 const & b, B beta, matrix_view<V,Opt,To,W> && c) { gemm(alpha,a,b,beta,c);}
|
|
|
|
}}}// namespace
|
|
|
|
|
|
#endif
|
|
|