3
0
mirror of https://github.com/triqs/dft_tools synced 2025-01-12 22:18:23 +01:00
dft_tools/triqs/arrays/blas_lapack/ger.hpp
Olivier Parcollet b534936589 Fix matrix * alias issue and adapt det_manip
- The previous version of the * operator for matrix was too clever.
It was giving a lazy object and then rewriting C = A *B into gemm (a,A,B,0,C).
The problem arose in case of aliasing: when e.g. C = A, or C is a part of A.
gemm is not correct in that case, and as a result generic code like
a = a *b
may not be correct in matrix case, which is unacceptable.

- So we revert to a simple * operator for matrix
that does immediate computation.
Same thing for matrix* vector

- we also suppress a_x_ty class.

-> for M = a * b,
when M is a matrix, there is no overhead due to move assignment
-> however, when M is a view, there is an additional copy.

-Correctness comes first, hence the fix.
However, if one wants more speed and one can guarantee that
there is no aliasing possible, then one has to write a direct gemm call.

-> det_manip class was adapted, since in that case, we can show there is
no alias, and we want the speed gain, so the * ops were replaced
by direct blas calls (using the array blas interface).

-> also gemm, gemv, ger were overloaded in the case the return
matrix/vector (i.e. last parameter of the function) is not an lvalue,
but a temporary view created on the fly.
2013-09-11 18:49:22 +02:00

83 lines
4.3 KiB
C++

/*******************************************************************************
*
* TRIQS: a Toolbox for Research in Interacting Quantum Systems
*
* Copyright (C) 2012 by O. Parcollet
*
* TRIQS is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* TRIQS is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* TRIQS. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
#ifndef TRIQS_ARRAYS_BLAS_LAPACK_GER_H
#define TRIQS_ARRAYS_BLAS_LAPACK_GER_H
#include "./tools.hpp"
#include "./qcache.hpp"
namespace triqs { namespace arrays { namespace blas {
using namespace blas_lapack_tools;
namespace f77 { // type-overloaded C++ entry points for the Fortran rank-1 update routines
 typedef std::complex<double> dcomplex;

 // Raw Fortran symbols. Every scalar argument is taken by reference, arrays as pointers.
 extern "C" {
  void TRIQS_FORTRAN_MANGLING(dger)(const int &m, const int &n, const double &alpha,
                                    const double x[], const int &incx,
                                    const double y[], const int &incy,
                                    double a[], const int &lda);
  void TRIQS_FORTRAN_MANGLING(zgeru)(const int &m, const int &n, const dcomplex &alpha,
                                     const dcomplex x[], const int &incx,
                                     const dcomplex y[], const int &incy,
                                     dcomplex a[], const int &lda);
 }

 // Real case : forwards all arguments, unchanged and in the same order, to dger.
 inline void ger (const int & m, const int & n, const double & alpha,
                  const double* x, const int & incx,
                  const double* y, const int & incy,
                  double* a, const int & lda) {
  TRIQS_FORTRAN_MANGLING(dger)(m, n, alpha, x, incx, y, incy, a, lda);
 }

 // Complex case : forwards all arguments, unchanged and in the same order, to zgeru.
 inline void ger (const int & m, const int & n, const dcomplex & alpha,
                  const dcomplex* x, const int & incx,
                  const dcomplex* y, const int & incy,
                  dcomplex* a, const int & lda) {
  TRIQS_FORTRAN_MANGLING(zgeru)(m, n, alpha, x, incx, y, incy, a, lda);
 }
}
/**
 * Calls ger : A += alpha * x * ty
 *
 * @param alpha scalar factor of the update
 * @param X     vector x ; its size must equal the first dimension of A
 * @param Y     vector y ; its size must equal the second dimension of A
 * @param A     matrix or matrix_view, updated in place
 *
 * Raises a TRIQS_RUNTIME_ERROR when the sizes of X and Y do not match the shape of A.
 *
 * X, Y and A are accessed through the qcache wrappers before the BLAS call, which is
 * where temporary copies are made if necessary (see qcache.hpp).
 */
template< typename VTX, typename VTY, typename MT>
typename std::enable_if< is_blas_lapack_type<typename VTX::value_type>::value && have_same_value_type< VTX, VTY, MT>::value >::type
ger (typename VTX::value_type alpha, VTX const & X, VTY const & Y, MT & A) {
 static_assert( is_amv_value_or_view_class<MT>::value, "ger : A must be a matrix or a matrix_view");
 // A(i,j) += alpha * X(i) * Y(j) : the rows of A are indexed like X, its columns like Y.
 if (( first_dim(A) != X.size()) || (second_dim(A) != Y.size())) TRIQS_RUNTIME_ERROR << "Dimension mismatch in ger : A : "<< get_shape(A()) <<" while X : "<<get_shape(X())<<" and Y : "<<get_shape(Y());
 const_qcache<VTX> Cx(X); // TODO : set the cache condition by hand
 const_qcache<VTY> Cy(Y); // TODO : set the cache condition by hand
 reflexive_qcache<MT> Ca(A);
 if (Ca().memory_layout_is_c()) // C layout : do the update on the transpose, tA += alpha * y * tx, hence x and y are exchanged
  f77::ger(get_n_rows(Ca()), get_n_cols(Ca()), alpha, Cy().data_start(), Cy().stride(), Cx().data_start(), Cx().stride(), Ca().data_start(), get_ld(Ca()));
 else
  f77::ger(get_n_rows(Ca()), get_n_cols(Ca()), alpha, Cx().data_start(), Cx().stride(), Cy().data_start(), Cy().stride(), Ca().data_start(), get_ld(Ca()));
}
// Overload for an output given as a temporary view built on the fly, e.g. ger (alpha, x, y, M(..., ...)).
// The previous template takes its matrix by non-const lvalue reference and therefore does not
// match such a temporary. Here r is a named object, hence an lvalue, so the call below goes to
// that template.
template< typename Scalar, typename VTX, typename VTY, typename V, ull_t Opt, ull_t To, bool W>
void ger (Scalar alpha, VTX const & x, VTY const & y, matrix_view<V,Opt,To,W> && r) {
 ger(alpha, x, y, r);
}
}}}// namespace
#endif