2013-07-17 19:24:07 +02:00
/*******************************************************************************
*
* TRIQS : a Toolbox for Research in Interacting Quantum Systems
*
* Copyright ( C ) 2012 by O . Parcollet
*
* TRIQS is free software : you can redistribute it and / or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation , either version 3 of the License , or ( at your option ) any later
* version .
*
* TRIQS is distributed in the hope that it will be useful , but WITHOUT ANY
* WARRANTY ; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE . See the GNU General Public License for more
* details .
*
* You should have received a copy of the GNU General Public License along with
* TRIQS . If not , see < http : //www.gnu.org/licenses/>.
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# ifndef TRIQS_ARRAYS_BLAS_LAPACK_GER_H
# define TRIQS_ARRAYS_BLAS_LAPACK_GER_H
# include "./tools.hpp"
# include "./qcache.hpp"
namespace triqs { namespace arrays { namespace blas {
using namespace blas_lapack_tools ;
namespace f77 { // overload
typedef std : : complex < double > dcomplex ;
extern " C " {
void TRIQS_FORTRAN_MANGLING ( dger ) ( const int & M , const int & N , const double & , const double [ ] , const int & , const double [ ] , const int & , double [ ] , const int & ) ;
void TRIQS_FORTRAN_MANGLING ( zgeru ) ( const int & M , const int & N , const dcomplex & , const dcomplex [ ] , const int & , const dcomplex [ ] , const int & , dcomplex [ ] , const int & ) ;
}
inline void ger ( const int & M , const int & N , const double & alpha , const double * x , const int & incx , const double * Y , const int & incy , double * A , const int & LDA ) {
TRIQS_FORTRAN_MANGLING ( dger ) ( M , N , alpha , x , incx , Y , incy , A , LDA ) ;
}
inline void ger ( const int & M , const int & N , const dcomplex & alpha , const dcomplex * x , const int & incx , const dcomplex * Y , const int & incy , dcomplex * A , const int & LDA ) {
TRIQS_FORTRAN_MANGLING ( zgeru ) ( M , N , alpha , x , incx , Y , incy , A , LDA ) ;
}
}
/**
* Calls ger : A + = alpha * x * ty
* Takes care of making temporary copies if necessary
*/
template < typename VTX , typename VTY , typename MT >
typename std : : enable_if < is_blas_lapack_type < typename VTX : : value_type > : : value & & have_same_value_type < VTX , VTY , MT > : : value > : : type
ger ( typename VTX : : value_type alpha , VTX const & X , VTY const & Y , MT & A ) {
static_assert ( is_amv_value_or_view_class < MT > : : value , " ger : A must be a matrix or a matrix_view " ) ;
2013-07-29 22:44:43 +02:00
if ( ( first_dim ( A ) ! = Y . size ( ) ) | | ( second_dim ( A ) ! = X . size ( ) ) ) TRIQS_RUNTIME_ERROR < < " Dimension mismatch in ger : A : " < < get_shape ( A ( ) ) < < " while X : " < < get_shape ( X ( ) ) < < " and Y : " < < get_shape ( Y ( ) ) ;
2013-07-17 19:24:07 +02:00
const_qcache < VTX > Cx ( X ) ; // mettre la condition a la main
const_qcache < VTY > Cy ( Y ) ; // mettre la condition a la main
reflexive_qcache < MT > Ca ( A ) ;
if ( Ca ( ) . memory_layout_is_c ( ) ) // tA += alpha y tx
f77 : : ger ( get_n_rows ( Ca ( ) ) , get_n_cols ( Ca ( ) ) , alpha , Cy ( ) . data_start ( ) , Cy ( ) . stride ( ) , Cx ( ) . data_start ( ) , Cx ( ) . stride ( ) , Ca ( ) . data_start ( ) , get_ld ( Ca ( ) ) ) ;
else
f77 : : ger ( get_n_rows ( Ca ( ) ) , get_n_cols ( Ca ( ) ) , alpha , Cx ( ) . data_start ( ) , Cx ( ) . stride ( ) , Cy ( ) . data_start ( ) , Cy ( ) . stride ( ) , Ca ( ) . data_start ( ) , get_ld ( Ca ( ) ) ) ;
/* std::cerr << " Meme labout C"<< Ca().memory_layout_is_c() << " "<<A.memory_layout_is_c()<<std::endl ;
std : : cerr < < " has_contiguous_data(A) : " < < has_contiguous_data ( A ) < < std : : endl ;
std : : cerr < < Ca ( ) < < std : : endl ;
std : : cerr < < Ca ( ) ( 0 , 0 ) < < " " < < Ca ( ) ( 1 , 0 ) < < " " < < Ca ( ) ( 0 , 1 ) < < " " < < Ca ( ) ( 1 , 1 ) < < " " < < std : : endl ;
std : : cerr < < Ca ( ) . data_start ( ) [ 0 ] < < " " < < Ca ( ) . data_start ( ) [ 1 ] < < " " < < Ca ( ) . data_start ( ) [ 2 ] < < " " < < Ca ( ) . data_start ( ) [ 3 ] < < " " < < std : : endl ;
std : : cerr < < A < < std : : endl ;
std : : cerr < < A ( 0 , 0 ) < < " " < < A ( 1 , 0 ) < < " " < < A ( 0 , 1 ) < < " " < < A ( 1 , 1 ) < < " " < < std : : endl ;
std : : cerr < < A . data_start ( ) [ 0 ] < < " " < < A . data_start ( ) [ 1 ] < < " " < < A . data_start ( ) [ 2 ] < < " " < < A . data_start ( ) [ 3 ] < < " " < < std : : endl ;
*/
}
Fix matrix * alias issue and adapt det_manip
- The previous version of the * operator for matrix was too clever.
It was giving a lazy object and then rewriting C = A *B into gemm (a,A,B,0,C).
The pb was in case of aliasing : when e.g. C = A, or is a part of A.
gemm is not correct that case, and as a result generic code like
a = a *b
may not be correct in matrix case, which is unacceptable.
- So we revert to a simple * operator for matrix
that does immediate computation.
Same thing for matrix* vector
- we also suppress a_x_ty class.
-> for M = a * b,
when M is a matrix, there is no overhead due to move assignment
-> however, when M is a view, there is an additionnal copy.
-Correctness comes first, hence the fix.
However, if one wants more speed and one can guarantee that
there is no aliasing possible, then one has to write a direct gemm call.
-> det_manip class was adapted, since in that case, we can show there
no alias, and we want the speed gain, so the * ops where replaced
by direct blas call (using the array blas interface).
-> also gemm, gemv, ger were overloaded in the case the return
matrix/vector (i.e. last parameter of the function) is not an lvalue,
but a temporary view created on the fly.
2013-09-10 21:41:17 +02:00
// to allow ger (alpha, x,y, M(..., ...)) i.e. a temporary view, which is not matched by previos templates
// which require an lvalue
template < typename A , typename VTX , typename VTY , typename V , ull_t Opt , ull_t To , bool W >
void ger ( A alpha , VTX const & x , VTY const & y , matrix_view < V , Opt , To , W > & & r ) { ger ( alpha , x , y , r ) ; }
2013-07-17 19:24:07 +02:00
Fix matrix * alias issue and adapt det_manip
- The previous version of the * operator for matrix was too clever.
It was giving a lazy object and then rewriting C = A *B into gemm (a,A,B,0,C).
The pb was in case of aliasing : when e.g. C = A, or is a part of A.
gemm is not correct that case, and as a result generic code like
a = a *b
may not be correct in matrix case, which is unacceptable.
- So we revert to a simple * operator for matrix
that does immediate computation.
Same thing for matrix* vector
- we also suppress a_x_ty class.
-> for M = a * b,
when M is a matrix, there is no overhead due to move assignment
-> however, when M is a view, there is an additionnal copy.
-Correctness comes first, hence the fix.
However, if one wants more speed and one can guarantee that
there is no aliasing possible, then one has to write a direct gemm call.
-> det_manip class was adapted, since in that case, we can show there
no alias, and we want the speed gain, so the * ops where replaced
by direct blas call (using the array blas interface).
-> also gemm, gemv, ger were overloaded in the case the return
matrix/vector (i.e. last parameter of the function) is not an lvalue,
but a temporary view created on the fly.
2013-09-10 21:41:17 +02:00
} } } // namespace
2013-07-17 19:24:07 +02:00
# endif