/*******************************************************************************
 *
 * TRIQS : a Toolbox for Research in Interacting Quantum Systems
 *
 * Copyright (C) 2011-2013 by M. Ferrero, O. Parcollet
 *
 * TRIQS is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * TRIQS is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * TRIQS. If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
#ifndef TRIQS_DETMANIP_H
#define TRIQS_DETMANIP_H
#include <triqs/utility/first_include.hpp>
#include <vector>
#include <iterator>
#include <triqs/arrays.hpp>
#include <triqs/arrays/algorithms.hpp>
#include <triqs/arrays/linalg/det_and_inverse.hpp>
#include <triqs/arrays/blas_lapack/dot.hpp>
// Design note (why this file calls BLAS directly instead of using operator*):
// an earlier operator* for matrices returned a lazy object and rewrote
// C = A*B into gemm(a, A, B, 0, C). This is incorrect when the result aliases
// an argument (e.g. C == A, or C is a view into part of A), so generic code
// like a = a*b could be wrong in the matrix case, which is unacceptable.
// operator* (matrix*matrix and matrix*vector) therefore performs an immediate
// computation, and the a_x_ty class was suppressed: for M = a*b there is no
// overhead when M is a matrix (move assignment), but an additional copy when
// M is a view. Correctness comes first; when one can guarantee the absence of
// aliasing and wants more speed, one has to write a direct gemm call.
// det_manip is such a case: one can show there is no alias here, so the *
// operations were replaced by direct BLAS calls (via the array blas
// interface). gemm, gemv and ger were also overloaded for the case where the
// result (the last parameter of the function) is not an lvalue but a
// temporary view created on the fly.
#include <triqs/arrays/blas_lapack/ger.hpp>
#include <triqs/arrays/blas_lapack/gemm.hpp>
#include <triqs/arrays/blas_lapack/gemv.hpp>
#include <triqs/utility/function_arg_ret_type.hpp>

namespace triqs { namespace det_manip {
 namespace blas = arrays::blas;
 /**
  * \brief Standard matrix/det manipulations used in several QMC.
  */
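 /*
  * Usage sketch (illustrative only : the kernel k and the numerical values
  * below are assumptions for the example, not part of the library) :
  *
  *   auto k = [](double x, double y) { return 1.0 / (x - y + 0.5); };
  *   triqs::det_manip::det_manip<decltype(k)> dm(k, 100);  // reserve size 100
  *   double ratio = dm.try_insert(0, 0, 0.1, 0.2); // det ratio if the move were accepted
  *   if (std::abs(ratio) > 0.5) dm.complete_operation(); // accept : matrix, det, sign updated
  *   // to reject, simply do not call complete_operation() : the try has no effect
  */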
 template<typename FunctionType>
 class det_manip {
  private:
   typedef utility::function_arg_ret_type<FunctionType> f_tr;
   static_assert(f_tr::arity == 2, "det_manip : the function must take two arguments !");
   // Do we REALLY need this ?
   static_assert(std::is_same<typename f_tr::template decay_arg<0>::type, typename f_tr::template decay_arg<1>::type>::value,
                 "det_manip : the two arguments of the function must have the same type");

  public:
   typedef typename f_tr::template decay_arg<0>::type xy_type;
   typedef typename f_tr::result_type value_type;
   static_assert(std::is_floating_point<value_type>::value || boost::is_complex<value_type>::value,
                 "det_manip : the function must return a floating-point or a complex number");
   typedef arrays::vector<value_type> vector_type;
   typedef arrays::matrix<value_type> matrix_type;
   typedef arrays::matrix_view<value_type> matrix_view_type;

  protected: // the data
   typedef std::ptrdiff_t int_type;
   typedef arrays::range range;
   FunctionType f;
   // serialized data : these are all VALUES.
   value_type det;
   size_t Nmax, N, last_try;
   std::vector<size_t> row_num, col_num;
   std::vector<xy_type> x_values, y_values;
   int sign;
   matrix_type mat_inv;
   long long n_opts, n_opts_max_before_check;
  private:
   // ------------ BOOST Serialization ------------
   // What about f ? Not serialized at the moment.
   friend class boost::serialization::access;
   template<class Archive>
   void serialize(Archive & ar) {
     using boost::serialization::make_nvp;
     ar & make_nvp("Nmax", Nmax) & make_nvp("N", N)
        & make_nvp("n_opts", n_opts) & make_nvp("n_opts_max_before_check", n_opts_max_before_check)
        & make_nvp("det", det) & make_nvp("sign", sign)
        & make_nvp("Minv", mat_inv)
        & make_nvp("row_num", row_num) & make_nvp("col_num", col_num)
        & make_nvp("x_values", x_values) & make_nvp("y_values", y_values);
   }
   /// Write into HDF5
   friend void h5_write(h5::group fg, std::string subgroup_name, det_manip const & g) {
     auto gr = fg.create_group(subgroup_name);
     h5_write(gr, "N", g.N);
     h5_write(gr, "mat_inv", g.mat_inv);
     h5_write(gr, "det", g.det);
     h5_write(gr, "sign", g.sign);
     //h5_write(gr,"row_num",g.row_num);
     //h5_write(gr,"col_num",g.col_num);
     h5_write(gr, "x_values", g.x_values);
     h5_write(gr, "y_values", g.y_values);
     h5_write(gr, "n_opts", g.n_opts);
     h5_write(gr, "n_opts_max_before_check", g.n_opts_max_before_check);
   }
   /// Read from HDF5
   friend void h5_read(h5::group fg, std::string subgroup_name, det_manip & g) {
     auto gr = fg.open_group(subgroup_name);
     h5_read(gr, "N", g.N);
     h5_read(gr, "mat_inv", g.mat_inv);
     g.Nmax = first_dim(g.mat_inv); // restore Nmax
     g.last_try = 0;
     h5_read(gr, "det", g.det);
     h5_read(gr, "sign", g.sign);
     //h5_read(gr,"row_num",g.row_num);
     //h5_read(gr,"col_num",g.col_num);
     h5_read(gr, "x_values", g.x_values);
     h5_read(gr, "y_values", g.y_values);
     h5_read(gr, "n_opts", g.n_opts);
     h5_read(gr, "n_opts_max_before_check", g.n_opts_max_before_check);
   }
  private:
   // temporary work data : not saved, not serialized, etc...
   struct work_data_type1 {
     xy_type x, y;
     vector_type MB, MC, B, C;
     value_type ksi;
     size_t i, j, ireal, jreal;
     void reserve(size_t s) { B.resize(s); C.resize(s); MB.resize(s); MC.resize(s); MB() = 0; MC() = 0; }
   };
   struct work_data_type2 {
     xy_type x[2], y[2];
     matrix_type MB, MC, B, C, ksi;
     size_t i[2], j[2], ireal[2], jreal[2];
     void reserve(size_t s) { MB.resize(s, 2); MC.resize(2, s); B.resize(s, 2); C.resize(2, s); ksi.resize(2, 2); MB() = 0; MC() = 0; }
     value_type det_ksi() const { return ksi(0, 0) * ksi(1, 1) - ksi(1, 0) * ksi(0, 1); }
   };
   work_data_type1 w1;
   work_data_type2 w2;
   value_type newdet;
   int newsign;
  private: // for the move constructor, I need to separate the swap since f may not be default constructible
   void swap_but_f(det_manip & rhs) noexcept {
     using std::swap;
#define SW(a) swap(this->a, rhs.a)
     SW(det); SW(Nmax); SW(N); SW(last_try);
     SW(row_num); SW(col_num);
     SW(x_values); SW(y_values);
     SW(sign); SW(mat_inv); SW(n_opts); SW(n_opts_max_before_check);
     SW(w1); SW(w2); SW(newdet); SW(newsign);
#undef SW
   }
   friend void swap(det_manip & lhs, det_manip & rhs) noexcept {
     using std::swap;
     swap(lhs.f, rhs.f);
     lhs.swap_but_f(rhs);
   }
  public:
   /**
    * Like for std::vector, reserve memory for a bigger size.
    * Preserves only the matrix, not the temporary working vectors/matrices, so do NOT use it
    * between a try_XXX and a complete_operation.
    */
   void reserve(size_t new_size) {
     if (new_size <= Nmax) return;
     matrix_type Mcopy(mat_inv);
     size_t N0 = Nmax; Nmax = new_size;
     mat_inv.resize(Nmax, Nmax); mat_inv(range(0, N0), range(0, N0)) = Mcopy; // keep the content of mat_inv ---> into the lib ?
     row_num.reserve(Nmax); col_num.reserve(Nmax); x_values.reserve(Nmax); y_values.reserve(Nmax);
     w1.reserve(Nmax); w2.reserve(Nmax);
   }
  private:
   void _construct_common() {
     last_try = 0; sign = 1;
     n_opts = 0; n_opts_max_before_check = 100;
   }

  public:
   /**
    * \brief Constructor.
    *
    * \param F The function (NB : a copy is made of the F object in this class).
    * \param init_size The maximum size of the matrix before a resize (like reserve in std::vector).
    *        Like std::vector, resize is automatic (by a factor 2) but can yield a performance penalty
    *        if it happens too often.
    */
   det_manip(FunctionType F, size_t init_size) :
    f(std::move(F)), Nmax(0), N(0) {
     reserve(init_size);
     mat_inv() = 0;
     det = 1;
     _construct_common();
   }
   /** \brief Constructor.
    *
    * \param F The function (NB : a copy is made of the F object in this class).
    * \tparam ArgumentContainer
    * \param X, Y : containers for X, Y.
    */
   template<typename ArgumentContainer1, typename ArgumentContainer2>
   det_manip(FunctionType F, ArgumentContainer1 const & X, ArgumentContainer2 const & Y) : f(std::move(F)) {
     if (X.size() != Y.size()) TRIQS_RUNTIME_ERROR << "X.size != Y.size";
     _construct_common();
     N = X.size();
     if (N == 0) { det = 1; reserve(30); return; }
     if (N > Nmax) reserve(2 * N); // put some margin..
     std::copy(X.begin(), X.end(), std::back_inserter(x_values));
     std::copy(Y.begin(), Y.end(), std::back_inserter(y_values));
     mat_inv() = 0;
     for (size_t i = 0; i < N; ++i) {
       row_num.push_back(i); col_num.push_back(i);
       for (size_t j = 0; j < N; ++j)
         mat_inv(i, j) = f(x_values[i], y_values[j]);
     }
     range R(0, N);
     det = arrays::determinant(mat_inv(R, R));
     mat_inv(R, R) = inverse(mat_inv(R, R));
   }
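   /*
    * Construction-from-containers sketch (the values are illustrative
    * assumptions; k is the kernel from the usage sketch above the class) :
    *
    *   std::vector<double> X = {0.1, 0.7}, Y = {0.3, 0.9};
    *   det_manip<decltype(k)> dm2(k, X, Y); // builds the 2x2 matrix f(X_i, Y_j), its det and its inverse
    */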
   det_manip(det_manip const &) = default;
   det_manip(det_manip && rhs) noexcept : f(std::move(rhs.f)) { this->swap_but_f(rhs); } // f need not have a default constructor, and we don't swap the temp data...
   //det_manip& operator=(const det_manip&) = default;
   det_manip & operator=(const det_manip &) = delete;
   det_manip & operator=(det_manip && rhs) noexcept { assert((last_try == 0) && (rhs.last_try == 0)); swap(*this, rhs); return *this; }

   /// Put to size 0 : like a vector
   void clear() {
     N = 0; sign = 1; det = 1; last_try = 0;
     row_num.clear(); col_num.clear(); x_values.clear(); y_values.clear();
   }
   //----------------------- READ ACCESS TO DATA ----------------------------------

   /// Current size of the matrix
   size_t size() const { return N; }

   /// Returns the i-th value of x
   xy_type const & get_x(size_t i) const { return x_values[row_num[i]]; }

   /// Returns the j-th value of y
   xy_type const & get_y(size_t j) const { return y_values[col_num[j]]; }

   /** det M of the current state of the matrix. */
   value_type determinant() const { return sign * det; }

   /** Returns M^{-1}(i,j) */
   value_type inverse_matrix(size_t i, size_t j) const { return mat_inv(col_num[i], row_num[j]); } // warning : need to invert the 2 permutations.

   /// Returns the inverse matrix. Warning : this is slow, since it creates a new copy and reorders the rows/cols.
   matrix_view_type inverse_matrix() const {
     matrix_type res(N, N);
     for (size_t i = 0; i < N; i++)
       for (size_t j = 0; j < N; j++)
         res(i, j) = inverse_matrix(i, j);
     return res;
   }

   /// Rebuild the matrix. Warning : this is slow, since it creates a new matrix and re-evaluates the function.
   matrix_view_type matrix() const {
     matrix_type res(N, N);
     for (size_t i = 0; i < N; i++)
       for (size_t j = 0; j < N; j++)
         res(i, j) = f(get_x(i), get_y(j));
     return res;
   }
   // Given a lambda f : x,y,M, it calls f(x_i, y_j, M_ji) for all i,j.
   // The order of iteration is NOT fixed : it is optimised (for memory traversal).
   template<typename LambdaType>
   friend void foreach(det_manip const & d, LambdaType const & f) {
     //for (size_t i=0; i<d.N; i++)
     //  for (size_t j=0; j<d.N; j++)
     //    f(d.x_values[i], d.y_values[j], d.mat_inv(j,i));
     range R(0, d.N);
     foreach(d.mat_inv(R, R), [&f, &d](int i, int j) { return f(d.x_values[i], d.y_values[j], d.mat_inv(j, i)); });
   }
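   /*
    * foreach usage sketch (the measurement lambda is an assumption, for
    * illustration only) :
    *
    *   foreach(dm, [](double x, double y, double minv) {
    *     // accumulate a measurement from each triple (x_i, y_j, M^{-1}(j,i))
    *   });
    */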
   // ------------------------- OPERATIONS -----------------------------------------------

   /**
    * Insert operation at column j0 and row i0.
    *
    * The operation consists in adding :
    *
    *    * a column  f(x_i, y_{j0})
    *    * and a row f(x_{i0}, y_j)
    *
    * The new column/row will be at col j0, row i0.
    *
    * 0 <= i0, j0 <= N, where N is the current size of the matrix.
    * The current column j0 (resp. row i0) will become column j0+1 (resp. row i0+1).
    * Inserting at N simply adds the new col at the end.
    * Returns the ratio of det Minv_new / det Minv.
    *
    * This routine does NOT make any modification. It has to be completed with complete_operation().
    */
   value_type try_insert(size_t i, size_t j, xy_type const & x, xy_type const & y) {
     // check input and store it for complete_operation
     assert(i <= N); assert(j <= N); assert(i >= 0); assert(j >= 0);
     if (N == Nmax) reserve(2 * Nmax);
     last_try = 1;
     w1.i = i; w1.j = j; w1.x = x; w1.y = y;
     // treat empty matrix separately
     if (N == 0) { newdet = f(x, y); newsign = 1; return newdet; }
     // I add the row and col at the end. If the move is rejected, there is no effect,
     // since N will not be changed : Minv(i,j) for i,j >= N has no meaning.
     for (size_t k = 0; k < N; k++) {
       w1.B(k) = f(x_values[k], y);
       w1.C(k) = f(x, y_values[k]);
     }
     range R(0, N);
     //w1.MB(R) = mat_inv(R,R) * w1.B(R); // OPTIMIZE BELOW
     blas::gemv(1.0, mat_inv(R, R), w1.B(R), 0.0, w1.MB(R));
     w1.ksi = f(x, y) - arrays::dot(w1.C(R), w1.MB(R));
     newdet = det * w1.ksi;
     newsign = ((i + j) % 2 == 0 ? sign : -sign); // since N-i0 + N-j0 = i0+j0 [2]
     return w1.ksi * (newsign * sign); // sign is unity, hence 1/sign == sign
   }
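   /*
    * Sketch of the fast update used in try_insert (the standard
    * bordered-matrix identity, restated here with this class's names) :
    * appending a row C and a column B with corner element f(x,y) to M gives
    *
    *   det(M') / det(M) = xi = f(x,y) - C * M^{-1} * B,
    *
    * and moving the appended row/col from position N to (i,j) contributes the
    * permutation sign (-1)^{(N-i)+(N-j)} = (-1)^{i+j}, i.e. the newsign above.
    */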
   // fx gives the new row coefficients, fy gives the new column coefficients,
   // and ksi is the last coefficient (at the intersection of the row and the column).
   template<typename Fx, typename Fy>
   value_type try_insert_from_function(size_t i, size_t j, Fx fx, Fy fy, value_type const ksi) {
     // check input and store it for complete_operation
     assert(i <= N); assert(j <= N); assert(i >= 0); assert(j >= 0);
     if (N == Nmax) reserve(2 * Nmax);
     last_try = 1;
     w1.i = i; w1.j = j;
     // treat empty matrix separately
     if (N == 0) { newdet = ksi; newsign = 1; return newdet; }
     // I add the row and col at the end. If the move is rejected, there is no effect,
     // since N will not be changed : Minv(i,j) for i,j >= N has no meaning.
     for (size_t k = 0; k < N; k++) {
       w1.B(k) = fx(x_values[k]);
       w1.C(k) = fy(y_values[k]);
     }
     range R(0, N);
     //w1.MB(R) = mat_inv(R,R) * w1.B(R); // OPTIMIZE BELOW
     blas::gemv(1.0, mat_inv(R, R), w1.B(R), 0.0, w1.MB(R));
     w1.ksi = ksi - arrays::dot(w1.C(R), w1.MB(R));
     newdet = det * w1.ksi;
     newsign = ((i + j) % 2 == 0 ? sign : -sign); // since N-i0 + N-j0 = i0+j0 [2]
     return w1.ksi * (newsign * sign); // sign is unity, hence 1/sign == sign
}
   //------------------------------------------------------------------------------------------
  private:
   void complete_insert() {
     // store the new values of x,y. They are seen through the same permutations as rows and cols resp.
     x_values.push_back(w1.x); y_values.push_back(w1.y);
     row_num.push_back(0); col_num.push_back(0);
     // special empty case again
     if (N == 0) { N = 1; mat_inv(0, 0) = 1 / newdet; return; }
     range R1(0, N);
     //w1.MC(R1) = mat_inv(R1,R1).transpose() * w1.C(R1); // OPTIMIZE BELOW
     blas::gemv(1.0, mat_inv(R1, R1).transpose(), w1.C(R1), 0.0, w1.MC(R1));
     w1.MC(N) = -1;
     w1.MB(N) = -1;
     N++;
     // keep the real position of the row/col :
     // since we insert a col/row, we first have to shift the cols to the right,
     // and then say that col w1.j is stored in N-1, the last col.
     // Same for rows (with w1.i).
     for (int_type i = N - 2; i >= int_type(w1.i); i--) row_num[i + 1] = row_num[i];
     row_num[w1.i] = N - 1;
     for (int_type i = N - 2; i >= int_type(w1.j); i--) col_num[i + 1] = col_num[i];
     col_num[w1.j] = N - 1;
     // Minv is ok, we need to complete :
     w1.ksi = 1 / w1.ksi;
     // compute the change to the inverse :
     // M += w1.ksi * w1.MB * w1.MC with BLAS. First put the 0
     range R(0, N);
     mat_inv(R, N - 1) = 0;
     mat_inv(N - 1, R) = 0;
     //mat_inv(R,R) += w1.ksi * w1.MB(R) * w1.MC(R); // OPTIMIZE BELOW
     blas::ger(w1.ksi, w1.MB(R), w1.MC(R), mat_inv(R, R));
}
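   /*
    * Identity behind complete_insert (standard block inversion, written with
    * this class's names; a sketch for the reader, not documentation from the
    * original authors) : with xi = f(x,y) - C M^{-1} B,
    *
    *   [ M  B ]^{-1}   [ M^{-1}  0 ]     1    [ M^{-1} B ]
    *   [ C  d ]      = [   0     0 ]  + --- * [    -1    ] * [ C M^{-1}  -1 ]
    *                                     xi
    *
    * which is the single blas::ger rank-1 update above, using w1.MB = M^{-1} B
    * (extended by MB(N) = -1) and w1.MC = C M^{-1} (extended by MC(N) = -1).
    */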
  public:
   //------------------------------------------------------------------------------------------
   /**
    * Double insert operation at columns j0, j1 and rows i0, i1.
    *
    * The operation consists in adding :
    *    * two columns f(x_i, y_{j0}) and f(x_i, y_{j1})
    *    * and two rows f(x_{i0}, y_j) and f(x_{i1}, y_j)
    * The new columns/rows will be at cols j0, j1 and rows i0, i1.
    *
    * 0 <= i0, i1, j0, j1 <= N+1, where N is the current size of the matrix.
    * Returns the ratio of det Minv_new / det Minv.
    * This routine does NOT make any modification. It has to be completed with complete_operation().
    */
   value_type try_insert2(size_t i0, size_t i1, size_t j0, size_t j1, xy_type const & x0, xy_type const & x1, xy_type const & y0, xy_type const & y1) {
     // check input and store it for complete_operation
     assert(i0 != i1); assert(j0 != j1); assert(i0 <= N); assert(j0 <= N); assert(i0 >= 0); assert(j0 >= 0);
     assert(i1 <= N + 1); assert(j1 <= N + 1); assert(i1 >= 0); assert(j1 >= 0);
     if (N >= Nmax) reserve(2 * Nmax); // check this resize ... we add 2 lines
     last_try = 10;
     w2.i[0] = i0; w2.i[1] = i1; w2.j[0] = j0; w2.j[1] = j1;
     w2.x[0] = x0; w2.y[0] = y0; w2.x[1] = x1; w2.y[1] = y1;
     // w2.ksi = Delta(x_values, y_values) - C.MB using BLAS
     w2.ksi(0, 0) = f(x0, y0);
     w2.ksi(0, 1) = f(x0, y1);
     w2.ksi(1, 0) = f(x1, y0);
     w2.ksi(1, 1) = f(x1, y1);
     // treat empty matrix separately
     if (N == 0) {
       newdet = w2.det_ksi();
       newsign = 1;
       return newdet;
     }
     // I add the rows and cols at the end. If the move is rejected, there is no effect,
     // since N will not be changed : inv_mat(i,j) for i,j >= N has no meaning.
     for (size_t k = 0; k < N; k++) {
       w2.B(k, 0) = f(x_values[k], y0);
       w2.B(k, 1) = f(x_values[k], y1);
       w2.C(0, k) = f(x0, y_values[k]);
       w2.C(1, k) = f(x1, y_values[k]);
     }
     range R(0, N), R2(0, 2);
     //w2.MB(R,R2) = mat_inv(R,R) * w2.B(R,R2); // OPTIMIZE BELOW
     blas::gemm(1.0, mat_inv(R, R), w2.B(R, R2), 0.0, w2.MB(R, R2));
     //w2.ksi -= w2.C(R2,R) * w2.MB(R,R2); // OPTIMIZE BELOW
     blas::gemm(-1.0, w2.C(R2, R), w2.MB(R, R2), 1.0, w2.ksi);
     newdet = det * w2.det_ksi();
     newsign = ((i0 + j0 + i1 + j1) % 2 == 0 ? sign : -sign); // since N-i0 + N-j0 + N+1-i1 + N+1-j1 = i0+j0+i1+j1 [2]
     return (newdet / det) * (newsign * sign); // sign is unity, hence 1/sign == sign
   }
   //------------------------------------------------------------------------------------------
  private:
   void complete_insert2() {
     // store the new values of x,y. They are seen through the same permutations as rows and cols resp.
     for (int k = 0; k < 2; ++k) {
       x_values.push_back(w2.x[k]); y_values.push_back(w2.y[k]);
       row_num.push_back(0); col_num.push_back(0);
     }
     range R2(0, 2);
     if (N == 0) { N = 2; mat_inv(R2, R2) = inverse(w2.ksi); row_num[w2.i[1]] = 1; col_num[w2.j[1]] = 1; return; }
     range Ri(0, N);
     //w2.MC(R2,Ri) = w2.C(R2,Ri) * mat_inv(Ri,Ri); // OPTIMIZE BELOW
     blas::gemm(1.0, w2.C(R2, Ri), mat_inv(Ri, Ri), 0.0, w2.MC(R2, Ri));
     w2.MC(R2, range(N, N + 2)) = -1; // identity matrix
     w2.MB(range(N, N + 2), R2) = -1; // identity matrix !
     // keep the real position of the row/col :
     // since we insert a col/row, we first have to shift the cols to the right,
     // and then say that col w2.j[k] is stored in N-1, the last col.
     // Same for rows (with w2.i[k]).
     for (int k = 0; k < 2; ++k) {
       N++;
       for (int_type i = N - 2; i >= int_type(w2.i[k]); i--) row_num[i + 1] = row_num[i];
       row_num[w2.i[k]] = N - 1;
       for (int_type i = N - 2; i >= int_type(w2.j[k]); i--) col_num[i + 1] = col_num[i];
       col_num[w2.j[k]] = N - 1;
     }
     w2.ksi = inverse(w2.ksi);
     range R(0, N);
     mat_inv(R, range(N - 2, N)) = 0;
     mat_inv(range(N - 2, N), R) = 0;
     //mat_inv(R,R) += w2.MB(R,R2) * (w2.ksi * w2.MC(R2,R)); // OPTIMIZE BELOW
     blas::gemm(1.0, w2.MB(R, R2), (w2.ksi * w2.MC(R2, R)), 1.0, mat_inv(R, R));
}
  public:
   //------------------------------------------------------------------------------------------
   /**
    * Consider the removal of the col j0 and the row i0 from the matrix.
    *
    * Returns the ratio of det Minv_new / det Minv.
    * This routine does NOT make any modification. It has to be completed with complete_operation().
    */
   value_type try_remove(size_t i, size_t j) {
     assert(i < N); assert(j < N); assert(i >= 0); assert(j >= 0);
     w1.i = i; w1.j = j; last_try = 2;
     w1.jreal = col_num[w1.j];
     w1.ireal = row_num[w1.i];
     // compute the newdet :
     // first we resolve w1.ireal, w1.jreal through the permutations of Minv, then we pick up what
     // will become the 'corner' coefficient, if the move is accepted, after the exchange of row and col.
     w1.ksi = mat_inv(w1.jreal, w1.ireal);
     newdet = det * w1.ksi;
     newsign = ((i + j) % 2 == 0 ? sign : -sign);
     return (newdet / det) * (newsign * sign); // sign is unity, hence 1/sign == sign
   }
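   /*
    * Why the ratio is a single coefficient of the inverse (a standard
    * identity, stated here for the reader; not an extra check done by the
    * code) : by Cramer's rule, M^{-1}(j,i) = cofactor(i,j) / det M, and that
    * cofactor is, up to the permutation sign handled via newsign, the
    * determinant of M with row i and col j removed. Hence
    * det M_new / det M = M^{-1}(jreal, ireal).
    */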
   //------------------------------------------------------------------------------------------
  private:
   void complete_remove() {
     if (N == 1) { clear(); return; }
     // repack the matrix inv_mat :
     // swap the rows w1.jreal and N-1, and the cols w1.ireal and N-1, in inv_mat.
     // Remember that for M, rows and cols are interchanged by inversion (transposition).
     {
       range R(0, N);
       if (w1.jreal != N - 1) {
         arrays::deep_swap(mat_inv(w1.jreal, R), mat_inv(N - 1, R));
         y_values[w1.jreal] = y_values[N - 1];
       }
       if (w1.ireal != N - 1) {
         arrays::deep_swap(mat_inv(R, w1.ireal), mat_inv(R, N - 1));
         x_values[w1.ireal] = x_values[N - 1];
       }
     }
     N--;
     // M <- a - b d^{-1} c with BLAS
     w1.ksi = -1 / mat_inv(N, N);
     range R(0, N);
     //mat_inv(R,R) += w1.ksi * mat_inv(R,N) * mat_inv(N,R);
     blas::ger(w1.ksi, mat_inv(R, N), mat_inv(N, R), mat_inv(R, R));
     // modify the permutations
     for (size_t k = w1.i; k < N; k++) { row_num[k] = row_num[k + 1]; }
     for (size_t k = w1.j; k < N; k++) { col_num[k] = col_num[k + 1]; }
     for (size_t k = 0; k < N; k++) {
       if (col_num[k] == N) col_num[k] = w1.jreal;
       if (row_num[k] == N) row_num[k] = w1.ireal;
     }
     row_num.pop_back(); col_num.pop_back();
     x_values.pop_back(); y_values.pop_back();
   }
  public:
   //------------------------------------------------------------------------------------------
   /**
    * Double removal operation of cols j0, j1 and rows i0, i1.
    *
    * Returns the ratio of det Minv_new / det Minv.
    * This routine does NOT make any modification. It has to be completed with complete_operation().
    */
   value_type try_remove2(size_t i0, size_t i1, size_t j0, size_t j1) {
     assert(N >= 2); assert(i0 != i1); assert(j0 != j1);
     assert(i0 < N); assert(j0 < N); assert(i0 >= 0); assert(j0 >= 0);
     assert(i1 < N + 1); assert(j1 < N + 1); assert(i1 >= 0); assert(j1 >= 0);
     last_try = 11;
     w2.i[0] = std::min(i0, i1);
     w2.i[1] = std::max(i0, i1);
     w2.j[0] = std::min(j0, j1);
     w2.j[1] = std::max(j0, j1);
     w2.ireal[0] = row_num[w2.i[0]];
     w2.ireal[1] = row_num[w2.i[1]];
     w2.jreal[0] = col_num[w2.j[0]];
     w2.jreal[1] = col_num[w2.j[1]];
     // compute the newdet
     w2.ksi(0, 0) = mat_inv(w2.jreal[0], w2.ireal[0]);
     w2.ksi(1, 0) = mat_inv(w2.jreal[1], w2.ireal[0]);
     w2.ksi(0, 1) = mat_inv(w2.jreal[0], w2.ireal[1]);
     w2.ksi(1, 1) = mat_inv(w2.jreal[1], w2.ireal[1]);
     newdet = det * w2.det_ksi();
     newsign = ((i0 + j0 + i1 + j1) % 2 == 0 ? sign : -sign);
     return (newdet / det) * (newsign * sign); // sign is unity, hence 1/sign == sign
   }
   //------------------------------------------------------------------------------------------
  private:
   void complete_remove2() {
     if (N == 2) { clear(); return; } // put the sign to 1 also... Change complete_remove...
     size_t i_real_max = std::max(w2.ireal[0], w2.ireal[1]);
     size_t i_real_min = std::min(w2.ireal[0], w2.ireal[1]);
     size_t j_real_max = std::max(w2.jreal[0], w2.jreal[1]);
     size_t j_real_min = std::min(w2.jreal[0], w2.jreal[1]);
     range R(0, N);
     if (j_real_max != N - 1) {
       arrays::deep_swap(mat_inv(j_real_max, R), mat_inv(N - 1, R));
       y_values[j_real_max] = y_values[N - 1];
     }
     if (j_real_min != N - 2) {
       arrays::deep_swap(mat_inv(j_real_min, R), mat_inv(N - 2, R));
       y_values[j_real_min] = y_values[N - 2];
     }
     if (i_real_max != N - 1) {
       arrays::deep_swap(mat_inv(R, i_real_max), mat_inv(R, N - 1));
       x_values[i_real_max] = x_values[N - 1];
     }
     if (i_real_min != N - 2) {
       arrays::deep_swap(mat_inv(R, i_real_min), mat_inv(R, N - 2));
       x_values[i_real_min] = x_values[N - 2];
     }
     N -= 2;
     // M <- a - b d^{-1} c with BLAS
     range Rn(0, N), Rl(N, N + 2);
     //w2.ksi = mat_inv(Rl,Rl);
     //w2.ksi = inverse(w2.ksi);
     w2.ksi = inverse(mat_inv(Rl, Rl));
     // write explicitly the second product on ksi for speed ?
     //mat_inv(Rn,Rn) -= mat_inv(Rn,Rl) * (w2.ksi * mat_inv(Rl,Rn)); // OPTIMIZE BELOW
     blas::gemm(-1.0, mat_inv(Rn, Rl), w2.ksi * mat_inv(Rl, Rn), 1.0, mat_inv(Rn, Rn));
     // modify the permutations
     for (size_t k = w2.i[0]; k < w2.i[1] - 1; k++) row_num[k] = row_num[k + 1];
     for (size_t k = w2.i[1] - 1; k < N; k++) row_num[k] = row_num[k + 2];
     for (size_t k = w2.j[0]; k < w2.j[1] - 1; k++) col_num[k] = col_num[k + 1];
     for (size_t k = w2.j[1] - 1; k < N; k++) col_num[k] = col_num[k + 2];
     for (size_t k = 0; k < N; k++) {
       if (col_num[k] == N + 1) col_num[k] = j_real_max;
       if (col_num[k] == N) col_num[k] = j_real_min;
       if (row_num[k] == N + 1) row_num[k] = i_real_max;
       if (row_num[k] == N) row_num[k] = i_real_min;
     }
     for (int u = 0; u < 2; ++u) { row_num.pop_back(); col_num.pop_back(); x_values.pop_back(); y_values.pop_back(); }
   }
   //------------------------------------------------------------------------------------------
  public:
   /**
    * Consider changing the column j and the corresponding y.
    *
    * Returns the ratio of det Minv_new / det Minv.
    * This routine does NOT make any modification. It has to be completed with complete_operation().
    */
   value_type try_change_col(size_t j, xy_type const & y) {
     assert(j < N); assert(j >= 0);
     w1.j = j; last_try = 3;
     w1.jreal = col_num[j];
     w1.y = y;
     // Compute the column of differences between the new and the old y.
     for (size_t i = 0; i < N; i++) w1.MC(i) = f(x_values[i], w1.y) - f(x_values[i], y_values[w1.jreal]);
     range R(0, N);
     //w1.MB(R) = mat_inv(R,R) * w1.MC(R); // OPTIMIZE BELOW
     blas::gemv(1.0, mat_inv(R, R), w1.MC(R), 0.0, w1.MB(R));
     // compute the newdet
     w1.ksi = (1 + w1.MB(w1.jreal));
     newdet = det * w1.ksi;
     newsign = sign;
     return (newdet / det) * (newsign * sign); // sign is unity, hence 1/sign == sign
   }
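   /*
    * Sketch of the identity behind try_change_col / complete_change_col (the
    * Sherman-Morrison formula, written with this class's names) : changing
    * column jreal amounts to M' = M + u e_j^T with u_i = f(x_i, y') - f(x_i, y), so
    *
    *   det(M') / det(M) = 1 + e_j^T M^{-1} u = 1 + (M^{-1} u)_{jreal},
    *
    *   M'^{-1} = M^{-1} - (M^{-1} u e_j^T M^{-1}) / (1 + e_j^T M^{-1} u),
    *
    * which complete_change_col applies as a single blas::ger rank-1 update.
    */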
   //------------------------------------------------------------------------------------------
  private:
   void complete_change_col() {
     range R(0, N);
     y_values[w1.jreal] = w1.y;
     // modifying M : Mij += w1.ksi * Bi * Mnj,
     // using the Sherman-Morrison formula,
     // implemented in 2 steps : first set Bn = 0 so that Mnj is not modified, then change Mnj.
     // Cf notes : simply multiply by -w1.ksi
     w1.ksi = -1 / (1 + w1.MB(w1.jreal));
     w1.MB(w1.jreal) = 0;
     //mat_inv(R,R) += w1.ksi * w1.MB(R) * mat_inv(w1.jreal,R); // OPTIMIZE BELOW
     blas::ger(w1.ksi, w1.MB(R), mat_inv(w1.jreal, R), mat_inv(R, R));
     mat_inv(w1.jreal, R) *= -w1.ksi;
}
   //------------------------------------------------------------------------------------------
  public:
   /**
    * Consider changing the row i and the corresponding x.
    *
    * Returns the ratio of det Minv_new / det Minv.
    * This routine does NOT make any modification. It has to be completed with complete_operation().
    */
   value_type try_change_row(size_t i, xy_type const & x) {
     assert(i < N); assert(i >= 0);
     w1.i = i; last_try = 4;
     w1.ireal = row_num[i];
     w1.x = x;
     // Compute the row of differences between the new and the old x.
     for (size_t k = 0; k < N; k++) w1.MB(k) = f(w1.x, y_values[k]) - f(x_values[w1.ireal], y_values[k]);
     range R(0, N);
     //w1.MC(R) = mat_inv(R,R).transpose() * w1.MB(R); // OPTIMIZE BELOW
     blas::gemv(1.0, mat_inv(R, R).transpose(), w1.MB(R), 0.0, w1.MC(R));
     // compute the newdet
     w1.ksi = (1 + w1.MC(w1.ireal));
     newdet = det * w1.ksi;
     newsign = sign;
     return (newdet / det) * (newsign * sign); // sign is unity, hence 1/sign == sign
   }
   //------------------------------------------------------------------------------------------
  private:
   void complete_change_row() {
     range R(0, N);
     x_values[w1.ireal] = w1.x;
     // modifying M : Mij += w1.ksi * Min * Cj,
     // using the Sherman-Morrison formula.
     // Implementation : cf case 3 (complete_change_col).
     w1.ksi = -1 / (1 + w1.MC(w1.ireal));
     w1.MC(w1.ireal) = 0;
     //mat_inv(R,R) += w1.ksi * mat_inv(R,w1.ireal) * w1.MC(R);
     blas::ger(w1.ksi, mat_inv(R, w1.ireal), w1.MC(R), mat_inv(R, R));
     mat_inv(R, w1.ireal) *= -w1.ksi;
}
   //------------------------------------------------------------------------------------------
  private:
   void check_mat_inv(double precision_warning = 1.e-8, double precision_error = 1.e-5) {
     if (N == 0) return;
     const bool relative = true;
     matrix_type res(N, N);
     for (size_t i = 0; i < N; i++)
       for (size_t j = 0; j < N; j++)
         res(i, j) = f(x_values[i], y_values[j]);
     res = inverse(res);
     double r  = max_element(abs(res - mat_inv(range(0, N), range(0, N))));
     double r2 = max_element(abs(res + mat_inv(range(0, N), range(0, N))));
     //value_type r  = max_element(abs(res - mat_inv(range(0,N),range(0,N))));
     //value_type r2 = max_element(abs(res + mat_inv(range(0,N),range(0,N))));
     //#define TRIQS_DET_MANIP_VERBOSE_CHECK
#ifdef TRIQS_DET_MANIP_VERBOSE_CHECK
     std::cout << "----------------" << std::endl
               << "check N = " << N << "  r, r2 = " << r << "  " << r2 << std::endl;
#endif
     bool err = !(r < (relative ? precision_error * r2 : precision_error));
     bool war = !(r < (relative ? precision_warning * r2 : precision_warning));
     if (err || war) {
       std::cerr << "matrix = " << matrix() << std::endl;
       std::cerr << "inverse_matrix = " << inverse_matrix() << std::endl;
     }
     if (war) std::cerr << "Warning : det_manip deviation above warning threshold."
                        << " check N = " << N
                        << "\n max(abs(M^-1 - M^-1_true)) = " << r
                        << "\n precision * max(abs(M^-1 + M^-1_true)) = " << (relative ? precision_warning * r2 : precision_warning) << std::endl;
     if (err) TRIQS_RUNTIME_ERROR << "Error : det_manip deviation above critical threshold !!";
     // since we have the proper inverse, replace the matrix
     mat_inv(range(0, N), range(0, N)) = res;
   }
   //------------------------------------------------------------------------------------------
  public:
   /**
    * Finish the move of the last try_xxx called.
    * Throws if no try_xxx has been done or if the last operation was complete_operation.
    */
   void complete_operation() {
     switch (last_try) {
       case (1): complete_insert(); break;
       case (2): complete_remove(); break;
       case (3): complete_change_col(); break;
       case (4): complete_change_row(); break;
       case (10): complete_insert2(); break;
       case (11): complete_remove2(); break;
       case (0): last_try = 0; return; break; // double call of complete_operation...
       default: TRIQS_RUNTIME_ERROR << "Misusing det_manip";
     }
     det = newdet;
     sign = newsign;
     last_try = 0;
     ++n_opts;
     if (n_opts > n_opts_max_before_check) { check_mat_inv(); n_opts = 0; }
   }
   ///
   enum RollDirection { None, Up, Down, Left, Right };

   /**
    * "Cyclic rolling" of the determinant.
    *
    * Right : Move the Nth col to the first col, cyclically.
    * Left  : Move the first col to the Nth, cyclically.
    * Up    : Move the first row to the Nth, cyclically.
    * Down  : Move the Nth row to the first row, cyclically.
    *
    * Returns -1 if the roll changes the sign of the det, 1 otherwise.
    * NB : this routine is not a try_xxx : it DOES make the modification and does not need to be completed...
    * WHY is it like this ???? : a try_roll would return the det and the +1/-1.
    */
   int roll_matrix(RollDirection roll) {
     size_t tmp;
     const int_type NN = N;
     switch (roll) {
       case (None):
         return 1;
       case (Down):
         tmp = row_num[N - 1];
         for (int_type i = NN - 2; i >= 0; i--) row_num[i + 1] = row_num[i];
         row_num[0] = tmp;
         break;
       case (Up):
         tmp = row_num[0];
         for (int_type i = 0; i < NN - 1; i++) row_num[i] = row_num[i + 1];
         row_num[N - 1] = tmp;
         break;
       case (Right):
         tmp = col_num[N - 1];
         for (int_type i = NN - 2; i >= 0; i--) col_num[i + 1] = col_num[i];
         col_num[0] = tmp;
         break;
       case (Left):
         tmp = col_num[0];
         for (int_type i = 0; i < NN - 1; i++) col_num[i] = col_num[i + 1];
         col_num[N - 1] = tmp;
         break;
       default:
         assert(0);
     }
     // signature of a cycle of order N : (-1)^(N-1)
     if ((N - 1) % 2 == 1) { sign *= -1; return -1; }
     return 1;
   }
 };
}}
#endif