mirror of
https://github.com/TREX-CoE/Sherman-Morrison.git
synced 2025-01-12 05:58:28 +01:00
Merged random cycle generator into the main scherman-morrison repository
This commit is contained in:
parent
f35ad6a777
commit
59321bc06f
66
random_generator/.clang-format
Normal file
66
random_generator/.clang-format
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# Generated from CLion C/C++ Code Style settings
|
||||||
|
BasedOnStyle: LLVM
|
||||||
|
AccessModifierOffset: -2
|
||||||
|
AlignAfterOpenBracket: Align
|
||||||
|
AlignConsecutiveAssignments: None
|
||||||
|
AlignOperands: Align
|
||||||
|
AllowAllArgumentsOnNextLine: false
|
||||||
|
AllowAllConstructorInitializersOnNextLine: false
|
||||||
|
AllowAllParametersOfDeclarationOnNextLine: false
|
||||||
|
AllowShortBlocksOnASingleLine: Always
|
||||||
|
AllowShortCaseLabelsOnASingleLine: false
|
||||||
|
AllowShortFunctionsOnASingleLine: All
|
||||||
|
AllowShortIfStatementsOnASingleLine: Always
|
||||||
|
AllowShortLambdasOnASingleLine: All
|
||||||
|
AllowShortLoopsOnASingleLine: true
|
||||||
|
AlwaysBreakAfterReturnType: None
|
||||||
|
AlwaysBreakTemplateDeclarations: Yes
|
||||||
|
BreakBeforeBraces: Custom
|
||||||
|
BraceWrapping:
|
||||||
|
AfterCaseLabel: false
|
||||||
|
AfterClass: false
|
||||||
|
AfterControlStatement: Never
|
||||||
|
AfterEnum: false
|
||||||
|
AfterFunction: false
|
||||||
|
AfterNamespace: false
|
||||||
|
AfterUnion: false
|
||||||
|
BeforeCatch: false
|
||||||
|
BeforeElse: false
|
||||||
|
IndentBraces: false
|
||||||
|
SplitEmptyFunction: false
|
||||||
|
SplitEmptyRecord: true
|
||||||
|
BreakBeforeBinaryOperators: None
|
||||||
|
BreakBeforeTernaryOperators: true
|
||||||
|
BreakConstructorInitializers: BeforeColon
|
||||||
|
BreakInheritanceList: BeforeColon
|
||||||
|
ColumnLimit: 100
|
||||||
|
CompactNamespaces: false
|
||||||
|
ContinuationIndentWidth: 8
|
||||||
|
IndentCaseLabels: true
|
||||||
|
IndentPPDirectives: None
|
||||||
|
IndentWidth: 2
|
||||||
|
KeepEmptyLinesAtTheStartOfBlocks: true
|
||||||
|
MaxEmptyLinesToKeep: 2
|
||||||
|
NamespaceIndentation: All
|
||||||
|
ObjCSpaceAfterProperty: false
|
||||||
|
ObjCSpaceBeforeProtocolList: true
|
||||||
|
PointerAlignment: Right
|
||||||
|
ReflowComments: false
|
||||||
|
SpaceAfterCStyleCast: true
|
||||||
|
SpaceAfterLogicalNot: false
|
||||||
|
SpaceAfterTemplateKeyword: false
|
||||||
|
SpaceBeforeAssignmentOperators: true
|
||||||
|
SpaceBeforeCpp11BracedList: false
|
||||||
|
SpaceBeforeCtorInitializerColon: true
|
||||||
|
SpaceBeforeInheritanceColon: true
|
||||||
|
SpaceBeforeParens: ControlStatements
|
||||||
|
SpaceBeforeRangeBasedForLoopColon: false
|
||||||
|
SpaceInEmptyParentheses: false
|
||||||
|
SpacesBeforeTrailingComments: 0
|
||||||
|
SpacesInAngles: false
|
||||||
|
SpacesInCStyleCastParentheses: false
|
||||||
|
SpacesInContainerLiterals: false
|
||||||
|
SpacesInParentheses: false
|
||||||
|
SpacesInSquareBrackets: false
|
||||||
|
TabWidth: 2
|
||||||
|
UseTab: Never
|
14
random_generator/.gitignore
vendored
Normal file
14
random_generator/.gitignore
vendored
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
|
||||||
|
|
||||||
|
# Common build directories
|
||||||
|
[bB]uild
|
||||||
|
[dD]ebug
|
||||||
|
[rR]elease
|
||||||
|
# This one is often used by intellij ide
|
||||||
|
cmake-*
|
||||||
|
|
||||||
|
# IDE related
|
||||||
|
.idea
|
||||||
|
.vscode
|
||||||
|
|
||||||
|
|
21
random_generator/CMakeLists.txt
Normal file
21
random_generator/CMakeLists.txt
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.16)
|
||||||
|
|
||||||
|
project("TREX - Scherman-Morrison random generator" VERSION "0.1.0")
|
||||||
|
|
||||||
|
set(INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/headers)
|
||||||
|
set(GLOBAL_BIN_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
|
||||||
|
set(GLOBAL_LIB_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
|
||||||
|
|
||||||
|
set(CMAKE_CXX_STANDARD 20)
|
||||||
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
|
||||||
|
# Handle versioning
|
||||||
|
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/versioning.h.in" "${PROJECT_BINARY_DIR}/versioning.h")
|
||||||
|
include_directories(${PROJECT_BINARY_DIR})
|
||||||
|
|
||||||
|
find_package(HDF5 REQUIRED)
|
||||||
|
find_package(BLAS REQUIRED)
|
||||||
|
find_package(LAPACK REQUIRED)
|
||||||
|
find_package(OpenMP REQUIRED)
|
||||||
|
|
||||||
|
add_subdirectory(src)
|
131
random_generator/README.md
Normal file
131
random_generator/README.md
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
![Builds](https://github.com/Thukisdo/mlkaps-random-generator/actions/workflows/cmake.yml/badge.svg)
|
||||||
|
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/Naereen/StrapDown.js/graphs/commit-activity)
|
||||||
|
![Maintainer](https://img.shields.io/badge/maintainer-Thukisdo-blue)
|
||||||
|
# Random cycle generator for QMC computations
|
||||||
|
|
||||||
|
TREX's QMCKL library provides an implementation of the Scherman-morrison formula.
|
||||||
|
This tool provides a way to generate random HDF5 datasets that can be used for
|
||||||
|
benchmarking those implementations.
|
||||||
|
|
||||||
|
Specifically, it reproduces "cycles" that are generated by QMCKL computations.
|
||||||
|
A cycle is a sequence of row updates on a matrix, that require multiple calls to
|
||||||
|
Scherman-morrison to update the corresponding inverse matrix.
|
||||||
|
|
||||||
|
Note that this tool main purpose is to generate splitting cycles ([More on this here](#More-on-splitting-cycles))
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
This project uses cmake, so first set up a build directory and build the executable
|
||||||
|
|
||||||
|
```shell
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake .. -DCMAKE_BUILD_TYPE=Release
|
||||||
|
make -j 4
|
||||||
|
```
|
||||||
|
|
||||||
|
The executable can then be launched with
|
||||||
|
|
||||||
|
```shell
|
||||||
|
cd build
|
||||||
|
./bin/random_generator <output_file> <generation_mode>
|
||||||
|
```
|
||||||
|
|
||||||
|
Where generation mode is one of the following:
|
||||||
|
|
||||||
|
- matrix_size: Generate a dataset of increasing matrix sizes, stored as
|
||||||
|
|
||||||
|
```
|
||||||
|
/
|
||||||
|
|-<matrix_size>/
|
||||||
|
| |-<number_of_splitting_updates>/
|
||||||
|
| | |-cycle_<xxx>/
|
||||||
|
| | |- ...
|
||||||
|
| | |-cycle_<xxx>
|
||||||
|
| | |- ...
|
||||||
|
| |-<number_of_splitting_updates>/
|
||||||
|
|-<matrix_size>/
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
- update: Generate a dataset of increasing update count, stored as
|
||||||
|
|
||||||
|
```
|
||||||
|
/
|
||||||
|
|-<number_of_updates>/
|
||||||
|
| |-<number_of_splitting_updates>/
|
||||||
|
| | |-cycle_<xxx>/
|
||||||
|
| | |- ...
|
||||||
|
| | |-cycle_<xxx>
|
||||||
|
| | |- ...
|
||||||
|
| |-<number_of_splitting_updates>/
|
||||||
|
|-<number_of_updates>/
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
If you require another generation mode, please open an issue or modify the main accordingly.
|
||||||
|
|
||||||
|
# Cycle format
|
||||||
|
|
||||||
|
Cycles are stored in the following format:
|
||||||
|
|
||||||
|
```
|
||||||
|
cycle_<xxx>/
|
||||||
|
|-slater_inverse_t: The transposed-inverse of the slater matrix to update
|
||||||
|
|-updates: A matrix containing all the (additive) updates to apply to the slater matrix
|
||||||
|
|-nupdates: The number of updates in the cycle
|
||||||
|
|-determinant: The determinant of the slater matrix before the updates
|
||||||
|
|-slater_matrix: The slater matrix to update
|
||||||
|
|-col_update_index: The index of the row to update in the slater matrix
|
||||||
|
|-condition_number: The condition number of the slater matrix before the updates
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that the updates are not applied on columns of the transposed-inverse slater matrix, but on its rows.
|
||||||
|
As such, it is needed to transpose the slater matrix before applying the updates.
|
||||||
|
|
||||||
|
# More on matrix-update cycles
|
||||||
|
|
||||||
|
During QMC computation, a series of updates are applied on a matrix. One update affect an entire row/column
|
||||||
|
of the matrix. This series of successive updates is called a "cycle".
|
||||||
|
|
||||||
|
Instead of computing the inverse of this matrix from scratch (which is expansive),
|
||||||
|
it is possible to update the inverse by applying the Scherman-Morrison algorithm.
|
||||||
|
|
||||||
|
This tool generates a dataset containing a number of matrix-update cycles.
|
||||||
|
|
||||||
|
# More on splitting cycles
|
||||||
|
|
||||||
|
During the cycle, an update may render the matrix non-invertible. We will call such an update a "splitting update".
|
||||||
|
Some algorithm will then fail to apply the cycle, whereas smarter algorithms will succeed by delaying the update
|
||||||
|
(applying the remaining updates first), or by applying the half of the update and delaying the other half.
|
||||||
|
|
||||||
|
Those splitting updates lead to increased computation time, and are critical for performance evaluation.
|
||||||
|
|
||||||
|
As such, this tool is centered around generating splitting updates.
|
||||||
|
|
||||||
|
# How it works
|
||||||
|
|
||||||
|
The hardest case for Scherman-Morrison is when the matrix is almost-singular (splitting update).
|
||||||
|
To reproduce this case, we generate an update that render a given column colinear to another.
|
||||||
|
|
||||||
|
A naive implementation would produce a failure after checking that the determinant is close to zero.
|
||||||
|
To fix this, we add a small noise to this update, and follow it with another update that breaks the colinearity.
|
||||||
|
|
||||||
|
### Chained-updates
|
||||||
|
|
||||||
|
We use a basic implementation of this algorithm called the "Chained-updates".
|
||||||
|
|
||||||
|
It produces updates by traversing the matrix left-to-right, where splitting updates are made colinear
|
||||||
|
to the column to their right.
|
||||||
|
A normal update will then break the chain of any previous splitting updates.
|
||||||
|
|
||||||
|
We ensure that the final update of the cycle is non-splitting, to guarantee that the final matrix
|
||||||
|
is invertible.
|
||||||
|
|
||||||
|
### Pitfalls
|
||||||
|
|
||||||
|
This methods has two main downfalls:
|
||||||
|
|
||||||
|
- A cycle cannot have a single splitting update, since a normal update is required to break the chain.
|
||||||
|
- This is the best case scenario for implementation of Scherman-Morrison that postpone splitting updates.
|
||||||
|
We could make this harder by generating bi-directional chains.
|
38
random_generator/headers/Engine.hpp
Normal file
38
random_generator/headers/Engine.hpp
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <filesystem>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "EngineModeExecutor.hpp"
|
||||||
|
|
||||||
|
enum class EngineMode {
|
||||||
|
kMatrixSize,
|
||||||
|
kUpdateCount,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Main class responsible for starting the correct generation depending on the program arguments
|
||||||
|
*
|
||||||
|
* Adding a new generation mode is as simple as adding a new enum value to EngineMode and adapting the
|
||||||
|
* exec() method
|
||||||
|
*/
|
||||||
|
class Engine {
|
||||||
|
public:
|
||||||
|
Engine(int argc, char **argv);
|
||||||
|
|
||||||
|
int exec();
|
||||||
|
|
||||||
|
char *getExecutablePath() { return argv[0]; }
|
||||||
|
|
||||||
|
const std::filesystem::path &getOutputPath() const { return dataset_output_path; }
|
||||||
|
|
||||||
|
const char *const *getArgv() const { return argv; }
|
||||||
|
|
||||||
|
int getArgc() const { return argc; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
char **argv;
|
||||||
|
int argc;
|
||||||
|
EngineMode mode;
|
||||||
|
std::filesystem::path dataset_output_path;
|
||||||
|
};
|
29
random_generator/headers/EngineModeExecutor.hpp
Normal file
29
random_generator/headers/EngineModeExecutor.hpp
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
class Engine;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface for classes that execute a specific generation mode
|
||||||
|
*
|
||||||
|
* Subclassing this class allows one to easily add new custom generation modes to the engine.
|
||||||
|
* (This class is purposefully simple to allow for easy subclassing, try to keep it this way)
|
||||||
|
*/
|
||||||
|
class EngineModeExecutor {
|
||||||
|
public:
|
||||||
|
explicit EngineModeExecutor(Engine *parent_engine) : parent(parent_engine) {}
|
||||||
|
|
||||||
|
virtual ~EngineModeExecutor() = default;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Main method of the executor. This method is automatically called by the Engine class.
|
||||||
|
* @return The return code of the execution, 0 on success, non-zero on error.
|
||||||
|
*/
|
||||||
|
virtual int exec() = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
/**
|
||||||
|
* The engine that owns this executor
|
||||||
|
* Can be used to fetch the program arguments, etc.
|
||||||
|
*/
|
||||||
|
Engine *parent;
|
||||||
|
};
|
17
random_generator/headers/MatrixSizeExecutor.hpp
Normal file
17
random_generator/headers/MatrixSizeExecutor.hpp
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "EngineModeExecutor.hpp"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Generates a dataset with matrices of increasing size and increasing number of splits.
|
||||||
|
* The total number of updates is fixed, and the number of splits is increasing.
|
||||||
|
*
|
||||||
|
* @TODO Add a method for customizing the executor's parameters
|
||||||
|
*/
|
||||||
|
class MatrixSizeExecutor : public EngineModeExecutor {
|
||||||
|
public:
|
||||||
|
using EngineModeExecutor::EngineModeExecutor;
|
||||||
|
|
||||||
|
int exec() override;
|
||||||
|
};
|
16
random_generator/headers/UpdateCountExecutor.hpp
Normal file
16
random_generator/headers/UpdateCountExecutor.hpp
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "EngineModeExecutor.hpp"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Generates a dataset where cycles contain a varying number of updates.
|
||||||
|
* The number of splitting updates varies between 2 and the number of updates.
|
||||||
|
*
|
||||||
|
* @TODO Add a method for customizing the executor's parameters
|
||||||
|
*/
|
||||||
|
class UpdateCountExecutor : public EngineModeExecutor {
|
||||||
|
public:
|
||||||
|
using EngineModeExecutor::EngineModeExecutor;
|
||||||
|
|
||||||
|
int exec() override;
|
||||||
|
};
|
47
random_generator/headers/cycle_generation/CApi.h
Normal file
47
random_generator/headers/cycle_generation/CApi.h
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
// This file provide a simple API to generate random cycles
|
||||||
|
// The goal of this api is to provide a way for other programs to generate random cycles at runtime
|
||||||
|
// instead of requiring a pre-generated dataset
|
||||||
|
|
||||||
|
// Don't use "pragma once" to ensure compatibility with any compiler
|
||||||
|
#ifndef C_API_HEADER_GUARD
|
||||||
|
#define C_API_HEADER_GUARD
|
||||||
|
|
||||||
|
// This file is included from both C++ and C, so ensure everything here has C linkage
|
||||||
|
// if need be
|
||||||
|
#ifdef __cplusplus
|
||||||
|
#include <cstdint>
|
||||||
|
extern "C" {
|
||||||
|
#else
|
||||||
|
#include <stdint.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
// Total number of updates in the cycle
|
||||||
|
uint32_t n_update;
|
||||||
|
// Dim is the number of rows
|
||||||
|
uint32_t dim;
|
||||||
|
// lds is the number of columns
|
||||||
|
uint32_t lds;
|
||||||
|
double *slater_inverse_t;
|
||||||
|
double *slater_matrix;
|
||||||
|
// Matrix containing all the updates !The updates are ADDITIVE!
|
||||||
|
double *updates;
|
||||||
|
// Determinant of the slater matrix
|
||||||
|
double determinant;
|
||||||
|
// Indices of the updates
|
||||||
|
uint32_t *col_update_index;
|
||||||
|
} Cycle;
|
||||||
|
|
||||||
|
Cycle *generateRandomCycle(uint32_t dim, uint32_t lds, uint32_t n_update, uint32_t n_splits,
|
||||||
|
double determinant_threshold, double splitting_update_noise_magnitude);
|
||||||
|
|
||||||
|
void freeCycle(Cycle **cycle);
|
||||||
|
|
||||||
|
|
||||||
|
// To match the previous opening bracket
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
28
random_generator/headers/cycle_generation/Cycle.hpp
Normal file
28
random_generator/headers/cycle_generation/Cycle.hpp
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Matrix.hpp"
|
||||||
|
#include "update_generation/UpdateMatrix.hpp"
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
class Cycle {
|
||||||
|
public:
|
||||||
|
Matrix &getSlaterMatrix() { return matrix; }
|
||||||
|
|
||||||
|
const Matrix &getSlaterMatrix() const { return matrix; }
|
||||||
|
|
||||||
|
Matrix &getSlaterInverse() { return matrix_invt; }
|
||||||
|
|
||||||
|
const Matrix &getSlaterInverseTransposed() const { return matrix_invt; }
|
||||||
|
|
||||||
|
UpdateMatrix &getUpdateMatrix() { return update_array; }
|
||||||
|
|
||||||
|
const UpdateMatrix &getUpdateMatrix() const { return update_array; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
Matrix matrix;
|
||||||
|
Matrix matrix_invt;
|
||||||
|
UpdateMatrix update_array;
|
||||||
|
};
|
||||||
|
|
||||||
|
}// namespace randomgen
|
36
random_generator/headers/cycle_generation/CycleGenerator.hpp
Normal file
36
random_generator/headers/cycle_generation/CycleGenerator.hpp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Cycle.hpp"
|
||||||
|
#include "Matrix.hpp"
|
||||||
|
#include "update_generation/UpdateGenerator.hpp"
|
||||||
|
#include "update_generation/UpdateMatrix.hpp"
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
class CycleGenerator {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* @brief Cycle generator
|
||||||
|
* @param x The size of the (squared) matrix
|
||||||
|
* @param y_padding The column padding to add to the matrix
|
||||||
|
* @param generator The generator used to generate the update vectors
|
||||||
|
* Note that this generator must be thread-safe and re-entrant
|
||||||
|
*/
|
||||||
|
CycleGenerator(size_t x, size_t y_padding,
|
||||||
|
std::shared_ptr<const UpdateGenerator> update_generator);
|
||||||
|
|
||||||
|
void generateCycleMatrices(Cycle *res);
|
||||||
|
|
||||||
|
Cycle make(size_t n_update, size_t n_splits);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<const UpdateGenerator> update_generator;
|
||||||
|
MatrixGenerator matrix_generator;
|
||||||
|
size_t x, y_padding;
|
||||||
|
|
||||||
|
Cycle attemptToGenerateCycle(size_t n_update, size_t n_splits);
|
||||||
|
|
||||||
|
void finalizeCycle(Cycle &res) const;
|
||||||
|
};
|
||||||
|
}// namespace randomgen
|
42
random_generator/headers/cycle_generation/Matrix.hpp
Normal file
42
random_generator/headers/cycle_generation/Matrix.hpp
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iostream>
|
||||||
|
#include <memory>
|
||||||
|
#include <random>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
struct Matrix {
|
||||||
|
Matrix() = default;
|
||||||
|
Matrix(size_t x, size_t y);
|
||||||
|
|
||||||
|
Matrix(const Matrix &other);
|
||||||
|
Matrix &operator=(const Matrix &other);
|
||||||
|
|
||||||
|
Matrix(Matrix &&other) = default;
|
||||||
|
Matrix &operator=(Matrix &&other) = default;
|
||||||
|
|
||||||
|
Matrix inverse();
|
||||||
|
Matrix transpose();
|
||||||
|
|
||||||
|
friend std::ostream &operator<<(std::ostream &os, const Matrix &m);
|
||||||
|
|
||||||
|
double *data() { return array.get(); }
|
||||||
|
|
||||||
|
const double *data() const { return array.get(); }
|
||||||
|
|
||||||
|
void pad(size_t x_pad, size_t y_pad);
|
||||||
|
double computeDeterminant();
|
||||||
|
double getLastKnownDeterminant() const;
|
||||||
|
|
||||||
|
uint32_t x = 0, y = 0;
|
||||||
|
double determinant;
|
||||||
|
std::unique_ptr<double[]> array;
|
||||||
|
};
|
||||||
|
|
||||||
|
class MatrixGenerator {
|
||||||
|
public:
|
||||||
|
Matrix operator()(size_t matrix_size, double vmin = -3, double vmax = 3);
|
||||||
|
};
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,24 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "hdf5CycleOutputStream.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief A stream that stores cycles based on the matrices size
|
||||||
|
*/
|
||||||
|
class SizedBasedCycleStream : public hdf5CycleOutputStream {
|
||||||
|
public:
|
||||||
|
using hdf5CycleOutputStream::hdf5CycleOutputStream;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/**
|
||||||
|
* @brief Returns the path as /matrix_size/splits_count/cycle_id
|
||||||
|
*
|
||||||
|
* @param cycle
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
std::string getPathFor(const Cycle &cycle) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,59 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
|
||||||
|
#include "Cycle.hpp"
|
||||||
|
#include <filesystem>
|
||||||
|
#include <hdf5.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
/**
|
||||||
|
* @brief This stream allows one to output multiple cycles to a single HDF5 file.
|
||||||
|
* The cycle storing algorithm is fixed, but this class can be subclassed to change where the cycles
|
||||||
|
* are stored in the final datasets.
|
||||||
|
*
|
||||||
|
* By default, this class stores the cycles per number of updates and number of splits.
|
||||||
|
*/
|
||||||
|
class hdf5CycleOutputStream {
|
||||||
|
public:
|
||||||
|
explicit hdf5CycleOutputStream(const std::filesystem::path &dataset_path);
|
||||||
|
|
||||||
|
virtual ~hdf5CycleOutputStream();
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Output a cycle to the dataset.
|
||||||
|
* @param ds The stream to output to.
|
||||||
|
* @param cycle The cycle to output
|
||||||
|
* @return The stream passed as parameter.
|
||||||
|
*/
|
||||||
|
friend hdf5CycleOutputStream &operator<<(hdf5CycleOutputStream &ds, const Cycle &cycle);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
/**
|
||||||
|
* @brief Creates the given path inside the dataset (parents folders included)
|
||||||
|
* @param path The path to create
|
||||||
|
* @return the id of the newly created group
|
||||||
|
* Freeing is the responsibility of the caller.
|
||||||
|
*/
|
||||||
|
hid_t createGroup(const std::string &path);
|
||||||
|
|
||||||
|
hid_t file_id = -1;
|
||||||
|
|
||||||
|
// Each cycle is represented by a unique id
|
||||||
|
// This member is used to keep track of the current id
|
||||||
|
size_t last_unique_cycle_id = 0;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/**
|
||||||
|
* Get the storage path for a given cycle.
|
||||||
|
*
|
||||||
|
* By default, this is /n_updates/n_splits
|
||||||
|
* @param cycle The cycle that will be stored
|
||||||
|
* @return The path at which the cycle must be stored.
|
||||||
|
*/
|
||||||
|
virtual std::string getPathFor(const Cycle &cycle);
|
||||||
|
};
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,26 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "UpdatePlanner.hpp"
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Generates an update plan using chained updates
|
||||||
|
*
|
||||||
|
* Updates are generated left-to-right, where each splitting update targets the column on its right.
|
||||||
|
* By ensuring that the last update is non-splitting, we can guarantee that the matrix remains invertible.
|
||||||
|
*/
|
||||||
|
class ChainedUpdatePlanBuilder : public UpdatePlanner {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* @brief Produce a chain-update plan
|
||||||
|
* @param update_count The total number of updates in the plan
|
||||||
|
* @param split_count The number of splitting updates in the plan
|
||||||
|
* @return A new plan with the given parameters
|
||||||
|
*/
|
||||||
|
UpdatePlan make(uint32_t update_count, uint32_t split_count) const override;
|
||||||
|
};
|
||||||
|
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,45 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Describes single update, including its target column, whether it splits or not...
|
||||||
|
*/
|
||||||
|
class UpdateDescriptor {
|
||||||
|
public:
|
||||||
|
UpdateDescriptor() = default;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Construct a new UpdateDescriptor object
|
||||||
|
* @param splits If true, the update must splits (produce a singular matrix)
|
||||||
|
* @param column_index The column to update
|
||||||
|
* @param target_column_index The target column to use for the split (if the splits is False, this is ignored)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
UpdateDescriptor(bool splits, uint32_t column_index, uint32_t target_column_index)
|
||||||
|
: splits(splits), column_index(column_index), target_column_index(target_column_index) {}
|
||||||
|
|
||||||
|
bool isSplitting() const { return splits; }
|
||||||
|
|
||||||
|
void setSplitting(bool new_split_value) { this->splits = new_split_value; }
|
||||||
|
|
||||||
|
uint32_t getColumnIndex() const { return column_index; }
|
||||||
|
|
||||||
|
void setColumnIndex(uint32_t new_column_index) { column_index = new_column_index; }
|
||||||
|
|
||||||
|
uint32_t getTargetColumnIndex() const { return target_column_index; }
|
||||||
|
|
||||||
|
void setTargetColumnIndex(uint32_t new_target_column_index) {
|
||||||
|
this->target_column_index = new_target_column_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool splits = false;
|
||||||
|
uint32_t column_index = 0;
|
||||||
|
uint32_t target_column_index = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,74 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "UpdateMatrixGenerator.hpp"
|
||||||
|
#include "UpdatePlanner.hpp"
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Main class for the generator module, capable of planning and generating updates
|
||||||
|
*
|
||||||
|
* This class is a utility class that combines a planner and a generator, into a single entry point for the module.
|
||||||
|
* It provides simplicity for the end user, while advanced user may want to use the planner and generator separately.
|
||||||
|
*/
|
||||||
|
class UpdateGenerator {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* @brief Builds a new generator from a planner and an update generator
|
||||||
|
*
|
||||||
|
* Takes unique_ptr to ensure thread-safety and re-entry
|
||||||
|
* @param update_planner The planner to use for generating plans
|
||||||
|
* @param update_generator The generator used to build an UpdateMatrix from a plan
|
||||||
|
*/
|
||||||
|
UpdateGenerator(std::shared_ptr<const UpdatePlanner> update_planner,
|
||||||
|
std::shared_ptr<const UpdateMatrixGenerator> update_generator);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Generate a new update plan to be fed to the generator at a later time
|
||||||
|
* Allows one to generate a plan that can be reused multiple times.
|
||||||
|
*
|
||||||
|
* This function is thread-safe and re-entrant.
|
||||||
|
* @param update_count The number of updates to plan for
|
||||||
|
* @param split_count The number of splitting updates inside the plan
|
||||||
|
* @return A new plan with the given parameters
|
||||||
|
*/
|
||||||
|
UpdatePlan plan(uint32_t update_count, uint32_t split_count) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Run the generator on the given update plan
|
||||||
|
*
|
||||||
|
* This function is thread-safe and re-entrant.
|
||||||
|
* @param slater_matrix The slater matrix to use for the update generation
|
||||||
|
* @param y_padding The padding to add to the update matrix.
|
||||||
|
* Initially, the updates have the same length as a column of the slater matrix
|
||||||
|
*
|
||||||
|
* @param update_plan
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
UpdateMatrix make(const Matrix &slater_matrix, uint32_t y_padding,
|
||||||
|
const UpdatePlan &update_plan) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Plans and generate an update matrix from scratch.
|
||||||
|
*
|
||||||
|
* Note that a new plan is created each time this function is called. For maximum performance,
|
||||||
|
* consider using the plan() function to generate a plan in situations where the same plan can be reused
|
||||||
|
* multiple times.
|
||||||
|
*
|
||||||
|
* This function is thread-safe and re-entrant.
|
||||||
|
* @param slater_matrix_t
|
||||||
|
* @param y_padding
|
||||||
|
* @param update_count
|
||||||
|
* @param split_count
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
UpdateMatrix make(const Matrix &slater_matrix_t, uint32_t y_padding, uint32_t update_count,
|
||||||
|
uint32_t split_count) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<const UpdatePlanner> planner;
|
||||||
|
std::shared_ptr<const UpdateMatrixGenerator> generator;
|
||||||
|
};
|
||||||
|
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,148 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <memory>
|
||||||
|
#include <span>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Stores a set of updates, and provides methods to retrieve them. Include metadata about the updates,
|
||||||
|
* such as the number of splitting updates, etc..
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* The updates are stored as a matrix of size n_update x update_length.
|
||||||
|
*/
|
||||||
|
class UpdateMatrix {
|
||||||
|
private:
|
||||||
|
/**
|
||||||
|
* @brief Base class for the (Const) iterators. Provides facilities to traverse the update matrix.
|
||||||
|
* @tparam UMatrix the type of the update matrix, for const correctness
|
||||||
|
*/
|
||||||
|
template<typename UMatrix>
|
||||||
|
class BaseIterator {
|
||||||
|
public:
|
||||||
|
BaseIterator(UMatrix &parent_matrix, size_t update_index)
|
||||||
|
: update_index(update_index), parent_matrix(&parent_matrix) {}
|
||||||
|
|
||||||
|
BaseIterator operator+(int count) const {
|
||||||
|
BaseIterator res(*this);
|
||||||
|
res += count;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
BaseIterator &operator+=(int count) {
|
||||||
|
update_index += count;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
BaseIterator &operator++() { return *this += 1; }
|
||||||
|
|
||||||
|
// We can implement the subtraction operators using the addition operators
|
||||||
|
BaseIterator operator-(int count) const { return *this + (-count); }
|
||||||
|
|
||||||
|
|
||||||
|
BaseIterator &operator-=(int count) { return *this += -count; }
|
||||||
|
|
||||||
|
BaseIterator operator--() { return *this -= 1; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
uint32_t update_index;
|
||||||
|
UMatrix *parent_matrix;
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* @brief Const iterator on the updates
|
||||||
|
*/
|
||||||
|
class ConstIterator : public BaseIterator<const UpdateMatrix> {
|
||||||
|
public:
|
||||||
|
using BaseIterator::BaseIterator;
|
||||||
|
|
||||||
|
std::span<const double> operator*() const {
|
||||||
|
if (update_index >= parent_matrix->n_update or update_index < 0) {
|
||||||
|
throw std::out_of_range(
|
||||||
|
"UpdateMatrix::ConstIterator::operator*(): update_index out of range");
|
||||||
|
}
|
||||||
|
return parent_matrix->getUpdateSpan(update_index);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Const iterator on the updates
|
||||||
|
*/
|
||||||
|
class Iterator : public BaseIterator<UpdateMatrix> {
|
||||||
|
public:
|
||||||
|
using BaseIterator::BaseIterator;
|
||||||
|
|
||||||
|
std::span<double> operator*() const {
|
||||||
|
if (update_index >= parent_matrix->n_update or update_index < 0) {
|
||||||
|
throw std::out_of_range(
|
||||||
|
"UpdateMatrix::ConstIterator::operator*(): update_index out of range");
|
||||||
|
}
|
||||||
|
return parent_matrix->getUpdateSpan(update_index);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Iterator begin() { return {*this, 0}; }
|
||||||
|
|
||||||
|
ConstIterator begin() const { return {*this, 0}; }
|
||||||
|
|
||||||
|
Iterator end() { return {*this, n_update}; }
|
||||||
|
|
||||||
|
ConstIterator end() const { return {*this, n_update}; }
|
||||||
|
|
||||||
|
UpdateMatrix() = default;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Construct a new matrix able to store updates of the given size
|
||||||
|
* @param n_update The total number of updates in the matrix
|
||||||
|
* @param n_splits The number of splitting updates
|
||||||
|
* @param update_length The length of each update
|
||||||
|
*/
|
||||||
|
UpdateMatrix(uint32_t n_update, uint32_t n_splits, uint32_t update_length);
|
||||||
|
|
||||||
|
UpdateMatrix(const UpdateMatrix &other);
|
||||||
|
UpdateMatrix &operator=(const UpdateMatrix &other);
|
||||||
|
|
||||||
|
UpdateMatrix(UpdateMatrix &&other) = default;
|
||||||
|
UpdateMatrix &operator=(UpdateMatrix &&other) = default;
|
||||||
|
|
||||||
|
|
||||||
|
double *getRawUpdates() { return matrix.get(); }
|
||||||
|
const double *getRawUpdates() const { return matrix.get(); }
|
||||||
|
|
||||||
|
void setUpdate(uint32_t update_rank, const std::span<const double> &update,
|
||||||
|
uint32_t column_index);
|
||||||
|
|
||||||
|
std::span<double> getUpdateSpan(uint32_t index) {
|
||||||
|
std::span<double> res(getRawUpdates() + (index * update_length), update_length);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::span<const double> getUpdateSpan(uint32_t index) const {
|
||||||
|
std::span<const double> res(getRawUpdates() + (index * update_length), update_length);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t getUpdateCount() const { return n_update; }
|
||||||
|
|
||||||
|
uint32_t getUpdateLength() const { return update_length; }
|
||||||
|
|
||||||
|
uint32_t getSplitCount() const { return n_splits; }
|
||||||
|
|
||||||
|
uint32_t *getUpdateIndices() const { return update_index.get(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
// The number of update in this cycle, and the number of updates that are splits
|
||||||
|
// Note that n_update > 0, and n_splits < n_update
|
||||||
|
uint32_t n_update = 0, n_splits = 0;
|
||||||
|
uint32_t update_length = 0;
|
||||||
|
|
||||||
|
// We use unique ptr to limit the size of this class
|
||||||
|
// Thanks to this, this class fits into 32 bytes
|
||||||
|
std::unique_ptr<double[]> matrix;
|
||||||
|
std::unique_ptr<uint32_t[]> update_index;
|
||||||
|
};
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,69 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "UpdateMatrix.hpp"
|
||||||
|
#include "UpdatePlan.hpp"
|
||||||
|
#include "cycle_generation/Matrix.hpp"
|
||||||
|
#include <iostream>
|
||||||
|
#include <span>
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Cycle updates generator that handles splitting and normal cycles
|
||||||
|
*/
|
||||||
|
class UpdateMatrixGenerator {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* @brief Exception thrown when the update generator fails to generate an update after a set number of attempts
|
||||||
|
*/
|
||||||
|
class FailedUpdateGeneration : std::runtime_error {
|
||||||
|
public:
|
||||||
|
explicit FailedUpdateGeneration(size_t attempts);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the number of attempts that were made to generate an update before failing
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
size_t getAttemptedGenerationCount() const { return attempted_generation; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
const size_t attempted_generation;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Construct a new generator with the given thresholds
|
||||||
|
* @param noise_magnitude The magnitude of the noise added to splitting updates.
|
||||||
|
* The noise is added so that the final matrix is not singular.
|
||||||
|
* If the noise is too small, Scherman-Morrison will fail, if it is too large, the update may not trigger any splits.
|
||||||
|
* Tested with a value of 1e-6
|
||||||
|
* @param determinant_threshold
|
||||||
|
*/
|
||||||
|
explicit UpdateMatrixGenerator(double noise_magnitude, double determinant_threshold = 1e-6)
|
||||||
|
: noise_magnitude(noise_magnitude), determinant_threshold(determinant_threshold) {}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Main function of the generator, takes a matrix as an input, and generate a series of update vectors
|
||||||
|
* Note that this method is thread-safe and re-entrant.
|
||||||
|
* @param slater_matrix The reference matrix
|
||||||
|
* @param update_plan The number of updates to generate
|
||||||
|
* @return An array of update vectors
|
||||||
|
*/
|
||||||
|
UpdateMatrix make(const Matrix &slater_matrix, size_t y_padding,
|
||||||
|
const UpdatePlan &update_plan) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const double noise_magnitude;
|
||||||
|
const double determinant_threshold;
|
||||||
|
|
||||||
|
Matrix iterateOnPlan(const Matrix &m, UpdateMatrix *update_matrix,
|
||||||
|
const UpdatePlan &plan) const;
|
||||||
|
|
||||||
|
void attemptToGenerateUpdates(const UpdatePlan &plan, UpdateMatrix *res, const Matrix &m) const;
|
||||||
|
|
||||||
|
bool tryGenerateUpdates(UpdateMatrix *update_matrix, const UpdatePlan &plan,
|
||||||
|
const Matrix &m) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,48 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "UpdateDescriptor.hpp"
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Collection of UpdateDescriptors, describing all the updates in a cycle
|
||||||
|
*
|
||||||
|
* This class describes the updates contained in a cycle, as a set of UpdateDescriptors.
|
||||||
|
* It can be fed to the UpdatePlanBuilder to generate a cycle.
|
||||||
|
*/
|
||||||
|
class UpdatePlan {
|
||||||
|
public:
|
||||||
|
using Iterator = std::vector<UpdateDescriptor>::iterator;
|
||||||
|
using ConstIterator = std::vector<UpdateDescriptor>::const_iterator;
|
||||||
|
|
||||||
|
UpdatePlan() = default;
|
||||||
|
|
||||||
|
UpdatePlan(uint32_t updates_count);
|
||||||
|
|
||||||
|
Iterator begin() { return updates_descriptors.begin(); }
|
||||||
|
|
||||||
|
Iterator end() { return updates_descriptors.end(); }
|
||||||
|
|
||||||
|
ConstIterator begin() const { return updates_descriptors.begin(); }
|
||||||
|
|
||||||
|
ConstIterator end() const { return updates_descriptors.end(); }
|
||||||
|
|
||||||
|
UpdateDescriptor &operator[](uint32_t index) { return updates_descriptors[index]; }
|
||||||
|
|
||||||
|
const UpdateDescriptor &operator[](uint32_t index) const { return updates_descriptors[index]; }
|
||||||
|
|
||||||
|
UpdateDescriptor &at(uint32_t index) { return updates_descriptors.at(index); }
|
||||||
|
|
||||||
|
const UpdateDescriptor &at(uint32_t index) const { return updates_descriptors.at(index); }
|
||||||
|
|
||||||
|
uint32_t size() const { return updates_descriptors.size(); }
|
||||||
|
|
||||||
|
uint32_t getSplitCount() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<UpdateDescriptor> updates_descriptors;
|
||||||
|
};
|
||||||
|
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,14 @@
|
|||||||
|
#pragma once
|
||||||
|
#include "UpdatePlan.hpp"
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
class UpdatePlanner {
|
||||||
|
public:
|
||||||
|
virtual UpdatePlan make(uint32_t update_count, uint32_t split_count) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
}// namespace randomgen
|
27
random_generator/src/CMakeLists.txt
Normal file
27
random_generator/src/CMakeLists.txt
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
|
||||||
|
add_subdirectory(cycle_generation)
|
||||||
|
|
||||||
|
set(CURRENT_INCLUDE_DIR ${INCLUDE_DIR})
|
||||||
|
add_executable(main
|
||||||
|
main.cpp
|
||||||
|
Engine.cpp ${CURRENT_INCLUDE_DIR}/Engine.hpp
|
||||||
|
${CURRENT_INCLUDE_DIR}/EngineModeExecutor.hpp
|
||||||
|
MatrixSizeExecutor.cpp ${CURRENT_INCLUDE_DIR}/MatrixSizeExecutor.hpp
|
||||||
|
UpdateCountExecutor.cpp ${CURRENT_INCLUDE_DIR}/UpdateCountExecutor.hpp versioning.h.in ../headers/cycle_generation/CApi.h)
|
||||||
|
|
||||||
|
target_include_directories(main PUBLIC ${CURRENT_INCLUDE_DIR})
|
||||||
|
|
||||||
|
set_target_properties(main PROPERTIES
|
||||||
|
RUNTIME_OUTPUT_DIRECTORY ${GLOBAL_BIN_DIRECTORY})
|
||||||
|
|
||||||
|
target_link_libraries(main PUBLIC cycle_generation OpenMP::OpenMP_CXX)
|
||||||
|
|
||||||
|
|
||||||
|
add_executable(c_api_test c_api_test.c)
|
||||||
|
|
||||||
|
target_include_directories(c_api_test PUBLIC ${CURRENT_INCLUDE_DIR})
|
||||||
|
|
||||||
|
set_target_properties(c_api_test PROPERTIES
|
||||||
|
RUNTIME_OUTPUT_DIRECTORY ${GLOBAL_BIN_DIRECTORY})
|
||||||
|
|
||||||
|
target_link_libraries(c_api_test PUBLIC cycle_generation)
|
70
random_generator/src/Engine.cpp
Normal file
70
random_generator/src/Engine.cpp
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
|
||||||
|
#include "Engine.hpp"
|
||||||
|
#include "MatrixSizeExecutor.hpp"
|
||||||
|
#include "UpdateCountExecutor.hpp"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
std::unique_ptr<EngineModeExecutor> makeExecutor(EngineMode mode, Engine *engine) {
|
||||||
|
// Create the correct executor for the given mode
|
||||||
|
// Note that the executor can use the engine to get access to the command line arguments
|
||||||
|
switch (mode) {
|
||||||
|
case EngineMode::kMatrixSize:
|
||||||
|
return std::make_unique<MatrixSizeExecutor>(engine);
|
||||||
|
case EngineMode::kUpdateCount:
|
||||||
|
return std::make_unique<UpdateCountExecutor>(engine);
|
||||||
|
default:
|
||||||
|
throw std::runtime_error("Unknown engine mode");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void checkOutputPath(std::filesystem::path output_path) {
|
||||||
|
|
||||||
|
if (not std::filesystem::exists(output_path)) {
|
||||||
|
output_path = std::filesystem::absolute(output_path);
|
||||||
|
// ensure that the parent path exists
|
||||||
|
std::filesystem::create_directories(output_path.parent_path());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
char answer = '\0';
|
||||||
|
while (answer != 'y' and answer != 'n') {
|
||||||
|
std::cout << "Warning ! A file named " << output_path
|
||||||
|
<< " Already exists !\n\tOverwrite ? (y/n) ";
|
||||||
|
std::string buffer;
|
||||||
|
std::cin >> buffer;
|
||||||
|
answer = buffer[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (answer == 'n') { throw std::runtime_error("Aborted"); }
|
||||||
|
|
||||||
|
std::filesystem::remove(output_path);
|
||||||
|
}
|
||||||
|
}// namespace
|
||||||
|
|
||||||
|
Engine::Engine(int argc, char **argv) : argv(argv), argc(argc) {
|
||||||
|
if (argc < 3) {
|
||||||
|
std::cout << "Missing arguments" << std::endl;
|
||||||
|
std::cout << "Usage: " << argv[0] << " <dataset_output_path> "
|
||||||
|
<< "<update|matrix_size|...> (<args>)" << std::endl;
|
||||||
|
std::abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (std::string(argv[2]) == "update") {
|
||||||
|
mode = EngineMode::kUpdateCount;
|
||||||
|
} else if (std::string(argv[2]) == "matrix_size") {
|
||||||
|
mode = EngineMode::kMatrixSize;
|
||||||
|
} else {
|
||||||
|
std::cout << "Error: Unknown mode: " << argv[2] << std::endl;
|
||||||
|
std::abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
dataset_output_path = argv[1];
|
||||||
|
checkOutputPath(dataset_output_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
int Engine::exec() {
|
||||||
|
auto executor = makeExecutor(mode, this);
|
||||||
|
auto return_code = executor->exec();
|
||||||
|
|
||||||
|
return return_code;
|
||||||
|
}
|
129
random_generator/src/MatrixSizeExecutor.cpp
Normal file
129
random_generator/src/MatrixSizeExecutor.cpp
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
|
||||||
|
#include "MatrixSizeExecutor.hpp"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <omp.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "ChainedUpdatePlanner.hpp"
|
||||||
|
#include "CycleGenerator.hpp"
|
||||||
|
#include "SizedBasedCycleStream.hpp"
|
||||||
|
|
||||||
|
#include "Engine.hpp"
|
||||||
|
|
||||||
|
using namespace randomgen;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
size_t dumpCyclesToDatastream(hdf5CycleOutputStream &ds, std::vector<Cycle> &cycle_buffer) {
|
||||||
|
#pragma omp critical
|
||||||
|
for (auto &c: cycle_buffer) { ds << c; }
|
||||||
|
|
||||||
|
// Number of cycles that were dumped
|
||||||
|
size_t res = cycle_buffer.size();
|
||||||
|
cycle_buffer.clear();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void checkMultisizedParameters(const size_t starting_matrix_size, const size_t final_matrix_size,
|
||||||
|
const size_t update_count, const size_t max_split_count,
|
||||||
|
const size_t cycle_per_rank) {
|
||||||
|
if (starting_matrix_size > final_matrix_size) {
|
||||||
|
throw std::runtime_error("Starting matrix size must be smaller than final matrix size");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (max_split_count > update_count) {
|
||||||
|
throw std::runtime_error("Update count must be smaller than max split count");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cycle_per_rank == 0) { throw std::runtime_error("Cycle per rank must be greater than 0"); }
|
||||||
|
|
||||||
|
if (update_count > starting_matrix_size) {
|
||||||
|
throw std::runtime_error("Update count must be smaller than starting matrix size");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void generate_multisized_dataset(const std::filesystem::path &output_path,
|
||||||
|
const size_t starting_matrix_size,
|
||||||
|
const size_t final_matrix_size, const size_t update_count,
|
||||||
|
const size_t max_split_count, const size_t cycle_per_rank) {
|
||||||
|
checkMultisizedParameters(starting_matrix_size, final_matrix_size, update_count,
|
||||||
|
max_split_count, cycle_per_rank);
|
||||||
|
|
||||||
|
|
||||||
|
auto planner = std::make_shared<ChainedUpdatePlanBuilder>();
|
||||||
|
auto matrix_generator = std::make_shared<UpdateMatrixGenerator>(1e-3, 1e-6);
|
||||||
|
auto update_generator = std::make_shared<UpdateGenerator>(planner, matrix_generator);
|
||||||
|
|
||||||
|
SizedBasedCycleStream ds(output_path);
|
||||||
|
|
||||||
|
size_t total_rank_count = 0;
|
||||||
|
for (size_t i = starting_matrix_size; i <= final_matrix_size; i++) {
|
||||||
|
long splits_count = std::min(max_split_count, update_count - 1);
|
||||||
|
splits_count = std::max(splits_count, 0L);
|
||||||
|
total_rank_count += (splits_count + 1);
|
||||||
|
}
|
||||||
|
size_t generated_rank = 0;
|
||||||
|
size_t generated_cycles = 0;
|
||||||
|
|
||||||
|
|
||||||
|
#pragma omp parallel default(none) shared(generated_cycles, generated_rank) \
|
||||||
|
shared(starting_matrix_size, final_matrix_size, update_generator, cycle_per_rank, \
|
||||||
|
max_split_count, update_count, ds, total_rank_count, std::cout)
|
||||||
|
{
|
||||||
|
// Generate this amount of cycles before dumping them
|
||||||
|
// This is to avoid contention
|
||||||
|
const size_t cycle_buffer_size = std::min(256UL, 2 * cycle_per_rank);
|
||||||
|
std::vector<Cycle> cycle_buffer;
|
||||||
|
|
||||||
|
|
||||||
|
// Iterate on the matrix size
|
||||||
|
#pragma omp for schedule(dynamic)
|
||||||
|
for (size_t curr_size = starting_matrix_size; curr_size <= final_matrix_size; curr_size++) {
|
||||||
|
CycleGenerator generator(curr_size, 0, update_generator);
|
||||||
|
size_t splits_count = std::min(max_split_count, update_count - 1);
|
||||||
|
|
||||||
|
// Iterate on the number of splits
|
||||||
|
for (size_t n_splits = 0; n_splits <= splits_count; n_splits++) {
|
||||||
|
|
||||||
|
if (omp_get_thread_num() == 0) {
|
||||||
|
std::cout << "Generated " << generated_rank << "/" << total_rank_count << " ranks"
|
||||||
|
<< "(" << generated_cycles << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate cycle_per_rank matrices
|
||||||
|
for (size_t k = 0; k < cycle_per_rank; k++) {
|
||||||
|
cycle_buffer.push_back(generator.make(update_count, n_splits));
|
||||||
|
// If we have generated enough cycles, dump them
|
||||||
|
|
||||||
|
if (cycle_buffer.size() == cycle_buffer_size) {
|
||||||
|
// ensure the counter increase is atomic
|
||||||
|
size_t tmp = dumpCyclesToDatastream(ds, cycle_buffer);
|
||||||
|
#pragma omp atomic
|
||||||
|
generated_cycles += tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#pragma omp atomic
|
||||||
|
generated_rank += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}// namespace
|
||||||
|
|
||||||
|
int MatrixSizeExecutor::exec() {
|
||||||
|
|
||||||
|
constexpr size_t starting_matrix_size = 31;
|
||||||
|
constexpr size_t ending_matrix_size = 200;
|
||||||
|
constexpr size_t nbr_update = 31;
|
||||||
|
constexpr size_t max_split_count = nbr_update - 1;
|
||||||
|
constexpr size_t cycle_per_rank = 20;
|
||||||
|
|
||||||
|
auto output_path = parent->getOutputPath();
|
||||||
|
|
||||||
|
generate_multisized_dataset(output_path, starting_matrix_size, ending_matrix_size, nbr_update,
|
||||||
|
max_split_count, cycle_per_rank);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
126
random_generator/src/UpdateCountExecutor.cpp
Normal file
126
random_generator/src/UpdateCountExecutor.cpp
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
|
||||||
|
|
||||||
|
#include "UpdateCountExecutor.hpp"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <omp.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "ChainedUpdatePlanner.hpp"
|
||||||
|
#include "CycleGenerator.hpp"
|
||||||
|
#include "Engine.hpp"
|
||||||
|
#include "hdf5CycleOutputStream.hpp"
|
||||||
|
|
||||||
|
using namespace randomgen;
|
||||||
|
|
||||||
|
size_t dumpCyclesToDatastream(hdf5CycleOutputStream &ds, std::vector<Cycle> &cycle_buffer) {
|
||||||
|
#pragma omp critical
|
||||||
|
for (auto &c: cycle_buffer) { ds << c; }
|
||||||
|
|
||||||
|
// Number of cycles that were dumped
|
||||||
|
size_t res = cycle_buffer.size();
|
||||||
|
cycle_buffer.clear();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void generateUpdates(std::filesystem::path output_path, const size_t matrix_size,
|
||||||
|
const size_t y_padding, const size_t lower_update_count,
|
||||||
|
const size_t upper_update_count, const size_t max_split_count,
|
||||||
|
const size_t cycle_per_rank) {
|
||||||
|
|
||||||
|
|
||||||
|
auto planner = std::make_shared<ChainedUpdatePlanBuilder>();
|
||||||
|
auto matrix_generator = std::make_shared<UpdateMatrixGenerator>(1e-6, 1e-6);
|
||||||
|
auto update_generator = std::make_shared<UpdateGenerator>(planner, matrix_generator);
|
||||||
|
CycleGenerator generator(matrix_size, y_padding, update_generator);
|
||||||
|
|
||||||
|
hdf5CycleOutputStream ds(output_path);
|
||||||
|
|
||||||
|
// For pretty printing the progress
|
||||||
|
// The rank is a combination of (n_update, n_splits)
|
||||||
|
size_t total_rank = 0;
|
||||||
|
size_t current_rank = 0;
|
||||||
|
size_t cycle_counter = 0;
|
||||||
|
|
||||||
|
// Total number of ranks to compute
|
||||||
|
for (size_t i = lower_update_count; i <= upper_update_count; i++) {
|
||||||
|
size_t splits_count = std::min(max_split_count, i);
|
||||||
|
splits_count = std::max(splits_count - 2, 0UL);
|
||||||
|
total_rank += splits_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#pragma omp parallel default(none) \
|
||||||
|
shared(total_rank, current_rank, cycle_counter, generator, ds, std::cout, \
|
||||||
|
lower_update_count, upper_update_count, max_split_count, cycle_per_rank)
|
||||||
|
{
|
||||||
|
// Generate this amount of cycles before dumping them
|
||||||
|
// This is to avoid contention
|
||||||
|
const size_t cycle_buffer_size = std::min(256UL, 2 * cycle_per_rank);
|
||||||
|
std::vector<Cycle> cycle_buffer;
|
||||||
|
|
||||||
|
// Some cycles take longer to generate than others
|
||||||
|
// (espcially cycles with more splits and / or more updates)
|
||||||
|
// So we use a dynamic scheduling
|
||||||
|
#pragma omp for schedule(dynamic)
|
||||||
|
for (size_t n_update = lower_update_count; n_update <= upper_update_count; n_update++) {
|
||||||
|
size_t splits_count = std::min(max_split_count, n_update - 1);
|
||||||
|
// For each update count, generate all combinations of splits number
|
||||||
|
for (size_t n_splits = 2; n_splits <= splits_count; n_splits++) {
|
||||||
|
|
||||||
|
// Pretty print the progress :)
|
||||||
|
if (omp_get_thread_num() == 0) {
|
||||||
|
std::cout << "Generated rank " << current_rank << "/" << total_rank << " ("
|
||||||
|
<< cycle_counter << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t k = 0; k < cycle_per_rank; k++) {
|
||||||
|
cycle_buffer.push_back(generator.make(n_update, n_splits));
|
||||||
|
// If we have generated enough cycles, dump them
|
||||||
|
|
||||||
|
if (cycle_buffer.size() == cycle_buffer_size) {
|
||||||
|
// ensure the counter increase is atomic
|
||||||
|
size_t tmp = dumpCyclesToDatastream(ds, cycle_buffer);
|
||||||
|
#pragma omp atomic
|
||||||
|
cycle_counter += tmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#pragma omp atomic
|
||||||
|
current_rank++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Dump the remaining cycles still in the buffer
|
||||||
|
size_t tmp = dumpCyclesToDatastream(ds, cycle_buffer);
|
||||||
|
#pragma omp atomic
|
||||||
|
cycle_counter += tmp;
|
||||||
|
}
|
||||||
|
std::cout << "Generated rank " << current_rank << "/" << total_rank << " (" << cycle_counter
|
||||||
|
<< ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int UpdateCountExecutor::exec() {
|
||||||
|
|
||||||
|
// The generated matrix are square matrices...
|
||||||
|
// But we can pad them with zeros to make them rectangular (useful for optimal vectorization)
|
||||||
|
constexpr size_t matrix_size = 21;
|
||||||
|
constexpr size_t y_padding = 3;
|
||||||
|
|
||||||
|
// Start generating cycles with this number of updates
|
||||||
|
constexpr size_t lower_update_count = 1;
|
||||||
|
// The maximal number of updates per cycle to generate
|
||||||
|
constexpr size_t upper_update_count = 20;
|
||||||
|
// The maximum number of splits per cycle
|
||||||
|
// By default, this is capped to upper_update_count - 1
|
||||||
|
constexpr size_t max_split_count = upper_update_count - 1;
|
||||||
|
// The number of cycles to generate for each "rank"
|
||||||
|
// Where the rank is the combination (update_count, split_count)
|
||||||
|
constexpr size_t cycle_per_rank = 80;
|
||||||
|
|
||||||
|
auto output_path = parent->getOutputPath();
|
||||||
|
|
||||||
|
generateUpdates(output_path, matrix_size, y_padding, lower_update_count, upper_update_count,
|
||||||
|
max_split_count, cycle_per_rank);
|
||||||
|
return 0;
|
||||||
|
}
|
29
random_generator/src/c_api_test.c
Normal file
29
random_generator/src/c_api_test.c
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
|
||||||
|
#include "CApi.h"
|
||||||
|
#include "stdio.h"
|
||||||
|
#include "stdlib.h"
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
Cycle *cycle = generateRandomCycle(3, 5, 3, 2, 1e-3, 1e-3);
|
||||||
|
printf("%d\n", cycle->n_update);
|
||||||
|
printf("%d\n", cycle->dim);
|
||||||
|
printf("%d\n", cycle->lds);
|
||||||
|
printf("%f\n", cycle->determinant);
|
||||||
|
// Matrix:
|
||||||
|
for (int i = 0; i < cycle->dim; i++) {
|
||||||
|
for (int j = 0; j < cycle->lds; j++) {
|
||||||
|
printf("%f ", cycle->slater_matrix[i * cycle->lds + j]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
// update_array:
|
||||||
|
for (int i = 0; i < cycle->n_update; i++) {
|
||||||
|
printf("Update %i:\n", i);
|
||||||
|
for (int j = 0; j < cycle->lds; j++) { printf("%lf ", cycle->updates[i * cycle->lds + j]); }
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
freeCycle(&cycle);
|
||||||
|
return 0;
|
||||||
|
}
|
100
random_generator/src/cycle_generation/CApi.cpp
Normal file
100
random_generator/src/cycle_generation/CApi.cpp
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
#include "CApi.h"
|
||||||
|
#include "ChainedUpdatePlanner.hpp"
|
||||||
|
#include "CycleGenerator.hpp"
|
||||||
|
#include <iostream>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
void convertMatrix(randomgen::Matrix source, double **destination) {
|
||||||
|
*destination = (double *) malloc(sizeof(double) * source.x * source.y);
|
||||||
|
std::copy(source.data(), source.data() + source.x * source.y, *destination);
|
||||||
|
}
|
||||||
|
|
||||||
|
void convertMatrices(randomgen::Cycle &cpp_cycle, Cycle *res) {// Copy the slater matrix
|
||||||
|
// Copy the inverse slater matrix
|
||||||
|
convertMatrix(cpp_cycle.getSlaterMatrix(), &res->slater_matrix);
|
||||||
|
convertMatrix(cpp_cycle.getSlaterInverseTransposed(), &res->slater_inverse_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void convertUpdates(randomgen::Cycle &cpp_cycle, Cycle *res) {
|
||||||
|
auto &update_matrix = cpp_cycle.getUpdateMatrix();
|
||||||
|
auto raw_updates = update_matrix.getRawUpdates();
|
||||||
|
res->updates = (double *) malloc(sizeof(double) * update_matrix.getUpdateCount() *
|
||||||
|
update_matrix.getUpdateLength());
|
||||||
|
|
||||||
|
std::copy(raw_updates,
|
||||||
|
raw_updates + update_matrix.getUpdateCount() * update_matrix.getUpdateLength(),
|
||||||
|
res->updates);
|
||||||
|
|
||||||
|
res->col_update_index = (uint32_t *) malloc(sizeof(uint32_t) * update_matrix.getUpdateCount());
|
||||||
|
std::copy(update_matrix.getUpdateIndices(),
|
||||||
|
update_matrix.getUpdateIndices() + update_matrix.getUpdateCount(),
|
||||||
|
res->col_update_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Cycle *convertCPPCycleToC(randomgen::Cycle &cpp_cycle) {
|
||||||
|
auto *res = (Cycle *) malloc(sizeof(Cycle));
|
||||||
|
res->lds = cpp_cycle.getSlaterMatrix().y;
|
||||||
|
res->dim = cpp_cycle.getSlaterMatrix().x;
|
||||||
|
res->n_update = cpp_cycle.getUpdateMatrix().getUpdateCount();
|
||||||
|
res->determinant = cpp_cycle.getSlaterMatrix().getLastKnownDeterminant();
|
||||||
|
|
||||||
|
convertMatrices(cpp_cycle, res);
|
||||||
|
convertUpdates(cpp_cycle, res);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
}// namespace
|
||||||
|
|
||||||
|
// Those functions must have C linkage
|
||||||
|
extern "C" {
|
||||||
|
|
||||||
|
Cycle *generateRandomCycle(uint32_t dim, uint32_t lds, uint32_t n_update, uint32_t n_splits,
|
||||||
|
double determinant_threshold, double splitting_update_noise_magnitude) {
|
||||||
|
|
||||||
|
if (lds < dim) {
|
||||||
|
std::cerr << "generateRandomCycle: lds < dim" << std::endl;
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto y_padding = lds - dim;
|
||||||
|
|
||||||
|
// We need to build a generator at every call, which is not optimal
|
||||||
|
auto planner = std::make_unique<randomgen::ChainedUpdatePlanBuilder>();
|
||||||
|
auto matrix_generator = std::make_unique<randomgen::UpdateMatrixGenerator>(
|
||||||
|
splitting_update_noise_magnitude, determinant_threshold);
|
||||||
|
auto update_generator = std::make_shared<randomgen::UpdateGenerator>(std::move(planner),
|
||||||
|
std::move(matrix_generator));
|
||||||
|
|
||||||
|
|
||||||
|
// This will generate a random cycle
|
||||||
|
// However, those class use smart pointers (that are private to the library)
|
||||||
|
// So we can't return them directly.
|
||||||
|
// This requires the additional step of converting the C++ class to a C struct.
|
||||||
|
// Which has a cost memory/performance wise
|
||||||
|
randomgen::CycleGenerator generator(dim, y_padding, update_generator);
|
||||||
|
randomgen::Cycle cpp_cycle = generator.make(n_update, n_splits);
|
||||||
|
auto converted = convertCPPCycleToC(cpp_cycle);
|
||||||
|
return converted;
|
||||||
|
}
|
||||||
|
|
||||||
|
void freeCycle(Cycle **cycle) {
|
||||||
|
if (cycle == nullptr or *cycle == nullptr) {
|
||||||
|
std::cerr << "freeCycle: cycle is nullptr !" << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
auto c = *cycle;
|
||||||
|
free(c->slater_matrix);
|
||||||
|
free(c->slater_inverse_t);
|
||||||
|
free(c->updates);
|
||||||
|
free(c->col_update_index);
|
||||||
|
free(c);
|
||||||
|
*cycle = nullptr;
|
||||||
|
}
|
||||||
|
}
|
14
random_generator/src/cycle_generation/CMakeLists.txt
Normal file
14
random_generator/src/cycle_generation/CMakeLists.txt
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
|
||||||
|
add_subdirectory(update_generation)
|
||||||
|
|
||||||
|
set(CURRENT_INCLUDE_DIR ${INCLUDE_DIR}/cycle_generation)
|
||||||
|
add_library(cycle_generation
|
||||||
|
${CURRENT_INCLUDE_DIR}/Cycle.hpp
|
||||||
|
Matrix.cpp ${CURRENT_INCLUDE_DIR}/Matrix.hpp
|
||||||
|
hdf5CycleOutputStream.cpp ${CURRENT_INCLUDE_DIR}/hdf5CycleOutputStream.hpp
|
||||||
|
SizedBasedCycleStream.cpp ${CURRENT_INCLUDE_DIR}/SizedBasedCycleStream.hpp
|
||||||
|
CycleGenerator.cpp ${CURRENT_INCLUDE_DIR}/CycleGenerator.hpp
|
||||||
|
CApi.cpp)
|
||||||
|
target_include_directories(cycle_generation PUBLIC ${CURRENT_INCLUDE_DIR} ${HDF5_INCLUDE_DIRS})
|
||||||
|
target_link_libraries(cycle_generation PUBLIC update_generation ${HDF5_LIBRARIES}
|
||||||
|
m ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES} lapacke)
|
74
random_generator/src/cycle_generation/CycleGenerator.cpp
Normal file
74
random_generator/src/cycle_generation/CycleGenerator.cpp
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
|
||||||
|
|
||||||
|
#include "cycle_generation/CycleGenerator.hpp"
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
CycleGenerator::CycleGenerator(size_t x, size_t y_padding,
|
||||||
|
std::shared_ptr<const UpdateGenerator> update_generator)
|
||||||
|
: update_generator(std::move(update_generator)), x(x), y_padding(y_padding) {}
|
||||||
|
|
||||||
|
|
||||||
|
void CycleGenerator::generateCycleMatrices(Cycle *res) {
|
||||||
|
// Generate random matrices until we find one with a given determinant
|
||||||
|
do {
|
||||||
|
res->getSlaterMatrix() = matrix_generator(x);
|
||||||
|
} while (res->getSlaterMatrix().getLastKnownDeterminant() < 1e-5);
|
||||||
|
|
||||||
|
res->getSlaterInverse() = res->getSlaterMatrix().inverse();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Cycle CycleGenerator::attemptToGenerateCycle(size_t n_update, size_t n_splits) {
|
||||||
|
Cycle res{};
|
||||||
|
size_t attempt = 0;
|
||||||
|
constexpr size_t kMaxAttempts = 20;
|
||||||
|
|
||||||
|
auto plan = update_generator->plan(n_update, n_splits);
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
attempt++;
|
||||||
|
generateCycleMatrices(&res);
|
||||||
|
|
||||||
|
// Try to find a valid update vector
|
||||||
|
// If we can't, start from scratch
|
||||||
|
try {
|
||||||
|
res.getUpdateMatrix() =
|
||||||
|
update_generator->make(res.getSlaterMatrix().transpose(), y_padding, plan);
|
||||||
|
break;
|
||||||
|
} catch (UpdateMatrixGenerator::FailedUpdateGeneration &e) {
|
||||||
|
// Abort after a certain number of attempts
|
||||||
|
if (attempt > kMaxAttempts) {
|
||||||
|
throw std::runtime_error("Could not generate a valid cycle in " +
|
||||||
|
std::to_string(attempt) + " attempts");
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CycleGenerator::finalizeCycle(Cycle &res) const {
|
||||||
|
// The inverse in the dataset must be transposed
|
||||||
|
// this allows the code of Scherman-morrison to be vectorized since column-updates becomes row-updates
|
||||||
|
res.getSlaterInverse() = res.getSlaterInverse().transpose();
|
||||||
|
res.getSlaterMatrix().pad(0, y_padding);
|
||||||
|
res.getSlaterInverse().pad(0, y_padding);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < res.getUpdateMatrix().getUpdateCount(); i++) {
|
||||||
|
res.getUpdateMatrix().getUpdateIndices()[i] += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Cycle CycleGenerator::make(size_t n_update, size_t n_splits) {
|
||||||
|
|
||||||
|
Cycle res = attemptToGenerateCycle(n_update, n_splits);
|
||||||
|
finalizeCycle(res);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}// namespace randomgen
|
110
random_generator/src/cycle_generation/Matrix.cpp
Normal file
110
random_generator/src/cycle_generation/Matrix.cpp
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
|
||||||
|
#include "cycle_generation/Matrix.hpp"
|
||||||
|
#include <lapacke.h>
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
Matrix::Matrix(size_t x, size_t y) : x(x), y(y) { array = std::make_unique<double[]>(x * y); }
|
||||||
|
|
||||||
|
Matrix::Matrix(const Matrix &other) : Matrix(other.x, other.y) {
|
||||||
|
std::copy(other.array.get(), other.array.get() + x * y, array.get());
|
||||||
|
determinant = other.determinant;
|
||||||
|
}
|
||||||
|
|
||||||
|
Matrix &Matrix::operator=(const Matrix &other) {
|
||||||
|
if (this == &other) { return *this; }
|
||||||
|
// Realloc a matrix if the size is different
|
||||||
|
|
||||||
|
if (x * y != other.x * other.y) { array = std::make_unique<double[]>(other.x * other.y); }
|
||||||
|
|
||||||
|
x = other.x;
|
||||||
|
y = other.y;
|
||||||
|
|
||||||
|
std::copy(other.array.get(), other.array.get() + x * y, array.get());
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
Matrix Matrix::inverse() {
|
||||||
|
// Return the inverse of this matrix using lapacke dgetrf + dgetri
|
||||||
|
Matrix inv = *this;
|
||||||
|
std::vector<int> ipiv(x);
|
||||||
|
auto info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, x, y, inv.array.get(), x, ipiv.data());
|
||||||
|
if (info != 0) { throw std::runtime_error("Matrix inversion failed"); }
|
||||||
|
auto info2 = LAPACKE_dgetri(LAPACK_ROW_MAJOR, x, inv.array.get(), x, ipiv.data());
|
||||||
|
if (info2 != 0) { throw std::runtime_error("Matrix inversion failed"); }
|
||||||
|
|
||||||
|
inv.computeDeterminant();
|
||||||
|
return inv;
|
||||||
|
}
|
||||||
|
|
||||||
|
Matrix Matrix::transpose() {
|
||||||
|
Matrix res(x, y);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < x; i++) {
|
||||||
|
for (size_t j = 0; j < y; j++) { res.array[j * x + i] = array[i * y + j]; }
|
||||||
|
}
|
||||||
|
res.computeDeterminant();
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ostream &operator<<(std::ostream &os, const Matrix &m) {
|
||||||
|
for (size_t i = 0; i < m.x; i++) {
|
||||||
|
for (size_t j = 0; j < m.y; j++) { os << m.array[i * m.y + j] << " "; }
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Matrix::pad(size_t x_pad, size_t y_pad) {
|
||||||
|
if (x_pad == 0 && y_pad == 0) { return; }
|
||||||
|
|
||||||
|
size_t new_size = (x + x_pad) * (y + y_pad);
|
||||||
|
std::unique_ptr<double[]> padded_data = std::make_unique<double[]>(new_size);
|
||||||
|
|
||||||
|
// Set the padding to 0
|
||||||
|
std::fill(padded_data.get(), padded_data.get() + (new_size), 0);
|
||||||
|
|
||||||
|
// Copy the old data to the new data
|
||||||
|
for (int i = 0; i < x; i++) {
|
||||||
|
for (int j = 0; j < y; j++) { padded_data[i * (y + y_pad) + j] = array[i * y + j]; }
|
||||||
|
}
|
||||||
|
x += x_pad;
|
||||||
|
y += y_pad;
|
||||||
|
this->array = std::move(padded_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
double Matrix::computeDeterminant() {
|
||||||
|
// Returns the determinant of the input matrix using LAPACK
|
||||||
|
// Copy the matrix as lapack modifies it
|
||||||
|
std::unique_ptr<double[]> copy = std::make_unique<double[]>(x * y);
|
||||||
|
std::copy(array.get(), array.get() + (x * y), copy.get());
|
||||||
|
|
||||||
|
std::vector<int> pivot(x);
|
||||||
|
auto info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, x, y, copy.get(), x, pivot.data());
|
||||||
|
if (info != 0) {
|
||||||
|
std::cout << "HERE!" << std::endl;
|
||||||
|
throw std::runtime_error("Matrix inversion failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
double det = 1;
|
||||||
|
for (int i = 0; i < y; ++i) { det *= array[i * x + i]; }
|
||||||
|
determinant = det;
|
||||||
|
return det;
|
||||||
|
}
|
||||||
|
|
||||||
|
double Matrix::getLastKnownDeterminant() const { return determinant; }
|
||||||
|
|
||||||
|
|
||||||
|
Matrix MatrixGenerator::operator()(size_t matrix_size, double vmin, double vmax) {
|
||||||
|
Matrix res{matrix_size, matrix_size};
|
||||||
|
std::random_device rd;
|
||||||
|
std::mt19937 gen(rd());
|
||||||
|
std::uniform_real_distribution<> dis(vmin, vmax);
|
||||||
|
std::generate(res.array.get(), res.array.get() + (matrix_size * matrix_size),
|
||||||
|
[&]() { return dis(gen); });
|
||||||
|
res.computeDeterminant();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,14 @@
|
|||||||
|
|
||||||
|
#include "cycle_generation/SizedBasedCycleStream.hpp"
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
std::string SizedBasedCycleStream::getPathFor(const randomgen::Cycle &cycle) {
|
||||||
|
// Save the cycle based on the size of its matrix + number of splits
|
||||||
|
std::string res = "/";
|
||||||
|
res += std::to_string(cycle.getSlaterMatrix().x) + "/";
|
||||||
|
res += std::to_string(cycle.getUpdateMatrix().getSplitCount()) + "/";
|
||||||
|
res += "cycle_" + std::to_string(last_unique_cycle_id);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
}// namespace randomgen
|
135
random_generator/src/cycle_generation/hdf5CycleOutputStream.cpp
Normal file
135
random_generator/src/cycle_generation/hdf5CycleOutputStream.cpp
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
#include "cycle_generation/hdf5CycleOutputStream.hpp"
|
||||||
|
|
||||||
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
void writeDeterminant(const Cycle &cycle,
|
||||||
|
hid_t cycle_gid) {// Write the determinant of the matrix
|
||||||
|
hid_t det_space = H5Screate_simple(1, std::vector<hsize_t>{1}.data(), nullptr);
|
||||||
|
hid_t determinant_loc =
|
||||||
|
H5Dcreate1(cycle_gid, "determinant", H5T_NATIVE_DOUBLE, det_space, H5P_DEFAULT);
|
||||||
|
if (determinant_loc < 0) { throw std::runtime_error("H5Dcreate1"); }
|
||||||
|
|
||||||
|
double determinant = cycle.getSlaterMatrix().getLastKnownDeterminant();
|
||||||
|
|
||||||
|
H5Dwrite(determinant_loc, H5T_NATIVE_DOUBLE, det_space, H5S_ALL, H5S_ALL, &determinant);
|
||||||
|
H5Dclose(determinant_loc);
|
||||||
|
H5Sclose(det_space);
|
||||||
|
}
|
||||||
|
|
||||||
|
void writeMatrix(const Matrix &mat, hid_t group_id, hid_t slater_space,
|
||||||
|
const std::string &dataset_name) {
|
||||||
|
hid_t slater_matrix_loc = H5Dcreate1(group_id, dataset_name.c_str(), H5T_NATIVE_DOUBLE,
|
||||||
|
slater_space, H5P_DEFAULT);
|
||||||
|
if (slater_matrix_loc < 0) { throw std::runtime_error("H5Dcreate1"); }
|
||||||
|
|
||||||
|
H5Dwrite(slater_matrix_loc, H5T_NATIVE_DOUBLE, slater_space, H5S_ALL, H5S_ALL, mat.data());
|
||||||
|
H5Dclose(slater_matrix_loc);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void writeMatrices(const Cycle &cycle, hid_t group_gid) {
|
||||||
|
unsigned int y_size = cycle.getSlaterInverseTransposed().y,
|
||||||
|
x_size = cycle.getSlaterInverseTransposed().x;
|
||||||
|
|
||||||
|
// Write both the matrix and its inverse
|
||||||
|
auto slater_space = H5Screate_simple(2, std::vector<hsize_t>{x_size, y_size}.data(), nullptr);
|
||||||
|
writeMatrix(cycle.getSlaterMatrix(), group_gid, slater_space, "slater_matrix");
|
||||||
|
writeMatrix(cycle.getSlaterInverseTransposed(), group_gid, slater_space, "slater_inverse_t");
|
||||||
|
H5Sclose(slater_space);
|
||||||
|
|
||||||
|
writeDeterminant(cycle, group_gid);
|
||||||
|
}
|
||||||
|
|
||||||
|
void writeUpdatesMetadata(const Cycle &cycle, hid_t cycle_gid) {// Write the number of updates
|
||||||
|
hid_t updates_count_space = H5Screate_simple(1, std::vector<hsize_t>{1}.data(), nullptr);
|
||||||
|
hid_t updates_count_loc = H5Dcreate1(cycle_gid, "nupdates", H5T_NATIVE_UINT32,
|
||||||
|
updates_count_space, H5P_DEFAULT);
|
||||||
|
if (updates_count_loc < 0) { throw std::runtime_error("H5Dcreate1"); }
|
||||||
|
|
||||||
|
auto update_count = cycle.getUpdateMatrix().getUpdateCount();
|
||||||
|
H5Dwrite(updates_count_loc, H5T_NATIVE_UINT32, updates_count_space, H5S_ALL, H5S_ALL,
|
||||||
|
&update_count);
|
||||||
|
H5Dclose(updates_count_loc);
|
||||||
|
H5Sclose(updates_count_space);
|
||||||
|
|
||||||
|
|
||||||
|
// Write the columns of the updates matrix
|
||||||
|
hid_t updates_index = H5Screate_simple(
|
||||||
|
1, std::vector<hsize_t>{cycle.getUpdateMatrix().getUpdateCount()}.data(), nullptr);
|
||||||
|
hid_t updates_index_loc = H5Dcreate1(cycle_gid, "col_update_index", H5T_NATIVE_UINT32,
|
||||||
|
updates_index, H5P_DEFAULT);
|
||||||
|
if (updates_index_loc < 0) { throw std::runtime_error("H5Dcreate1"); }
|
||||||
|
H5Dwrite(updates_index_loc, H5T_NATIVE_UINT32, updates_index, H5S_ALL, H5S_ALL,
|
||||||
|
cycle.getUpdateMatrix().getUpdateIndices());
|
||||||
|
H5Dclose(updates_index_loc);
|
||||||
|
H5Sclose(updates_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
void writeUpdateMatrix(const Cycle &cycle, hid_t cycle_gid) {// Write the updates matrix
|
||||||
|
unsigned int y_size = cycle.getSlaterInverseTransposed().y;
|
||||||
|
|
||||||
|
hid_t update_space = H5Screate_simple(
|
||||||
|
2, std::vector<hsize_t>{cycle.getUpdateMatrix().getUpdateCount(), y_size}.data(),
|
||||||
|
nullptr);
|
||||||
|
hid_t update_loc =
|
||||||
|
H5Dcreate1(cycle_gid, "updates", H5T_NATIVE_DOUBLE, update_space, H5P_DEFAULT);
|
||||||
|
if (update_loc < 0) { throw std::runtime_error("H5Dcreate1"); }
|
||||||
|
H5Dwrite(update_loc, H5T_NATIVE_DOUBLE, update_space, H5S_ALL, H5S_ALL,
|
||||||
|
cycle.getUpdateMatrix().getRawUpdates());
|
||||||
|
H5Dclose(update_loc);
|
||||||
|
H5Sclose(update_space);
|
||||||
|
}
|
||||||
|
|
||||||
|
void writeUpdates(const Cycle &cycle, hid_t cycle_id) {
|
||||||
|
writeUpdatesMetadata(cycle, cycle_id);
|
||||||
|
writeUpdateMatrix(cycle, cycle_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
}// namespace
|
||||||
|
|
||||||
|
hdf5CycleOutputStream::hdf5CycleOutputStream(const fs::path &dataset_path) {
|
||||||
|
// Create a new file
|
||||||
|
file_id = H5Fcreate(dataset_path.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
|
||||||
|
if (file_id < 0) { throw std::runtime_error("Could not create dataset file"); }
|
||||||
|
}
|
||||||
|
|
||||||
|
hdf5CycleOutputStream::~hdf5CycleOutputStream() {
|
||||||
|
if (file_id >= 0) { H5Fclose(file_id); }
|
||||||
|
file_id = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
hid_t hdf5CycleOutputStream::createGroup(const std::string &path) {
|
||||||
|
// Needed to create the parent directories automatically
|
||||||
|
auto lcpl = H5Pcreate(H5P_LINK_CREATE);
|
||||||
|
H5Pset_create_intermediate_group(lcpl, 1);
|
||||||
|
hid_t group_id = H5Gcreate2(file_id, path.c_str(), lcpl, H5P_DEFAULT, H5P_DEFAULT);
|
||||||
|
H5Pclose(lcpl);
|
||||||
|
return group_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string hdf5CycleOutputStream::getPathFor(const Cycle &cycle) {
|
||||||
|
std::string res = "/";
|
||||||
|
res += std::to_string(cycle.getUpdateMatrix().getUpdateCount()) + "/";
|
||||||
|
res += std::to_string(cycle.getUpdateMatrix().getSplitCount()) + "/";
|
||||||
|
res += "cycle_" + std::to_string(last_unique_cycle_id);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
hdf5CycleOutputStream &operator<<(hdf5CycleOutputStream &ds, const Cycle &cycle) {
|
||||||
|
// Build the path to the new dataset inside the file
|
||||||
|
std::string cycle_path = ds.getPathFor(cycle);
|
||||||
|
hid_t group_id = ds.createGroup(cycle_path);
|
||||||
|
ds.last_unique_cycle_id++;
|
||||||
|
|
||||||
|
writeMatrices(cycle, group_id);
|
||||||
|
writeUpdates(cycle, group_id);
|
||||||
|
|
||||||
|
H5Gclose(group_id);
|
||||||
|
return ds;
|
||||||
|
}
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,16 @@
|
|||||||
|
|
||||||
|
set(CURRENT_INCLUDE_DIR ${INCLUDE_DIR}/cycle_generation/update_generation)
|
||||||
|
add_library(update_generation
|
||||||
|
UpdateDescriptor.cpp ${CURRENT_INCLUDE_DIR}/UpdateDescriptor.hpp
|
||||||
|
UpdateMatrix.cpp ${CURRENT_INCLUDE_DIR}/UpdateMatrix.hpp
|
||||||
|
UpdatePlan.cpp ${CURRENT_INCLUDE_DIR}/UpdatePlan.hpp
|
||||||
|
${CURRENT_INCLUDE_DIR}/UpdatePlanner.hpp
|
||||||
|
UpdateMatrixGenerator.cpp ${CURRENT_INCLUDE_DIR}/UpdateGenerator.hpp
|
||||||
|
UpdateGenerator.cpp ${CURRENT_INCLUDE_DIR}/UpdateGenerator.hpp
|
||||||
|
ChainedUpdatePlanner.cpp ${CURRENT_INCLUDE_DIR}/ChainedUpdatePlanner.hpp
|
||||||
|
)
|
||||||
|
set_target_properties(update_generation PROPERTIES
|
||||||
|
ARCHIVE_OUTPUT_DIRECTORY ${GLOBAL_LIB_DIRECTORY} LIBRARY_OUTPUT_DIRECTORY ${GLOBAL_LIB_DIRECTORY}
|
||||||
|
)
|
||||||
|
|
||||||
|
target_include_directories(update_generation PUBLIC ${CURRENT_INCLUDE_DIR} ${INCLUDE_DIR})
|
@ -0,0 +1,92 @@
|
|||||||
|
|
||||||
|
#include "ChainedUpdatePlanner.hpp"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Iterate on a plan and reorders the split to ensure the last update does not split
|
||||||
|
* @param plan
|
||||||
|
*/
|
||||||
|
void reorderSplits(UpdatePlan *plan) {
|
||||||
|
|
||||||
|
// Find the first non-splitting updates
|
||||||
|
size_t first_non_split = -1;
|
||||||
|
for (size_t i = 0; i < plan->size(); ++i) {
|
||||||
|
if (not plan->operator[](i).isSplitting()) {
|
||||||
|
first_non_split = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (first_non_split == -1) {
|
||||||
|
throw std::runtime_error("Reordering Failure: No non-splitting update available for "
|
||||||
|
"reordering was found in the plan");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Swap the first non-splitting update with the last splitting update
|
||||||
|
plan->operator[](plan->size() - 1).setSplitting(false);
|
||||||
|
plan->operator[](first_non_split).setSplitting(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Check the order of the splitting updates in the plan, and maybe reorder them if the last update is
|
||||||
|
* a splitting update
|
||||||
|
*
|
||||||
|
* @param plan
|
||||||
|
* @param splits
|
||||||
|
*/
|
||||||
|
void maybeReorderSplits(UpdatePlan *plan, const size_t splits) {
|
||||||
|
if (plan->size() < 2 or splits == 0) { return; }
|
||||||
|
|
||||||
|
// Ensure that the cycle is broken by the last update
|
||||||
|
// Should look something like [..., splits, doesn't splits]
|
||||||
|
bool invalid_chain = plan->at(plan->size() - 1).isSplitting();
|
||||||
|
if (not invalid_chain) { return; }
|
||||||
|
|
||||||
|
// If the chain is invalid, reorder the splits
|
||||||
|
reorderSplits(plan);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Make a plan where updates are generated left-to-right, and splitting updates are based on the right column
|
||||||
|
* This allows the colinearity of the updates to be broken by the next update, effectively ensuring that the final matrix
|
||||||
|
* is not singular
|
||||||
|
*
|
||||||
|
* @param plan
|
||||||
|
* @param splits
|
||||||
|
*/
|
||||||
|
void makeChainedUpdatePlan(UpdatePlan *plan, const size_t n_update, const size_t splits) {
|
||||||
|
|
||||||
|
std::vector<bool> split_order(plan->size(), false);
|
||||||
|
for (size_t i = 0; i < splits; ++i) { split_order[i] = true; }
|
||||||
|
|
||||||
|
std::shuffle(split_order.begin(), split_order.end(), std::mt19937{std::random_device{}()});
|
||||||
|
|
||||||
|
for (size_t i = 0; i < plan->size(); ++i) {
|
||||||
|
auto &update_desc = plan->at(i);
|
||||||
|
update_desc.setSplitting(split_order[i]);
|
||||||
|
// Always split by
|
||||||
|
update_desc.setTargetColumnIndex(i + 1);
|
||||||
|
update_desc.setColumnIndex(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
maybeReorderSplits(plan, splits);
|
||||||
|
}
|
||||||
|
|
||||||
|
}// namespace
|
||||||
|
|
||||||
|
UpdatePlan ChainedUpdatePlanBuilder::make(uint32_t update_count, uint32_t split_count) const {
|
||||||
|
if (split_count >= update_count)
|
||||||
|
throw std::runtime_error("Cannot have more splits than updates");
|
||||||
|
|
||||||
|
UpdatePlan res(update_count);
|
||||||
|
makeChainedUpdatePlan(&res, update_count, split_count);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,2 @@
|
|||||||
|
|
||||||
|
#include "UpdateDescriptor.hpp"
|
@ -0,0 +1,30 @@
|
|||||||
|
|
||||||
|
#include "UpdateGenerator.hpp"
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
UpdateGenerator::UpdateGenerator(std::shared_ptr<const UpdatePlanner> update_planner,
|
||||||
|
std::shared_ptr<const UpdateMatrixGenerator> update_generator)
|
||||||
|
: planner(std::move(update_planner)), generator(std::move(update_generator)) {}
|
||||||
|
|
||||||
|
|
||||||
|
UpdatePlan UpdateGenerator::plan(uint32_t update_count, uint32_t split_count) const {
|
||||||
|
return planner->make(update_count, split_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
UpdateMatrix UpdateGenerator::make(const Matrix &slater_matrix, uint32_t y_padding,
|
||||||
|
const UpdatePlan &update_plan) const {
|
||||||
|
return generator->make(slater_matrix, y_padding, update_plan);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
UpdateMatrix UpdateGenerator::make(const Matrix &slater_matrix_t, uint32_t y_padding,
|
||||||
|
uint32_t update_count, uint32_t split_count) const {
|
||||||
|
auto new_plan = plan(update_count, split_count);
|
||||||
|
return make(slater_matrix_t, y_padding, new_plan);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,48 @@
|
|||||||
|
|
||||||
|
#include "UpdateMatrix.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
UpdateMatrix::UpdateMatrix(const UpdateMatrix &other)
|
||||||
|
: UpdateMatrix(other.n_update, other.n_splits, other.update_length) {}
|
||||||
|
|
||||||
|
UpdateMatrix::UpdateMatrix(uint32_t n_update, uint32_t n_splits, uint32_t update_length)
|
||||||
|
: n_update(n_update), n_splits(n_splits), update_length(update_length) {
|
||||||
|
matrix = std::make_unique<double[]>(n_update * update_length);
|
||||||
|
update_index = std::make_unique<uint32_t[]>(n_update);
|
||||||
|
}
|
||||||
|
|
||||||
|
UpdateMatrix &UpdateMatrix::operator=(const UpdateMatrix &other) {
|
||||||
|
if (this == &other) { return *this; }
|
||||||
|
|
||||||
|
// Reallocate arrays if needed
|
||||||
|
if (n_update * update_length != other.n_update * other.update_length) {
|
||||||
|
matrix = std::make_unique<double[]>(other.n_update * other.update_length);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n_update != other.n_update) { update_index = std::make_unique<uint32_t[]>(other.n_update); }
|
||||||
|
|
||||||
|
n_update = other.n_update;
|
||||||
|
n_splits = other.n_splits;
|
||||||
|
update_length = other.update_length;
|
||||||
|
|
||||||
|
std::copy(other.matrix.get(), other.matrix.get() + n_update * update_length, matrix.get());
|
||||||
|
std::copy(other.update_index.get(), other.update_index.get() + n_update, update_index.get());
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
void UpdateMatrix::setUpdate(uint32_t update_rank, const std::span<const double> &update,
|
||||||
|
uint32_t column_index) {
|
||||||
|
// Fetch the view where the update must be stored inside the update matrix
|
||||||
|
auto update_span = getUpdateSpan(update_rank);
|
||||||
|
|
||||||
|
if (update.size() > update_span.size()) {
|
||||||
|
throw std::runtime_error("Update vector is too large");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::copy(update.begin(), update.end(), update_span.begin());
|
||||||
|
update_index.get()[update_rank] = column_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,199 @@
|
|||||||
|
|
||||||
|
#include "UpdateMatrixGenerator.hpp"
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<double> tryGenerateNonSplittingUpdate(const UpdateDescriptor &descriptor,
|
||||||
|
const Matrix ¤t_matrix) {
|
||||||
|
|
||||||
|
size_t kMaxAttempts = 100;
|
||||||
|
size_t attempts = 0;
|
||||||
|
|
||||||
|
Matrix copy = current_matrix;
|
||||||
|
std::vector<double> update(current_matrix.x);
|
||||||
|
|
||||||
|
std::random_device rd;
|
||||||
|
std::mt19937 gen(rd());
|
||||||
|
std::uniform_real_distribution<> dis(0, 5);
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
// Generate a new update vector
|
||||||
|
std::generate(update.begin(), update.end(), [&]() { return dis(gen); });
|
||||||
|
|
||||||
|
// Check if the update vector is a valid update
|
||||||
|
for (int i = 0; i < current_matrix.x; i++) {
|
||||||
|
copy.array[i * current_matrix.y + descriptor.getColumnIndex()] += update[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// After the update is applied, the determinant of the matrix should be > 10e-3
|
||||||
|
double absolute_det = std::abs(copy.computeDeterminant());
|
||||||
|
if (absolute_det > 1e-3) { break; }
|
||||||
|
|
||||||
|
// Reverse the update
|
||||||
|
for (int i = 0; i < current_matrix.x; i++) {
|
||||||
|
copy.array[i * current_matrix.y + descriptor.getColumnIndex()] -= update[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
attempts++;
|
||||||
|
if (attempts > kMaxAttempts) {
|
||||||
|
throw UpdateMatrixGenerator::FailedUpdateGeneration(attempts);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return update;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<double> &addRandomNoise(std::vector<double> &update, const double noise_magnitude) {
|
||||||
|
// We add a noise of magnitude 1e-6 to the update vector so the resulting columns are not perfectly collinear
|
||||||
|
// Which ensures the matrix stays invertible.
|
||||||
|
|
||||||
|
// First generate a random vector
|
||||||
|
std::vector<double> noise(update.size());
|
||||||
|
std::random_device rd;
|
||||||
|
std::mt19937 gen(rd());
|
||||||
|
std::uniform_real_distribution<> dis(0, 100);
|
||||||
|
|
||||||
|
std::generate(noise.begin(), noise.end(), [&]() { return dis(gen); });
|
||||||
|
|
||||||
|
// Compute the norm
|
||||||
|
double norm = 0;
|
||||||
|
for (size_t i = 0; i < update.size(); i++) { norm += noise[i] * noise[i]; }
|
||||||
|
|
||||||
|
// Coefficient to have a magnitude of 1e-6
|
||||||
|
// No need to update the noise vector, just multiply it in place
|
||||||
|
norm = noise_magnitude / sqrt(norm);
|
||||||
|
for (size_t i = 0; i < update.size(); i++) { update[i] += noise[i] * norm; }
|
||||||
|
return update;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<double> generate_splitting_update(const UpdateDescriptor &descriptor,
|
||||||
|
const Matrix &m, const double noise_magnitude) {
|
||||||
|
|
||||||
|
size_t target_index = descriptor.getTargetColumnIndex();
|
||||||
|
|
||||||
|
constexpr double lambda = 1;
|
||||||
|
std::vector<double> update(m.x);
|
||||||
|
for (size_t i = 0; i < m.x; i++) {
|
||||||
|
update[i] = (lambda * m.array[i * m.y + target_index]) // Target column
|
||||||
|
- m.array[i * m.y + descriptor.getColumnIndex()];// Current column
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add a small random noise to the update vector to break the singularity
|
||||||
|
update = addRandomNoise(update, noise_magnitude);
|
||||||
|
|
||||||
|
return update;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The updates may be padded with extra zeros, so we need to check the generated update size
|
||||||
|
*/
|
||||||
|
void maybePadUpdate(std::vector<double> &update_vec, uint32_t padded_size) {
|
||||||
|
if (padded_size == update_vec.size()) return;
|
||||||
|
|
||||||
|
if (padded_size < update_vec.size())
|
||||||
|
throw std::runtime_error("Padded update size is smaller than the original update size");
|
||||||
|
|
||||||
|
size_t old_size = update_vec.size();
|
||||||
|
update_vec.resize(padded_size);
|
||||||
|
for (size_t i = old_size; i < padded_size; i++) { update_vec[i] = 0; }
|
||||||
|
}
|
||||||
|
|
||||||
|
void updateSlaterMatrix(
|
||||||
|
Matrix &slater_matrix, const UpdateDescriptor &descriptor,
|
||||||
|
const std::vector<double> &
|
||||||
|
update_vec) {// We found the correct update vector, so return it + update the current matrix
|
||||||
|
for (int i = 0; i < slater_matrix.y; i++) {
|
||||||
|
slater_matrix.array[i * slater_matrix.y + descriptor.getColumnIndex()] += update_vec[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<double> makePotentialUpdate(const UpdateDescriptor &descriptor,
|
||||||
|
const Matrix &slater_matrix, uint32_t padded_size,
|
||||||
|
double noise_magnitude) {
|
||||||
|
|
||||||
|
std::vector<double> res;
|
||||||
|
if (descriptor.isSplitting()) {
|
||||||
|
res = generate_splitting_update(descriptor, slater_matrix, noise_magnitude);
|
||||||
|
} else {
|
||||||
|
res = tryGenerateNonSplittingUpdate(descriptor, slater_matrix);
|
||||||
|
}
|
||||||
|
|
||||||
|
maybePadUpdate(res, padded_size);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
}// namespace
|
||||||
|
|
||||||
|
// Alias for the exception throw when the generation fails to find a correct update.
|
||||||
|
// This exception is not make for runtime error !
|
||||||
|
using FailedUpdateGeneration = UpdateMatrixGenerator::FailedUpdateGeneration;
|
||||||
|
|
||||||
|
FailedUpdateGeneration::FailedUpdateGeneration(size_t attempts)
|
||||||
|
: std::runtime_error("Failed to generate a valid update after " + std::to_string(attempts) +
|
||||||
|
" attempts"),
|
||||||
|
attempted_generation(attempts) {}
|
||||||
|
|
||||||
|
Matrix UpdateMatrixGenerator::iterateOnPlan(const Matrix &m, UpdateMatrix *update_matrix,
|
||||||
|
const UpdatePlan &plan) const {
|
||||||
|
// This matrix represents the matrix after the nth update
|
||||||
|
// At the beginning, it is the same as the input matrix
|
||||||
|
Matrix current_slater_matrix = m;
|
||||||
|
|
||||||
|
|
||||||
|
// Iterate on the plan, generating fitting updates
|
||||||
|
for (uint32_t rank = 0; const auto &descriptor: plan) {
|
||||||
|
|
||||||
|
// Randomly generate a potential update
|
||||||
|
const auto padded_size = update_matrix->getUpdateLength();
|
||||||
|
auto update_vec =
|
||||||
|
makePotentialUpdate(descriptor, current_slater_matrix, padded_size, noise_magnitude);
|
||||||
|
update_matrix->setUpdate(rank, update_vec, descriptor.getColumnIndex());
|
||||||
|
updateSlaterMatrix(current_slater_matrix, descriptor, update_vec);
|
||||||
|
|
||||||
|
rank++;
|
||||||
|
}
|
||||||
|
return current_slater_matrix;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool UpdateMatrixGenerator::tryGenerateUpdates(UpdateMatrix *update_matrix,
|
||||||
|
const UpdatePlan &plan, const Matrix &m) const {
|
||||||
|
|
||||||
|
Matrix resulting_matrix = iterateOnPlan(m, update_matrix, plan);
|
||||||
|
|
||||||
|
// Check if the final matrix is invertible or not
|
||||||
|
const double det = resulting_matrix.getLastKnownDeterminant();
|
||||||
|
if (fabs(det) < determinant_threshold) { return false; }
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void UpdateMatrixGenerator::attemptToGenerateUpdates(const UpdatePlan &plan, UpdateMatrix *res,
|
||||||
|
const Matrix &m) const {
|
||||||
|
size_t attempts = 0;
|
||||||
|
constexpr size_t max_attempts = 10;
|
||||||
|
|
||||||
|
// Attempt to generate all the updates until we find a valid series, or we reached the maximum number of attempts
|
||||||
|
while (true) {
|
||||||
|
bool success = tryGenerateUpdates(res, plan, m);
|
||||||
|
|
||||||
|
if (success) { return; }
|
||||||
|
|
||||||
|
attempts++;
|
||||||
|
if (attempts >= max_attempts) { throw FailedUpdateGeneration(attempts); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
UpdateMatrix UpdateMatrixGenerator::make(const Matrix &m, size_t y_padding,
|
||||||
|
const UpdatePlan &plan) const {
|
||||||
|
|
||||||
|
size_t update_length = m.x + y_padding;
|
||||||
|
|
||||||
|
UpdateMatrix res(plan.size(), plan.getSplitCount(), update_length);
|
||||||
|
attemptToGenerateUpdates(plan, &res, m);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
}// namespace randomgen
|
@ -0,0 +1,14 @@
|
|||||||
|
|
||||||
|
#include "UpdatePlan.hpp"
|
||||||
|
|
||||||
|
namespace randomgen {
|
||||||
|
|
||||||
|
UpdatePlan::UpdatePlan(uint32_t updates_count) { updates_descriptors.resize(updates_count); }
|
||||||
|
|
||||||
|
uint32_t UpdatePlan::getSplitCount() const {
|
||||||
|
uint32_t res = 0;
|
||||||
|
for (const auto &update: updates_descriptors) { res += update.isSplitting(); }
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
}// namespace randomgen
|
10
random_generator/src/main.cpp
Normal file
10
random_generator/src/main.cpp
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#include "Engine.hpp"
|
||||||
|
#include "versioning.h"
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
|
std::cout << PROJECT_NAME << " version " << PROJECT_VER << std::endl;
|
||||||
|
Engine engine(argc, argv);
|
||||||
|
return engine.exec();
|
||||||
|
}
|
9
random_generator/src/versioning.h.in
Normal file
9
random_generator/src/versioning.h.in
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
// Private header used for versioning the project
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#define PROJECT_NAME "@PROJECT_NAME@"
|
||||||
|
#define PROJECT_VER "@PROJECT_VERSION@"
|
||||||
|
#define PROJECT_VER_MAJOR "@PROJECT_VERSION_MAJOR@"
|
||||||
|
#define PROJECT_VER_MINOR "@PROJECT_VERSION_MINOR@"
|
||||||
|
#define PROJECT_VER_PATCH "@PROJECT_VERSION_PATCH@"
|
Loading…
Reference in New Issue
Block a user