From 59321bc06f9b5086c2a2e20747ca18c83ea85bba Mon Sep 17 00:00:00 2001 From: Thukisdo Date: Tue, 30 Aug 2022 11:31:18 +0200 Subject: [PATCH 1/2] Merged random cycle generator into the main scherman-morrison repository --- random_generator/.clang-format | 66 ++++++ random_generator/.gitignore | 14 ++ random_generator/CMakeLists.txt | 21 ++ random_generator/README.md | 131 ++++++++++++ random_generator/headers/Engine.hpp | 38 ++++ .../headers/EngineModeExecutor.hpp | 29 +++ .../headers/MatrixSizeExecutor.hpp | 17 ++ .../headers/UpdateCountExecutor.hpp | 16 ++ .../headers/cycle_generation/CApi.h | 47 +++++ .../headers/cycle_generation/Cycle.hpp | 28 +++ .../cycle_generation/CycleGenerator.hpp | 36 ++++ .../headers/cycle_generation/Matrix.hpp | 42 ++++ .../SizedBasedCycleStream.hpp | 24 +++ .../hdf5CycleOutputStream.hpp | 59 ++++++ .../ChainedUpdatePlanner.hpp | 26 +++ .../update_generation/UpdateDescriptor.hpp | 45 ++++ .../update_generation/UpdateGenerator.hpp | 74 +++++++ .../update_generation/UpdateMatrix.hpp | 148 +++++++++++++ .../UpdateMatrixGenerator.hpp | 69 ++++++ .../update_generation/UpdatePlan.hpp | 48 +++++ .../update_generation/UpdatePlanner.hpp | 14 ++ random_generator/src/CMakeLists.txt | 27 +++ random_generator/src/Engine.cpp | 70 ++++++ random_generator/src/MatrixSizeExecutor.cpp | 129 ++++++++++++ random_generator/src/UpdateCountExecutor.cpp | 126 +++++++++++ random_generator/src/c_api_test.c | 29 +++ .../src/cycle_generation/CApi.cpp | 100 +++++++++ .../src/cycle_generation/CMakeLists.txt | 14 ++ .../src/cycle_generation/CycleGenerator.cpp | 74 +++++++ .../src/cycle_generation/Matrix.cpp | 110 ++++++++++ .../SizedBasedCycleStream.cpp | 14 ++ .../hdf5CycleOutputStream.cpp | 135 ++++++++++++ .../update_generation/CMakeLists.txt | 16 ++ .../ChainedUpdatePlanner.cpp | 92 ++++++++ .../update_generation/UpdateDescriptor.cpp | 2 + .../update_generation/UpdateGenerator.cpp | 30 +++ .../update_generation/UpdateMatrix.cpp | 48 +++++ .../UpdateMatrixGenerator.cpp | 199 ++++++++++++++++++ .../update_generation/UpdatePlan.cpp | 14 ++ random_generator/src/main.cpp | 10 + random_generator/src/versioning.h.in | 9 + 41 files changed, 2240 insertions(+) create mode 100644 random_generator/.clang-format create mode 100644 random_generator/.gitignore create mode 100644 random_generator/CMakeLists.txt create mode 100644 random_generator/README.md create mode 100644 random_generator/headers/Engine.hpp create mode 100644 random_generator/headers/EngineModeExecutor.hpp create mode 100644 random_generator/headers/MatrixSizeExecutor.hpp create mode 100644 random_generator/headers/UpdateCountExecutor.hpp create mode 100644 random_generator/headers/cycle_generation/CApi.h create mode 100644 random_generator/headers/cycle_generation/Cycle.hpp create mode 100644 random_generator/headers/cycle_generation/CycleGenerator.hpp create mode 100644 random_generator/headers/cycle_generation/Matrix.hpp create mode 100644 random_generator/headers/cycle_generation/SizedBasedCycleStream.hpp create mode 100644 random_generator/headers/cycle_generation/hdf5CycleOutputStream.hpp create mode 100644 random_generator/headers/cycle_generation/update_generation/ChainedUpdatePlanner.hpp create mode 100644 random_generator/headers/cycle_generation/update_generation/UpdateDescriptor.hpp create mode 100644 random_generator/headers/cycle_generation/update_generation/UpdateGenerator.hpp create mode 100644 random_generator/headers/cycle_generation/update_generation/UpdateMatrix.hpp create mode 100644 random_generator/headers/cycle_generation/update_generation/UpdateMatrixGenerator.hpp create mode 100644 random_generator/headers/cycle_generation/update_generation/UpdatePlan.hpp create mode 100644 random_generator/headers/cycle_generation/update_generation/UpdatePlanner.hpp create mode 100644 random_generator/src/CMakeLists.txt create mode 100644 random_generator/src/Engine.cpp create mode 100644 random_generator/src/MatrixSizeExecutor.cpp create mode 100644 random_generator/src/UpdateCountExecutor.cpp create mode 100644 random_generator/src/c_api_test.c create mode 100644 random_generator/src/cycle_generation/CApi.cpp create mode 100644 random_generator/src/cycle_generation/CMakeLists.txt create mode 100644 random_generator/src/cycle_generation/CycleGenerator.cpp create mode 100644 random_generator/src/cycle_generation/Matrix.cpp create mode 100644 random_generator/src/cycle_generation/SizedBasedCycleStream.cpp create mode 100644 random_generator/src/cycle_generation/hdf5CycleOutputStream.cpp create mode 100644 random_generator/src/cycle_generation/update_generation/CMakeLists.txt create mode 100644 random_generator/src/cycle_generation/update_generation/ChainedUpdatePlanner.cpp create mode 100644 random_generator/src/cycle_generation/update_generation/UpdateDescriptor.cpp create mode 100644 random_generator/src/cycle_generation/update_generation/UpdateGenerator.cpp create mode 100644 random_generator/src/cycle_generation/update_generation/UpdateMatrix.cpp create mode 100644 random_generator/src/cycle_generation/update_generation/UpdateMatrixGenerator.cpp create mode 100644 random_generator/src/cycle_generation/update_generation/UpdatePlan.cpp create mode 100644 random_generator/src/main.cpp create mode 100644 random_generator/src/versioning.h.in diff --git a/random_generator/.clang-format b/random_generator/.clang-format new file mode 100644 index 0000000..bcfa5d3 --- /dev/null +++ b/random_generator/.clang-format @@ -0,0 +1,66 @@ +# Generated from CLion C/C++ Code Style settings +BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: None +AlignOperands: Align +AllowAllArgumentsOnNextLine: false +AllowAllConstructorInitializersOnNextLine: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: Always +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Always +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterReturnType: None +AlwaysBreakTemplateDeclarations: Yes +BreakBeforeBraces: Custom +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: true +BreakBeforeBinaryOperators: None +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +ColumnLimit: 100 +CompactNamespaces: false +ContinuationIndentWidth: 8 +IndentCaseLabels: true +IndentPPDirectives: None +IndentWidth: 2 +KeepEmptyLinesAtTheStartOfBlocks: true +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: All +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PointerAlignment: Right +ReflowComments: false +SpaceAfterCStyleCast: true +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 0 +SpacesInAngles: false +SpacesInCStyleCastParentheses: false +SpacesInContainerLiterals: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +TabWidth: 2 +UseTab: Never diff --git a/random_generator/.gitignore b/random_generator/.gitignore new file mode 100644 index 0000000..1343e0b --- /dev/null +++ b/random_generator/.gitignore @@ -0,0 +1,14 @@ + + +# Common build directories +[bB]uild +[dD]ebug +[rR]elease +# This one is often used by intellij ide +cmake-* + +# IDE related +.idea +.vscode + + diff --git a/random_generator/CMakeLists.txt b/random_generator/CMakeLists.txt new file mode 100644 index 0000000..058f817 --- /dev/null +++ b/random_generator/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.16) + +project("TREX - Scherman-Morrison random generator" VERSION "0.1.0") + +set(INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/headers) +set(GLOBAL_BIN_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) +set(GLOBAL_LIB_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Handle versioning +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/versioning.h.in" "${PROJECT_BINARY_DIR}/versioning.h") +include_directories(${PROJECT_BINARY_DIR}) + +find_package(HDF5 REQUIRED) +find_package(BLAS REQUIRED) +find_package(LAPACK REQUIRED) +find_package(OpenMP REQUIRED) + +add_subdirectory(src) \ No newline at end of file diff --git a/random_generator/README.md b/random_generator/README.md new file mode 100644 index 0000000..ec533a8 --- /dev/null +++ b/random_generator/README.md @@ -0,0 +1,131 @@ +![Builds](https://github.com/Thukisdo/mlkaps-random-generator/actions/workflows/cmake.yml/badge.svg) +[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/Naereen/StrapDown.js/graphs/commit-activity) +![Maintainer](https://img.shields.io/badge/maintainer-Thukisdo-blue) +# Random cycle generator for QMC computations + +TREX's QMCKL library provides an implementation of the Scherman-morrison formula. +This tool provides a way to generate random HDF5 datasets that can be used for +benchmarking those implementations. + +Specifically, it reproduces "cycles" that are generated by QMCKL computations. +A cycle is a sequence of row updates on a matrix, that require multiple calls to +Scherman-morrison to update the corresponding inverse matrix. + +Note that this tool main purpose is to generate splitting cycles ([More on this here](#More-on-splitting-cycles)) + +## Usage + +This project uses cmake, so first set up a build directory and build the executable + +```shell + mkdir build + cd build + cmake .. -DCMAKE_BUILD_TYPE=Release + make -j 4 +``` + +The executable can then be launched with + +```shell + cd build + ./bin/random_generator +``` + +Where generation mode is one of the following: + +- matrix_size: Generate a dataset of increasing matrix sizes, stored as + +``` +/ +|-/ +| |-/ +| | |-cycle_/ +| | |- ... +| | |-cycle_ +| | |- ... +| |-/ +|-/ +... +``` + +- update: Generate a dataset of increasing update count, stored as + +``` +/ +|-/ +| |-/ +| | |-cycle_/ +| | |- ... +| | |-cycle_ +| | |- ... +| |-/ +|-/ +... +``` + +If you require another generation mode, please open an issue or modify the main accordingly. + +# Cycle format + +Cycles are stored in the following format: + +``` +cycle_/ + |-slater_inverse_t: The transposed-inverse of the slater matrix to update + |-updates: A matrix containing all the (additive) updates to apply to the slater matrix + |-nupdates: The number of updates in the cycle + |-determinant: The determinant of the slater matrix before the updates + |-slater_matrix: The slater matrix to update + |-col_update_index: The index of the row to update in the slater matrix + |-condition_number: The condition number of the slater matrix before the updates +``` + +Note that the updates are not applied on columns of the transposed-inverse slater matrix, but on its rows. +As such, it is needed to transpose the slater matrix before applying the updates. + +# More on matrix-update cycles + +During QMC computation, a series of updates are applied on a matrix. One update affect an entire row/column +of the matrix. This series of successive updates is called a "cycle". + +Instead of computing the inverse of this matrix from scratch (which is expansive), +it is possible to update the inverse by applying the Scherman-Morrison algorithm. + +This tool generates a dataset containing a number of matrix-update cycles. + +# More on splitting cycles + +During the cycle, an update may render the matrix non-invertible. We will call such an update a "splitting update". +Some algorithm will then fail to apply the cycle, whereas smarter algorithms will succeed by delaying the update +(applying the remaining updates first), or by applying the half of the update and delaying the other half. + +Those splitting updates lead to increased computation time, and are critical for performance evaluation. + +As such, this tool is centered around generating splitting updates. + +# How it works + +The hardest case for Scherman-Morrison is when the matrix is almost-singular (splitting update). +To reproduce this case, we generate an update that render a given column colinear to another. + +A naive implementation would produce a failure after checking that the determinant is close to zero. +To fix this, we add a small noise to this update, and follow it with another update that breaks the colinearity. + +### Chained-updates + +We use a basic implementation of this algorithm called the "Chained-updates". + +It produces updates by traversing the matrix left-to-right, where splitting updates are made colinear +to the column to their right. +A normal update will then break the chain of any previous splitting updates. + +We ensure that the final update of the cycle is non-splitting, to guarantee that the final matrix +is invertible. + +### Pitfalls + +This methods has two main downfalls: + +- A cycle cannot have a single splitting update, since a normal update is required to break the chain. +- This is the best case scenario for implementation of Scherman-Morrison that postpone splitting updates. + We could make this harder by generating bi-directional chains. diff --git a/random_generator/headers/Engine.hpp b/random_generator/headers/Engine.hpp new file mode 100644 index 0000000..ea106ca --- /dev/null +++ b/random_generator/headers/Engine.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include +#include + +#include "EngineModeExecutor.hpp" + +enum class EngineMode { + kMatrixSize, + kUpdateCount, +}; + +/** + * @brief Main class responsible for starting the correct generation depending on the program arguments + * + * Adding a new generation mode is as simple as adding a new enum value to EngineMode and adapting the + * exec() method + */ +class Engine { +public: + Engine(int argc, char **argv); + + int exec(); + + char *getExecutablePath() { return argv[0]; } + + const std::filesystem::path &getOutputPath() const { return dataset_output_path; } + + const char *const *getArgv() const { return argv; } + + int getArgc() const { return argc; } + +private: + char **argv; + int argc; + EngineMode mode; + std::filesystem::path dataset_output_path; +}; diff --git a/random_generator/headers/EngineModeExecutor.hpp b/random_generator/headers/EngineModeExecutor.hpp new file mode 100644 index 0000000..13aaeaa --- /dev/null +++ b/random_generator/headers/EngineModeExecutor.hpp @@ -0,0 +1,29 @@ +#pragma once + +class Engine; + +/** + * @brief Interface for classes that execute a specific generation mode + * + * Subclassing this class allows one to easily add new custom generation modes to the engine. + * (This class is purposefully simple to allow for easy subclassing, try to keep it this way) + */ +class EngineModeExecutor { +public: + explicit EngineModeExecutor(Engine *parent_engine) : parent(parent_engine) {} + + virtual ~EngineModeExecutor() = default; + + /** + * @brief Main method of the executor. This method is automatically called by the Engine class. + * @return The return code of the execution, 0 on success, non-zero on error. + */ + virtual int exec() = 0; + +protected: + /** + * The engine that owns this executor + * Can be used to fetch the program arguments, etc. + */ + Engine *parent; +}; diff --git a/random_generator/headers/MatrixSizeExecutor.hpp b/random_generator/headers/MatrixSizeExecutor.hpp new file mode 100644 index 0000000..2202392 --- /dev/null +++ b/random_generator/headers/MatrixSizeExecutor.hpp @@ -0,0 +1,17 @@ + +#pragma once + +#include "EngineModeExecutor.hpp" + +/** + * @brief Generates a dataset with matrices of increasing size and increasing number of splits. + * The total number of updates is fixed, and the number of splits is increasing. + * + * @TODO Add a method for customizing the executor's parameters + */ +class MatrixSizeExecutor : public EngineModeExecutor { +public: + using EngineModeExecutor::EngineModeExecutor; + + int exec() override; +}; \ No newline at end of file diff --git a/random_generator/headers/UpdateCountExecutor.hpp b/random_generator/headers/UpdateCountExecutor.hpp new file mode 100644 index 0000000..8056ffe --- /dev/null +++ b/random_generator/headers/UpdateCountExecutor.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include "EngineModeExecutor.hpp" + +/** + * @brief Generates a dataset where cycles contain a varying number of updates. + * The number of splitting updates varies between 2 and the number of updates. + * + * @TODO Add a method for customizing the executor's parameters + */ +class UpdateCountExecutor : public EngineModeExecutor { +public: + using EngineModeExecutor::EngineModeExecutor; + + int exec() override; +}; \ No newline at end of file diff --git a/random_generator/headers/cycle_generation/CApi.h b/random_generator/headers/cycle_generation/CApi.h new file mode 100644 index 0000000..b49329c --- /dev/null +++ b/random_generator/headers/cycle_generation/CApi.h @@ -0,0 +1,47 @@ +// This file provide a simple API to generate random cycles +// The goal of this api is to provide a way for other programs to generate random cycles at runtime +// instead of requiring a pre-generated dataset + +// Don't use "pragma once" to ensure compatibility with any compiler +#ifndef C_API_HEADER_GUARD +#define C_API_HEADER_GUARD + +// This file is included from both C++ and C, so ensure everything here has C linkage +// if need be +#ifdef __cplusplus +#include +extern "C" { +#else +#include +#endif + + +typedef struct { + // Total number of updates in the cycle + uint32_t n_update; + // Dim is the number of rows + uint32_t dim; + // lds is the number of columns + uint32_t lds; + double *slater_inverse_t; + double *slater_matrix; + // Matrix containing all the updates !The updates are ADDITIVE! + double *updates; + // Determinant of the slater matrix + double determinant; + // Indices of the updates + uint32_t *col_update_index; +} Cycle; + +Cycle *generateRandomCycle(uint32_t dim, uint32_t lds, uint32_t n_update, uint32_t n_splits, + double determinant_threshold, double splitting_update_noise_magnitude); + +void freeCycle(Cycle **cycle); + + +// To match the previous opening bracket +#ifdef __cplusplus +} +#endif + +#endif diff --git a/random_generator/headers/cycle_generation/Cycle.hpp b/random_generator/headers/cycle_generation/Cycle.hpp new file mode 100644 index 0000000..a6f05cd --- /dev/null +++ b/random_generator/headers/cycle_generation/Cycle.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include "Matrix.hpp" +#include "update_generation/UpdateMatrix.hpp" + +namespace randomgen { + + class Cycle { + public: + Matrix &getSlaterMatrix() { return matrix; } + + const Matrix &getSlaterMatrix() const { return matrix; } + + Matrix &getSlaterInverse() { return matrix_invt; } + + const Matrix &getSlaterInverseTransposed() const { return matrix_invt; } + + UpdateMatrix &getUpdateMatrix() { return update_array; } + + const UpdateMatrix &getUpdateMatrix() const { return update_array; } + + private: + Matrix matrix; + Matrix matrix_invt; + UpdateMatrix update_array; + }; + +}// namespace randomgen \ No newline at end of file diff --git a/random_generator/headers/cycle_generation/CycleGenerator.hpp b/random_generator/headers/cycle_generation/CycleGenerator.hpp new file mode 100644 index 0000000..81bd17d --- /dev/null +++ b/random_generator/headers/cycle_generation/CycleGenerator.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include "Cycle.hpp" +#include "Matrix.hpp" +#include "update_generation/UpdateGenerator.hpp" +#include "update_generation/UpdateMatrix.hpp" +#include + +namespace randomgen { + + class CycleGenerator { + public: + /** + * @brief Cycle generator + * @param x The size of the (squared) matrix + * @param y_padding The column padding to add to the matrix + * @param generator The generator used to generate the update vectors + * Note that this generator must be thread-safe and re-entrant + */ + CycleGenerator(size_t x, size_t y_padding, + std::shared_ptr update_generator); + + void generateCycleMatrices(Cycle *res); + + Cycle make(size_t n_update, size_t n_splits); + + private: + std::shared_ptr update_generator; + MatrixGenerator matrix_generator; + size_t x, y_padding; + + Cycle attemptToGenerateCycle(size_t n_update, size_t n_splits); + + void finalizeCycle(Cycle &res) const; + }; +}// namespace randomgen diff --git a/random_generator/headers/cycle_generation/Matrix.hpp b/random_generator/headers/cycle_generation/Matrix.hpp new file mode 100644 index 0000000..0d22214 --- /dev/null +++ b/random_generator/headers/cycle_generation/Matrix.hpp @@ -0,0 +1,42 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace randomgen { + struct Matrix { + Matrix() = default; + Matrix(size_t x, size_t y); + + Matrix(const Matrix &other); + Matrix &operator=(const Matrix &other); + + Matrix(Matrix &&other) = default; + Matrix &operator=(Matrix &&other) = default; + + Matrix inverse(); + Matrix transpose(); + + friend std::ostream &operator<<(std::ostream &os, const Matrix &m); + + double *data() { return array.get(); } + + const double *data() const { return array.get(); } + + void pad(size_t x_pad, size_t y_pad); + double computeDeterminant(); + double getLastKnownDeterminant() const; + + uint32_t x = 0, y = 0; + double determinant; + std::unique_ptr array; + }; + + class MatrixGenerator { + public: + Matrix operator()(size_t matrix_size, double vmin = -3, double vmax = 3); + }; +}// namespace randomgen \ No newline at end of file diff --git a/random_generator/headers/cycle_generation/SizedBasedCycleStream.hpp b/random_generator/headers/cycle_generation/SizedBasedCycleStream.hpp new file mode 100644 index 0000000..83ac66c --- /dev/null +++ b/random_generator/headers/cycle_generation/SizedBasedCycleStream.hpp @@ -0,0 +1,24 @@ +#pragma once +#include "hdf5CycleOutputStream.hpp" + + +namespace randomgen { + + /** + * @brief A stream that stores cycles based on the matrices size + */ + class SizedBasedCycleStream : public hdf5CycleOutputStream { + public: + using hdf5CycleOutputStream::hdf5CycleOutputStream; + + private: + /** + * @brief Returns the path as /matrix_size/splits_count/cycle_id + * + * @param cycle + * @return + */ + std::string getPathFor(const Cycle &cycle) override; + }; + +}// namespace randomgen diff --git a/random_generator/headers/cycle_generation/hdf5CycleOutputStream.hpp b/random_generator/headers/cycle_generation/hdf5CycleOutputStream.hpp new file mode 100644 index 0000000..d433054 --- /dev/null +++ b/random_generator/headers/cycle_generation/hdf5CycleOutputStream.hpp @@ -0,0 +1,59 @@ +#pragma once + + +#include "Cycle.hpp" +#include +#include +#include +#include + + +namespace randomgen { + /** + * @brief This stream allows one to output multiple cycles to a single HDF5 file. + * The cycle storing algorithm is fixed, but this class can be subclassed to change where the cycles + * are stored in the final datasets. + * + * By default, this class stores the cycles per number of updates and number of splits. + */ + class hdf5CycleOutputStream { + public: + explicit hdf5CycleOutputStream(const std::filesystem::path &dataset_path); + + virtual ~hdf5CycleOutputStream(); + + + /** + * @brief Output a cycle to the dataset. + * @param ds The stream to output to. + * @param cycle The cycle to output + * @return The stream passed as parameter. + */ + friend hdf5CycleOutputStream &operator<<(hdf5CycleOutputStream &ds, const Cycle &cycle); + + protected: + /** + * @brief Creates the given path inside the dataset (parents folders included) + * @param path The path to create + * @return the id of the newly created group + * Freeing is the responsibility of the caller. + */ + hid_t createGroup(const std::string &path); + + hid_t file_id = -1; + + // Each cycle is represented by a unique id + // This member is used to keep track of the current id + size_t last_unique_cycle_id = 0; + + private: + /** + * Get the storage path for a given cycle. + * + * By default, this is /n_updates/n_splits + * @param cycle The cycle that will be stored + * @return The path at which the cycle must be stored. + */ + virtual std::string getPathFor(const Cycle &cycle); + }; +}// namespace randomgen diff --git a/random_generator/headers/cycle_generation/update_generation/ChainedUpdatePlanner.hpp b/random_generator/headers/cycle_generation/update_generation/ChainedUpdatePlanner.hpp new file mode 100644 index 0000000..1804ff5 --- /dev/null +++ b/random_generator/headers/cycle_generation/update_generation/ChainedUpdatePlanner.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include + +#include "UpdatePlanner.hpp" + +namespace randomgen { + + /** + * @brief Generates an update plan using chained updates + * + * Updates are generated left-to-right, where each splitting update targets the column on its right. + * By ensuring that the last update is non-splitting, we can guarantee that the matrix remains invertible. + */ + class ChainedUpdatePlanBuilder : public UpdatePlanner { + public: + /** + * @brief Produce a chain-update plan + * @param update_count The total number of updates in the plan + * @param split_count The number of splitting updates in the plan + * @return A new plan with the given parameters + */ + UpdatePlan make(uint32_t update_count, uint32_t split_count) const override; + }; + +}// namespace randomgen diff --git a/random_generator/headers/cycle_generation/update_generation/UpdateDescriptor.hpp b/random_generator/headers/cycle_generation/update_generation/UpdateDescriptor.hpp new file mode 100644 index 0000000..6d92699 --- /dev/null +++ b/random_generator/headers/cycle_generation/update_generation/UpdateDescriptor.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include + +namespace randomgen { + + + /** + * @brief Describes single update, including its target column, whether it splits or not... + */ + class UpdateDescriptor { + public: + UpdateDescriptor() = default; + + /** + * @brief Construct a new UpdateDescriptor object + * @param splits If true, the update must splits (produce a singular matrix) + * @param column_index The column to update + * @param target_column_index The target column to use for the split (if the splits is False, this is ignored) + * + */ + UpdateDescriptor(bool splits, uint32_t column_index, uint32_t target_column_index) + : splits(splits), column_index(column_index), target_column_index(target_column_index) {} + + bool isSplitting() const { return splits; } + + void setSplitting(bool new_split_value) { this->splits = new_split_value; } + + uint32_t getColumnIndex() const { return column_index; } + + void setColumnIndex(uint32_t new_column_index) { column_index = new_column_index; } + + uint32_t getTargetColumnIndex() const { return target_column_index; } + + void setTargetColumnIndex(uint32_t new_target_column_index) { + this->target_column_index = new_target_column_index; + } + + private: + bool splits = false; + uint32_t column_index = 0; + uint32_t target_column_index = 0; + }; + +}// namespace randomgen \ No newline at end of file diff --git a/random_generator/headers/cycle_generation/update_generation/UpdateGenerator.hpp b/random_generator/headers/cycle_generation/update_generation/UpdateGenerator.hpp new file mode 100644 index 0000000..5b06eb9 --- /dev/null +++ b/random_generator/headers/cycle_generation/update_generation/UpdateGenerator.hpp @@ -0,0 +1,74 @@ +#pragma once + +#include "UpdateMatrixGenerator.hpp" +#include "UpdatePlanner.hpp" +#include + +namespace randomgen { + + /** + * @brief Main class for the generator module, capable of planning and generating updates + * + * This class is a utility class that combines a planner and a generator, into a single entry point for the module. + * It provides simplicity for the end user, while advanced user may want to use the planner and generator separately. + */ + class UpdateGenerator { + public: + /** + * @brief Builds a new generator from a planner and an update generator + * + * Takes unique_ptr to ensure thread-safety and re-entry + * @param update_planner The planner to use for generating plans + * @param update_generator The generator used to build an UpdateMatrix from a plan + */ + UpdateGenerator(std::shared_ptr update_planner, + std::shared_ptr update_generator); + + /** + * @brief Generate a new update plan to be fed to the generator at a later time + * Allows one to generate a plan that can be reused multiple times. + * + * This function is thread-safe and re-entrant. + * @param update_count The number of updates to plan for + * @param split_count The number of splitting updates inside the plan + * @return A new plan with the given parameters + */ + UpdatePlan plan(uint32_t update_count, uint32_t split_count) const; + + /** + * @brief Run the generator on the given update plan + * + * This function is thread-safe and re-entrant. + * @param slater_matrix The slater matrix to use for the update generation + * @param y_padding The padding to add to the update matrix. + * Initially, the updates have the same length as a column of the slater matrix + * + * @param update_plan + * @return + */ + UpdateMatrix make(const Matrix &slater_matrix, uint32_t y_padding, + const UpdatePlan &update_plan) const; + + /** + * @brief Plans and generate an update matrix from scratch. + * + * Note that a new plan is created each time this function is called. For maximum performance, + * consider using the plan() function to generate a plan in situations where the same plan can be reused + * multiple times. + * + * This function is thread-safe and re-entrant. + * @param slater_matrix_t + * @param y_padding + * @param update_count + * @param split_count + * @return + */ + UpdateMatrix make(const Matrix &slater_matrix_t, uint32_t y_padding, uint32_t update_count, + uint32_t split_count) const; + + private: + std::shared_ptr planner; + std::shared_ptr generator; + }; + +}// namespace randomgen \ No newline at end of file diff --git a/random_generator/headers/cycle_generation/update_generation/UpdateMatrix.hpp b/random_generator/headers/cycle_generation/update_generation/UpdateMatrix.hpp new file mode 100644 index 0000000..01b8249 --- /dev/null +++ b/random_generator/headers/cycle_generation/update_generation/UpdateMatrix.hpp @@ -0,0 +1,148 @@ +#pragma once + +#include +#include +#include +#include + +namespace randomgen { + + /** + * @brief Stores a set of updates, and provides methods to retrieve them. Include metadata about the updates, + * such as the number of splitting updates, etc.. + * + * + * The updates are stored as a matrix of size n_update x update_length. + */ + class UpdateMatrix { + private: + /** + * @brief Base class for the (Const) iterators. Provides facilities to traverse the update matrix. + * @tparam UMatrix the type of the update matrix, for const correctness + */ + template + class BaseIterator { + public: + BaseIterator(UMatrix &parent_matrix, size_t update_index) + : update_index(update_index), parent_matrix(&parent_matrix) {} + + BaseIterator operator+(int count) const { + BaseIterator res(*this); + res += count; + return res; + } + + BaseIterator &operator+=(int count) { + update_index += count; + return *this; + } + + BaseIterator &operator++() { return *this += 1; } + + // We can implement the subtraction operators using the addition operators + BaseIterator operator-(int count) const { return *this + (-count); } + + + BaseIterator &operator-=(int count) { return *this += -count; } + + BaseIterator operator--() { return *this -= 1; } + + protected: + uint32_t update_index; + UMatrix *parent_matrix; + }; + + public: + /** + * @brief Const iterator on the updates + */ + class ConstIterator : public BaseIterator { + public: + using BaseIterator::BaseIterator; + + std::span operator*() const { + if (update_index >= parent_matrix->n_update or update_index < 0) { + throw std::out_of_range( + "UpdateMatrix::ConstIterator::operator*(): update_index out of range"); + } + return parent_matrix->getUpdateSpan(update_index); + } + }; + + /** + * @brief Const iterator on the updates + */ + class Iterator : public BaseIterator { + public: + using BaseIterator::BaseIterator; + + std::span operator*() const { + if (update_index >= parent_matrix->n_update or update_index < 0) { + throw std::out_of_range( + "UpdateMatrix::ConstIterator::operator*(): update_index out of range"); + } + return parent_matrix->getUpdateSpan(update_index); + } + }; + + Iterator begin() { return {*this, 0}; } + + ConstIterator begin() const { return {*this, 0}; } + + Iterator end() { return {*this, n_update}; } + + ConstIterator end() const { return {*this, n_update}; } + + UpdateMatrix() = default; + + /** + * @brief Construct a new matrix able to store updates of the given size + * @param n_update The total number of updates in the matrix + * @param n_splits The number of splitting updates + * @param update_length The length of each update + */ + UpdateMatrix(uint32_t n_update, uint32_t n_splits, uint32_t update_length); + + UpdateMatrix(const UpdateMatrix &other); + UpdateMatrix &operator=(const UpdateMatrix &other); + + UpdateMatrix(UpdateMatrix &&other) = default; + UpdateMatrix &operator=(UpdateMatrix &&other) = default; + + + double *getRawUpdates() { return matrix.get(); } + const double *getRawUpdates() const { return matrix.get(); } + + void setUpdate(uint32_t update_rank, const std::span &update, + uint32_t column_index); + + std::span getUpdateSpan(uint32_t index) { + std::span res(getRawUpdates() + (index * update_length), update_length); + return res; + } + + std::span getUpdateSpan(uint32_t index) const { + std::span res(getRawUpdates() + (index * update_length), update_length); + return res; + } + + uint32_t getUpdateCount() const { return n_update; } + + uint32_t getUpdateLength() const { return update_length; } + + uint32_t getSplitCount() const { return n_splits; } + + uint32_t *getUpdateIndices() const { return update_index.get(); } + + private: + // The number of update in this cycle, and the number of updates that are splits + // Note that n_update > 0, and n_splits < n_update + uint32_t n_update = 0, n_splits = 0; + uint32_t update_length = 0; + + // We use unique ptr to limit the size of this class + // Thanks to this, this class fits into 32 bytes + std::unique_ptr matrix; + std::unique_ptr update_index; + }; +}// namespace randomgen diff --git a/random_generator/headers/cycle_generation/update_generation/UpdateMatrixGenerator.hpp b/random_generator/headers/cycle_generation/update_generation/UpdateMatrixGenerator.hpp new file mode 100644 index 0000000..a933d01 --- /dev/null +++ b/random_generator/headers/cycle_generation/update_generation/UpdateMatrixGenerator.hpp @@ -0,0 +1,69 @@ +#pragma once + +#include "UpdateMatrix.hpp" +#include "UpdatePlan.hpp" +#include "cycle_generation/Matrix.hpp" +#include +#include + +namespace randomgen { + + /** + * @brief Cycle updates generator that handles splitting and normal cycles + */ + class UpdateMatrixGenerator { + public: + /** + * @brief Exception thrown when the update generator fails to generate an update after a set number of attempts + */ + class FailedUpdateGeneration : std::runtime_error { + public: + explicit FailedUpdateGeneration(size_t attempts); + + /** + * @brief Get the number of attempts that were made to generate an update before failing + * @return + */ + size_t getAttemptedGenerationCount() const { return attempted_generation; } + + private: + const size_t attempted_generation; + }; + + /** + * @brief Construct a new generator with the given thresholds + * @param noise_magnitude The magnitude of the noise added to splitting updates. + * The noise is added so that the final matrix is not singular. + * If the noise is too small, Scherman-Morrison will fail, if it is too large, the update may not trigger any splits. + * Tested with a value of 1e-6 + * @param determinant_threshold + */ + explicit UpdateMatrixGenerator(double noise_magnitude, double determinant_threshold = 1e-6) + : noise_magnitude(noise_magnitude), determinant_threshold(determinant_threshold) {} + + + /** + * @brief Main function of the generator, takes a matrix as an input, and generate a series of update vectors + * Note that this method is thread-safe and re-entrant. + * @param slater_matrix The reference matrix + * @param update_plan The number of updates to generate + * @return An array of update vectors + */ + UpdateMatrix make(const Matrix &slater_matrix, size_t y_padding, + const UpdatePlan &update_plan) const; + + private: + const double noise_magnitude; + const double determinant_threshold; + + Matrix iterateOnPlan(const Matrix &m, UpdateMatrix *update_matrix, + const UpdatePlan &plan) const; + + void attemptToGenerateUpdates(const UpdatePlan &plan, UpdateMatrix *res, const Matrix &m) const; + + bool tryGenerateUpdates(UpdateMatrix *update_matrix, const UpdatePlan &plan, + const Matrix &m) const; + }; + + +}// namespace randomgen \ No newline at end of file diff --git a/random_generator/headers/cycle_generation/update_generation/UpdatePlan.hpp b/random_generator/headers/cycle_generation/update_generation/UpdatePlan.hpp new file mode 100644 index 0000000..eae3820 --- /dev/null +++ b/random_generator/headers/cycle_generation/update_generation/UpdatePlan.hpp @@ -0,0 +1,48 @@ +#pragma once + +#include "UpdateDescriptor.hpp" +#include +#include + +namespace randomgen { + + /** + * @brief Collection of UpdateDescriptors, describing all the updates in a cycle + * + * This class describes the updates contained in a cycle, as a set of UpdateDescriptors. + * It can be fed to the UpdatePlanBuilder to generate a cycle. + */ + class UpdatePlan { + public: + using Iterator = std::vector::iterator; + using ConstIterator = std::vector::const_iterator; + + UpdatePlan() = default; + + UpdatePlan(uint32_t updates_count); + + Iterator begin() { return updates_descriptors.begin(); } + + Iterator end() { return updates_descriptors.end(); } + + ConstIterator begin() const { return updates_descriptors.begin(); } + + ConstIterator end() const { return updates_descriptors.end(); } + + UpdateDescriptor &operator[](uint32_t index) { return updates_descriptors[index]; } + + const UpdateDescriptor &operator[](uint32_t index) const { return updates_descriptors[index]; } + + UpdateDescriptor &at(uint32_t index) { return updates_descriptors.at(index); } + + const UpdateDescriptor &at(uint32_t index) const { return updates_descriptors.at(index); } + + uint32_t size() const { return updates_descriptors.size(); } + + uint32_t getSplitCount() const; + + private: + std::vector updates_descriptors; + }; + +}// namespace randomgen diff --git a/random_generator/headers/cycle_generation/update_generation/UpdatePlanner.hpp b/random_generator/headers/cycle_generation/update_generation/UpdatePlanner.hpp new file mode 100644 index 0000000..041deaf --- /dev/null +++ b/random_generator/headers/cycle_generation/update_generation/UpdatePlanner.hpp @@ -0,0 +1,14 @@ +#pragma once +#include "UpdatePlan.hpp" +#include +#include + +namespace randomgen { + + class UpdatePlanner { + public: + virtual UpdatePlan make(uint32_t update_count, uint32_t split_count) const = 0; + }; + + +}// namespace randomgen diff --git a/random_generator/src/CMakeLists.txt b/random_generator/src/CMakeLists.txt new file mode 100644 index 0000000..0bcf72d --- /dev/null +++ b/random_generator/src/CMakeLists.txt @@ -0,0 +1,27 @@ + +add_subdirectory(cycle_generation) + +set(CURRENT_INCLUDE_DIR ${INCLUDE_DIR}) +add_executable(main + main.cpp + Engine.cpp ${CURRENT_INCLUDE_DIR}/Engine.hpp + ${CURRENT_INCLUDE_DIR}/EngineModeExecutor.hpp + MatrixSizeExecutor.cpp ${CURRENT_INCLUDE_DIR}/MatrixSizeExecutor.hpp + UpdateCountExecutor.cpp ${CURRENT_INCLUDE_DIR}/UpdateCountExecutor.hpp versioning.h.in ../headers/cycle_generation/CApi.h) + +target_include_directories(main PUBLIC ${CURRENT_INCLUDE_DIR}) + +set_target_properties(main PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${GLOBAL_BIN_DIRECTORY}) + +target_link_libraries(main PUBLIC cycle_generation OpenMP::OpenMP_CXX) + + +add_executable(c_api_test c_api_test.c) + +target_include_directories(c_api_test PUBLIC ${CURRENT_INCLUDE_DIR}) + +set_target_properties(c_api_test PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${GLOBAL_BIN_DIRECTORY}) + +target_link_libraries(c_api_test PUBLIC cycle_generation) \ No newline at end of file diff --git a/random_generator/src/Engine.cpp b/random_generator/src/Engine.cpp new file mode 100644 index 0000000..5c955a8 --- /dev/null +++ b/random_generator/src/Engine.cpp @@ -0,0 +1,70 @@ + +#include "Engine.hpp" +#include "MatrixSizeExecutor.hpp" +#include "UpdateCountExecutor.hpp" + +namespace { + std::unique_ptr makeExecutor(EngineMode mode, Engine *engine) { + // Create the correct executor for the given mode + // Note that the executor can use the engine to get access to the command line arguments + switch (mode) { + case EngineMode::kMatrixSize: + return std::make_unique(engine); + case EngineMode::kUpdateCount: + return std::make_unique(engine); + default: + throw std::runtime_error("Unknown engine mode"); + } + } + + void checkOutputPath(std::filesystem::path output_path) { + + if (not std::filesystem::exists(output_path)) { + output_path = std::filesystem::absolute(output_path); + // ensure that the parent path exists + std::filesystem::create_directories(output_path.parent_path()); + return; + } + + char answer = '\0'; + while (answer != 'y' and answer != 'n') { + std::cout << "Warning ! A file named " << output_path + << " Already exists !\n\tOverwrite ? (y/n) "; + std::string buffer; + std::cin >> buffer; + answer = buffer[0]; + } + + if (answer == 'n') { throw std::runtime_error("Aborted"); } + + std::filesystem::remove(output_path); + } +}// namespace + +Engine::Engine(int argc, char **argv) : argv(argv), argc(argc) { + if (argc < 3) { + std::cout << "Missing arguments" << std::endl; + std::cout << "Usage: " << argv[0] << " " + << " ()" << std::endl; + std::abort(); + } + + if (std::string(argv[2]) == "update") { + mode = EngineMode::kUpdateCount; + } else if (std::string(argv[2]) == "matrix_size") { + mode = EngineMode::kMatrixSize; + } else { + std::cout << "Error: Unknown mode: " << argv[2] << std::endl; + std::abort(); + } + + dataset_output_path = argv[1]; + checkOutputPath(dataset_output_path); +} + +int Engine::exec() { + auto executor = makeExecutor(mode, this); + auto return_code = executor->exec(); + + return return_code; +} diff --git a/random_generator/src/MatrixSizeExecutor.cpp b/random_generator/src/MatrixSizeExecutor.cpp new file mode 100644 index 0000000..cb71d03 --- /dev/null +++ b/random_generator/src/MatrixSizeExecutor.cpp @@ -0,0 +1,129 @@ + +#include "MatrixSizeExecutor.hpp" + +#include +#include +#include + +#include "ChainedUpdatePlanner.hpp" +#include "CycleGenerator.hpp" +#include "SizedBasedCycleStream.hpp" + +#include "Engine.hpp" + +using namespace randomgen; + +namespace { + + size_t dumpCyclesToDatastream(hdf5CycleOutputStream &ds, std::vector &cycle_buffer) { +#pragma omp critical + for (auto &c: cycle_buffer) { ds << c; } + + // Number of cycles that were dumped + size_t res = cycle_buffer.size(); + cycle_buffer.clear(); + return res; + } + + + void checkMultisizedParameters(const size_t starting_matrix_size, const size_t final_matrix_size, + const size_t update_count, const size_t max_split_count, + const size_t cycle_per_rank) { + if (starting_matrix_size > final_matrix_size) { + throw std::runtime_error("Starting matrix size must be smaller than final matrix size"); + } + + if (max_split_count > update_count) { + throw std::runtime_error("Update count must be smaller than max split count"); + } + + if (cycle_per_rank == 0) { throw std::runtime_error("Cycle per rank must be greater than 0"); } + + if (update_count > starting_matrix_size) { + throw std::runtime_error("Update count must be smaller than starting matrix size"); + } + } + + void generate_multisized_dataset(const std::filesystem::path &output_path, + const size_t starting_matrix_size, + const size_t final_matrix_size, const size_t update_count, + const size_t max_split_count, const size_t cycle_per_rank) { + checkMultisizedParameters(starting_matrix_size, final_matrix_size, update_count, + max_split_count, cycle_per_rank); + + + auto planner = std::make_shared(); + auto matrix_generator = std::make_shared(1e-3, 1e-6); + auto update_generator = std::make_shared(planner, matrix_generator); + + SizedBasedCycleStream ds(output_path); + + size_t total_rank_count = 0; + for (size_t i = starting_matrix_size; i <= final_matrix_size; i++) { + long splits_count = std::min(max_split_count, update_count - 1); + splits_count = std::max(splits_count, 0L); + total_rank_count += (splits_count + 1); + } + size_t generated_rank = 0; + size_t generated_cycles = 0; + + +#pragma omp parallel default(none) shared(generated_cycles, generated_rank) \ + shared(starting_matrix_size, final_matrix_size, update_generator, cycle_per_rank, \ + max_split_count, update_count, ds, total_rank_count, std::cout) + { + // Generate this amount of cycles before dumping them + // This is to avoid contention + const size_t cycle_buffer_size = std::min(256UL, 2 * cycle_per_rank); + std::vector cycle_buffer; + + + // Iterate on the matrix size +#pragma omp for schedule(dynamic) + for (size_t curr_size = starting_matrix_size; curr_size <= final_matrix_size; curr_size++) { + CycleGenerator generator(curr_size, 0, update_generator); + size_t splits_count = std::min(max_split_count, update_count - 1); + + // Iterate on the number of splits + for (size_t n_splits = 0; n_splits <= splits_count; n_splits++) { + + if (omp_get_thread_num() == 0) { + std::cout << "Generated " << generated_rank << "/" << total_rank_count << " ranks" + << "(" << generated_cycles << ")" << std::endl; + } + + // Generate cycle_per_rank matrices + for (size_t k = 0; k < cycle_per_rank; k++) { + cycle_buffer.push_back(generator.make(update_count, n_splits)); + // If we have generated enough cycles, dump them + + if (cycle_buffer.size() == cycle_buffer_size) { + // ensure the counter increase is atomic + size_t tmp = dumpCyclesToDatastream(ds, cycle_buffer); +#pragma omp atomic + generated_cycles += tmp; + } + } +#pragma omp atomic + generated_rank += 1; + } + } + } + } +}// namespace + +int MatrixSizeExecutor::exec() { + + constexpr size_t starting_matrix_size = 31; + constexpr size_t ending_matrix_size = 200; + constexpr size_t nbr_update = 31; + constexpr size_t max_split_count = nbr_update - 1; + constexpr size_t cycle_per_rank = 20; + + auto output_path = parent->getOutputPath(); + + generate_multisized_dataset(output_path, starting_matrix_size, ending_matrix_size, nbr_update, + max_split_count, cycle_per_rank); + + return 0; +} \ No newline at end of file diff --git a/random_generator/src/UpdateCountExecutor.cpp b/random_generator/src/UpdateCountExecutor.cpp new file mode 100644 index 0000000..9f361bf --- /dev/null +++ b/random_generator/src/UpdateCountExecutor.cpp @@ -0,0 +1,126 @@ + + +#include "UpdateCountExecutor.hpp" + +#include +#include +#include + +#include "ChainedUpdatePlanner.hpp" +#include "CycleGenerator.hpp" +#include "Engine.hpp" +#include "hdf5CycleOutputStream.hpp" + +using namespace randomgen; + +size_t dumpCyclesToDatastream(hdf5CycleOutputStream &ds, std::vector &cycle_buffer) { +#pragma omp critical + for (auto &c: cycle_buffer) { ds << c; } + + // Number of cycles that were dumped + size_t res = cycle_buffer.size(); + cycle_buffer.clear(); + return res; +} + + +void generateUpdates(std::filesystem::path output_path, const size_t matrix_size, + const size_t y_padding, const size_t lower_update_count, + const size_t upper_update_count, const size_t max_split_count, + const size_t cycle_per_rank) { + + + auto planner = std::make_shared(); + auto matrix_generator = std::make_shared(1e-6, 1e-6); + auto update_generator = std::make_shared(planner, matrix_generator); + CycleGenerator generator(matrix_size, y_padding, update_generator); + + hdf5CycleOutputStream ds(output_path); + + // For pretty printing the progress + // The rank is a combination of (n_update, n_splits) + size_t total_rank = 0; + size_t current_rank = 0; + size_t cycle_counter = 0; + + // Total number of ranks to compute + for (size_t i = lower_update_count; i <= upper_update_count; i++) { + size_t splits_count = std::min(max_split_count, i); + splits_count = std::max(splits_count - 2, 0UL); + total_rank += splits_count; + } + + +#pragma omp parallel default(none) \ + shared(total_rank, current_rank, cycle_counter, generator, ds, std::cout, \ + lower_update_count, upper_update_count, max_split_count, cycle_per_rank) + { + // Generate this amount of cycles before dumping them + // This is to avoid contention + const size_t cycle_buffer_size = std::min(256UL, 2 * cycle_per_rank); + std::vector cycle_buffer; + + // Some cycles take longer to generate than others + // (espcially cycles with more splits and / or more updates) + // So we use a dynamic scheduling +#pragma omp for schedule(dynamic) + for (size_t n_update = lower_update_count; n_update <= upper_update_count; n_update++) { + size_t splits_count = std::min(max_split_count, n_update - 1); + // For each update count, generate all combinations of splits number + for (size_t n_splits = 2; n_splits <= splits_count; n_splits++) { + + // Pretty print the progress :) + if (omp_get_thread_num() == 0) { + std::cout << "Generated rank " << current_rank << "/" << total_rank << " (" + << cycle_counter << ")" << std::endl; + } + + for (size_t k = 0; k < cycle_per_rank; k++) { + cycle_buffer.push_back(generator.make(n_update, n_splits)); + // If we have generated enough cycles, dump them + + if (cycle_buffer.size() == cycle_buffer_size) { + // ensure the counter increase is atomic + size_t tmp = dumpCyclesToDatastream(ds, cycle_buffer); +#pragma omp atomic + cycle_counter += tmp; + } + } +#pragma omp atomic + current_rank++; + } + } + // Dump the remaining cycles still in the buffer + size_t tmp = dumpCyclesToDatastream(ds, cycle_buffer); +#pragma omp atomic + cycle_counter += tmp; + } + std::cout << "Generated rank " << current_rank << "/" << total_rank << " (" << cycle_counter + << ")" << std::endl; +} + + +int UpdateCountExecutor::exec() { + + // The generated matrix are square matrices... + // But we can pad them with zeros to make them rectangular (useful for optimal vectorization) + constexpr size_t matrix_size = 21; + constexpr size_t y_padding = 3; + + // Start generating cycles with this number of updates + constexpr size_t lower_update_count = 1; + // The maximal number of updates per cycle to generate + constexpr size_t upper_update_count = 20; + // The maximum number of splits per cycle + // By default, this is capped to upper_update_count - 1 + constexpr size_t max_split_count = upper_update_count - 1; + // The number of cycles to generate for each "rank" + // Where the rank is the combination (update_count, split_count) + constexpr size_t cycle_per_rank = 80; + + auto output_path = parent->getOutputPath(); + + generateUpdates(output_path, matrix_size, y_padding, lower_update_count, upper_update_count, + max_split_count, cycle_per_rank); + return 0; +} diff --git a/random_generator/src/c_api_test.c b/random_generator/src/c_api_test.c new file mode 100644 index 0000000..f25c4af --- /dev/null +++ b/random_generator/src/c_api_test.c @@ -0,0 +1,29 @@ + +#include "CApi.h" +#include "stdio.h" +#include "stdlib.h" + +int main() { + Cycle *cycle = generateRandomCycle(3, 5, 3, 2, 1e-3, 1e-3); + printf("%d\n", cycle->n_update); + printf("%d\n", cycle->dim); + printf("%d\n", cycle->lds); + printf("%f\n", cycle->determinant); + // Matrix: + for (int i = 0; i < cycle->dim; i++) { + for (int j = 0; j < cycle->lds; j++) { + printf("%f ", cycle->slater_matrix[i * cycle->lds + j]); + } + printf("\n"); + } + printf("\n"); + + // update_array: + for (int i = 0; i < cycle->n_update; i++) { + printf("Update %i:\n", i); + for (int j = 0; j < cycle->lds; j++) { printf("%lf ", cycle->updates[i * cycle->lds + j]); } + printf("\n"); + } + freeCycle(&cycle); + return 0; +} \ No newline at end of file diff --git a/random_generator/src/cycle_generation/CApi.cpp b/random_generator/src/cycle_generation/CApi.cpp new file mode 100644 index 0000000..cf3c63c --- /dev/null +++ b/random_generator/src/cycle_generation/CApi.cpp @@ -0,0 +1,100 @@ +#include "CApi.h" +#include "ChainedUpdatePlanner.hpp" +#include "CycleGenerator.hpp" +#include +#include + +namespace { + + void convertMatrix(randomgen::Matrix source, double **destination) { + *destination = (double *) malloc(sizeof(double) * source.x * source.y); + std::copy(source.data(), source.data() + source.x * source.y, *destination); + } + + void convertMatrices(randomgen::Cycle &cpp_cycle, Cycle *res) {// Copy the slater matrix + // Copy the inverse slater matrix + convertMatrix(cpp_cycle.getSlaterMatrix(), &res->slater_matrix); + convertMatrix(cpp_cycle.getSlaterInverseTransposed(), &res->slater_inverse_t); + } + + + void convertUpdates(randomgen::Cycle &cpp_cycle, Cycle *res) { + auto &update_matrix = cpp_cycle.getUpdateMatrix(); + auto raw_updates = update_matrix.getRawUpdates(); + res->updates = (double *) malloc(sizeof(double) * update_matrix.getUpdateCount() * + update_matrix.getUpdateLength()); + + std::copy(raw_updates, + raw_updates + update_matrix.getUpdateCount() * update_matrix.getUpdateLength(), + res->updates); + + res->col_update_index = (uint32_t *) malloc(sizeof(uint32_t) * update_matrix.getUpdateCount()); + std::copy(update_matrix.getUpdateIndices(), + update_matrix.getUpdateIndices() + update_matrix.getUpdateCount(), + res->col_update_index); + } + + + Cycle *convertCPPCycleToC(randomgen::Cycle &cpp_cycle) { + auto *res = (Cycle *) malloc(sizeof(Cycle)); + res->lds = cpp_cycle.getSlaterMatrix().y; + res->dim = cpp_cycle.getSlaterMatrix().x; + res->n_update = cpp_cycle.getUpdateMatrix().getUpdateCount(); + res->determinant = cpp_cycle.getSlaterMatrix().getLastKnownDeterminant(); + + convertMatrices(cpp_cycle, res); + convertUpdates(cpp_cycle, res); + + return res; + } + +}// namespace + +// Those functions must have C linkage +extern "C" { + +Cycle *generateRandomCycle(uint32_t dim, uint32_t lds, uint32_t n_update, uint32_t n_splits, + double determinant_threshold, double splitting_update_noise_magnitude) { + + if (lds < dim) { + std::cerr << "generateRandomCycle: lds < dim" << std::endl; + return nullptr; + } + + auto y_padding = lds - dim; + + // We need to build a generator at every call, which is not optimal + auto planner = std::make_unique(); + auto matrix_generator = std::make_unique( + splitting_update_noise_magnitude, determinant_threshold); + auto update_generator = std::make_shared(std::move(planner), + std::move(matrix_generator)); + + + // This will generate a random cycle + // However, those class use smart pointers (that are private to the library) + // So we can't return them directly. + // This requires the additional step of converting the C++ class to a C struct. + // Which has a cost memory/performance wise + randomgen::CycleGenerator generator(dim, y_padding, update_generator); + randomgen::Cycle cpp_cycle = generator.make(n_update, n_splits); + auto converted = convertCPPCycleToC(cpp_cycle); + return converted; +} + +void freeCycle(Cycle **cycle) { + if (cycle == nullptr or *cycle == nullptr) { + std::cerr << "freeCycle: cycle is nullptr !" << std::endl; + return; + } + + + auto c = *cycle; + free(c->slater_matrix); + free(c->slater_inverse_t); + free(c->updates); + free(c->col_update_index); + free(c); + *cycle = nullptr; +} +} diff --git a/random_generator/src/cycle_generation/CMakeLists.txt b/random_generator/src/cycle_generation/CMakeLists.txt new file mode 100644 index 0000000..68a9c64 --- /dev/null +++ b/random_generator/src/cycle_generation/CMakeLists.txt @@ -0,0 +1,14 @@ + +add_subdirectory(update_generation) + +set(CURRENT_INCLUDE_DIR ${INCLUDE_DIR}/cycle_generation) +add_library(cycle_generation + ${CURRENT_INCLUDE_DIR}/Cycle.hpp + Matrix.cpp ${CURRENT_INCLUDE_DIR}/Matrix.hpp + hdf5CycleOutputStream.cpp ${CURRENT_INCLUDE_DIR}/hdf5CycleOutputStream.hpp + SizedBasedCycleStream.cpp ${CURRENT_INCLUDE_DIR}/SizedBasedCycleStream.hpp + CycleGenerator.cpp ${CURRENT_INCLUDE_DIR}/CycleGenerator.hpp + CApi.cpp) +target_include_directories(cycle_generation PUBLIC ${CURRENT_INCLUDE_DIR} ${HDF5_INCLUDE_DIRS}) +target_link_libraries(cycle_generation PUBLIC update_generation ${HDF5_LIBRARIES} + m ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES} lapacke) \ No newline at end of file diff --git a/random_generator/src/cycle_generation/CycleGenerator.cpp b/random_generator/src/cycle_generation/CycleGenerator.cpp new file mode 100644 index 0000000..83c2ad8 --- /dev/null +++ b/random_generator/src/cycle_generation/CycleGenerator.cpp @@ -0,0 +1,74 @@ + + +#include "cycle_generation/CycleGenerator.hpp" + +#include + +namespace randomgen { + + CycleGenerator::CycleGenerator(size_t x, size_t y_padding, + std::shared_ptr update_generator) + : update_generator(std::move(update_generator)), x(x), y_padding(y_padding) {} + + + void CycleGenerator::generateCycleMatrices(Cycle *res) { + // Generate random matrices until we find one with a given determinant + do { + res->getSlaterMatrix() = matrix_generator(x); + } while (res->getSlaterMatrix().getLastKnownDeterminant() < 1e-5); + + res->getSlaterInverse() = res->getSlaterMatrix().inverse(); + } + + + Cycle CycleGenerator::attemptToGenerateCycle(size_t n_update, size_t n_splits) { + Cycle res{}; + size_t attempt = 0; + constexpr size_t kMaxAttempts = 20; + + auto plan = update_generator->plan(n_update, n_splits); + + while (true) { + attempt++; + generateCycleMatrices(&res); + + // Try to find a valid update vector + // If we can't, start from scratch + try { + res.getUpdateMatrix() = + update_generator->make(res.getSlaterMatrix().transpose(), y_padding, plan); + break; + } catch (UpdateMatrixGenerator::FailedUpdateGeneration &e) { + // Abort after a certain number of attempts + if (attempt > kMaxAttempts) { + throw std::runtime_error("Could not generate a valid cycle in " + + std::to_string(attempt) + " attempts"); + } + continue; + } + } + return res; + } + + void CycleGenerator::finalizeCycle(Cycle &res) const { + // The inverse in the dataset must be transposed + // this allows the code of Scherman-morrison to be vectorized since column-updates becomes row-updates + res.getSlaterInverse() = res.getSlaterInverse().transpose(); + res.getSlaterMatrix().pad(0, y_padding); + res.getSlaterInverse().pad(0, y_padding); + + for (uint32_t i = 0; i < res.getUpdateMatrix().getUpdateCount(); i++) { + res.getUpdateMatrix().getUpdateIndices()[i] += 1; + } + } + + Cycle CycleGenerator::make(size_t n_update, size_t n_splits) { + + Cycle res = attemptToGenerateCycle(n_update, n_splits); + finalizeCycle(res); + + return res; + } + + +}// namespace randomgen \ No newline at end of file diff --git a/random_generator/src/cycle_generation/Matrix.cpp b/random_generator/src/cycle_generation/Matrix.cpp new file mode 100644 index 0000000..65fd2c3 --- /dev/null +++ b/random_generator/src/cycle_generation/Matrix.cpp @@ -0,0 +1,110 @@ + +#include "cycle_generation/Matrix.hpp" +#include + +namespace randomgen { + + Matrix::Matrix(size_t x, size_t y) : x(x), y(y) { array = std::make_unique(x * y); } + + Matrix::Matrix(const Matrix &other) : Matrix(other.x, other.y) { + std::copy(other.array.get(), other.array.get() + x * y, array.get()); + determinant = other.determinant; + } + + Matrix &Matrix::operator=(const Matrix &other) { + if (this == &other) { return *this; } + // Realloc a matrix if the size is different + + if (x * y != other.x * other.y) { array = std::make_unique(other.x * other.y); } + + x = other.x; + y = other.y; + + std::copy(other.array.get(), other.array.get() + x * y, array.get()); + return *this; + } + + Matrix Matrix::inverse() { + // Return the inverse of this matrix using lapacke dgetrf + dgetri + Matrix inv = *this; + std::vector ipiv(x); + auto info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, x, y, inv.array.get(), x, ipiv.data()); + if (info != 0) { throw std::runtime_error("Matrix inversion failed"); } + auto info2 = LAPACKE_dgetri(LAPACK_ROW_MAJOR, x, inv.array.get(), x, ipiv.data()); + if (info2 != 0) { throw std::runtime_error("Matrix inversion failed"); } + + inv.computeDeterminant(); + return inv; + } + + Matrix Matrix::transpose() { + Matrix res(x, y); + + for (size_t i = 0; i < x; i++) { + for (size_t j = 0; j < y; j++) { res.array[j * x + i] = array[i * y + j]; } + } + res.computeDeterminant(); + + return res; + } + + std::ostream &operator<<(std::ostream &os, const Matrix &m) { + for (size_t i = 0; i < m.x; i++) { + for (size_t j = 0; j < m.y; j++) { os << m.array[i * m.y + j] << " "; } + os << std::endl; + } + return os; + } + + void Matrix::pad(size_t x_pad, size_t y_pad) { + if (x_pad == 0 && y_pad == 0) { return; } + + size_t new_size = (x + x_pad) * (y + y_pad); + std::unique_ptr padded_data = std::make_unique(new_size); + + // Set the padding to 0 + std::fill(padded_data.get(), padded_data.get() + (new_size), 0); + + // Copy the old data to the new data + for (int i = 0; i < x; i++) { + for (int j = 0; j < y; j++) { padded_data[i * (y + y_pad) + j] = array[i * y + j]; } + } + x += x_pad; + y += y_pad; + this->array = std::move(padded_data); + } + + double Matrix::computeDeterminant() { + // Returns the determinant of the input matrix using LAPACK + // Copy the matrix as lapack modifies it + std::unique_ptr copy = std::make_unique(x * y); + std::copy(array.get(), array.get() + (x * y), copy.get()); + + std::vector pivot(x); + auto info = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, x, y, copy.get(), x, pivot.data()); + if (info != 0) { + std::cout << "HERE!" << std::endl; + throw std::runtime_error("Matrix inversion failed"); + } + + double det = 1; + for (int i = 0; i < y; ++i) { det *= array[i * x + i]; } + determinant = det; + return det; + } + + double Matrix::getLastKnownDeterminant() const { return determinant; } + + + Matrix MatrixGenerator::operator()(size_t matrix_size, double vmin, double vmax) { + Matrix res{matrix_size, matrix_size}; + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis(vmin, vmax); + std::generate(res.array.get(), res.array.get() + (matrix_size * matrix_size), + [&]() { return dis(gen); }); + res.computeDeterminant(); + return res; + } + +}// namespace randomgen \ No newline at end of file diff --git a/random_generator/src/cycle_generation/SizedBasedCycleStream.cpp b/random_generator/src/cycle_generation/SizedBasedCycleStream.cpp new file mode 100644 index 0000000..c780201 --- /dev/null +++ b/random_generator/src/cycle_generation/SizedBasedCycleStream.cpp @@ -0,0 +1,14 @@ + +#include "cycle_generation/SizedBasedCycleStream.hpp" + +namespace randomgen { + std::string SizedBasedCycleStream::getPathFor(const randomgen::Cycle &cycle) { + // Save the cycle based on the size of its matrix + number of splits + std::string res = "/"; + res += std::to_string(cycle.getSlaterMatrix().x) + "/"; + res += std::to_string(cycle.getUpdateMatrix().getSplitCount()) + "/"; + res += "cycle_" + std::to_string(last_unique_cycle_id); + return res; + } + +}// namespace randomgen diff --git a/random_generator/src/cycle_generation/hdf5CycleOutputStream.cpp b/random_generator/src/cycle_generation/hdf5CycleOutputStream.cpp new file mode 100644 index 0000000..da8efca --- /dev/null +++ b/random_generator/src/cycle_generation/hdf5CycleOutputStream.cpp @@ -0,0 +1,135 @@ +#include "cycle_generation/hdf5CycleOutputStream.hpp" + +namespace fs = std::filesystem; + +namespace randomgen { + + + namespace { + + void writeDeterminant(const Cycle &cycle, + hid_t cycle_gid) {// Write the determinant of the matrix + hid_t det_space = H5Screate_simple(1, std::vector{1}.data(), nullptr); + hid_t determinant_loc = + H5Dcreate1(cycle_gid, "determinant", H5T_NATIVE_DOUBLE, det_space, H5P_DEFAULT); + if (determinant_loc < 0) { throw std::runtime_error("H5Dcreate1"); } + + double determinant = cycle.getSlaterMatrix().getLastKnownDeterminant(); + + H5Dwrite(determinant_loc, H5T_NATIVE_DOUBLE, det_space, H5S_ALL, H5S_ALL, &determinant); + H5Dclose(determinant_loc); + H5Sclose(det_space); + } + + void writeMatrix(const Matrix &mat, hid_t group_id, hid_t slater_space, + const std::string &dataset_name) { + hid_t slater_matrix_loc = H5Dcreate1(group_id, dataset_name.c_str(), H5T_NATIVE_DOUBLE, + slater_space, H5P_DEFAULT); + if (slater_matrix_loc < 0) { throw std::runtime_error("H5Dcreate1"); } + + H5Dwrite(slater_matrix_loc, H5T_NATIVE_DOUBLE, slater_space, H5S_ALL, H5S_ALL, mat.data()); + H5Dclose(slater_matrix_loc); + } + + + void writeMatrices(const Cycle &cycle, hid_t group_gid) { + unsigned int y_size = cycle.getSlaterInverseTransposed().y, + x_size = cycle.getSlaterInverseTransposed().x; + + // Write both the matrix and its inverse + auto slater_space = H5Screate_simple(2, std::vector{x_size, y_size}.data(), nullptr); + writeMatrix(cycle.getSlaterMatrix(), group_gid, slater_space, "slater_matrix"); + writeMatrix(cycle.getSlaterInverseTransposed(), group_gid, slater_space, "slater_inverse_t"); + H5Sclose(slater_space); + + writeDeterminant(cycle, group_gid); + } + + void writeUpdatesMetadata(const Cycle &cycle, hid_t cycle_gid) {// Write the number of updates + hid_t updates_count_space = H5Screate_simple(1, std::vector{1}.data(), nullptr); + hid_t updates_count_loc = H5Dcreate1(cycle_gid, "nupdates", H5T_NATIVE_UINT32, + updates_count_space, H5P_DEFAULT); + if (updates_count_loc < 0) { throw std::runtime_error("H5Dcreate1"); } + + auto update_count = cycle.getUpdateMatrix().getUpdateCount(); + H5Dwrite(updates_count_loc, H5T_NATIVE_UINT32, updates_count_space, H5S_ALL, H5S_ALL, + &update_count); + H5Dclose(updates_count_loc); + H5Sclose(updates_count_space); + + + // Write the columns of the updates matrix + hid_t updates_index = H5Screate_simple( + 1, std::vector{cycle.getUpdateMatrix().getUpdateCount()}.data(), nullptr); + hid_t updates_index_loc = H5Dcreate1(cycle_gid, "col_update_index", H5T_NATIVE_UINT32, + updates_index, H5P_DEFAULT); + if (updates_index_loc < 0) { throw std::runtime_error("H5Dcreate1"); } + H5Dwrite(updates_index_loc, H5T_NATIVE_UINT32, updates_index, H5S_ALL, H5S_ALL, + cycle.getUpdateMatrix().getUpdateIndices()); + H5Dclose(updates_index_loc); + H5Sclose(updates_index); + } + + void writeUpdateMatrix(const Cycle &cycle, hid_t cycle_gid) {// Write the updates matrix + unsigned int y_size = cycle.getSlaterInverseTransposed().y; + + hid_t update_space = H5Screate_simple( + 2, std::vector{cycle.getUpdateMatrix().getUpdateCount(), y_size}.data(), + nullptr); + hid_t update_loc = + H5Dcreate1(cycle_gid, "updates", H5T_NATIVE_DOUBLE, update_space, H5P_DEFAULT); + if (update_loc < 0) { throw std::runtime_error("H5Dcreate1"); } + H5Dwrite(update_loc, H5T_NATIVE_DOUBLE, update_space, H5S_ALL, H5S_ALL, + cycle.getUpdateMatrix().getRawUpdates()); + H5Dclose(update_loc); + H5Sclose(update_space); + } + + void writeUpdates(const Cycle &cycle, hid_t cycle_id) { + writeUpdatesMetadata(cycle, cycle_id); + writeUpdateMatrix(cycle, cycle_id); + } + + }// namespace + + hdf5CycleOutputStream::hdf5CycleOutputStream(const fs::path &dataset_path) { + // Create a new file + file_id = H5Fcreate(dataset_path.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + if (file_id < 0) { throw std::runtime_error("Could not create dataset file"); } + } + + hdf5CycleOutputStream::~hdf5CycleOutputStream() { + if (file_id >= 0) { H5Fclose(file_id); } + file_id = -1; + } + + hid_t hdf5CycleOutputStream::createGroup(const std::string &path) { + // Needed to create the parent directories automatically + auto lcpl = H5Pcreate(H5P_LINK_CREATE); + H5Pset_create_intermediate_group(lcpl, 1); + hid_t group_id = H5Gcreate2(file_id, path.c_str(), lcpl, H5P_DEFAULT, H5P_DEFAULT); + H5Pclose(lcpl); + return group_id; + } + + std::string hdf5CycleOutputStream::getPathFor(const Cycle &cycle) { + std::string res = "/"; + res += std::to_string(cycle.getUpdateMatrix().getUpdateCount()) + "/"; + res += std::to_string(cycle.getUpdateMatrix().getSplitCount()) + "/"; + res += "cycle_" + std::to_string(last_unique_cycle_id); + return res; + } + + hdf5CycleOutputStream &operator<<(hdf5CycleOutputStream &ds, const Cycle &cycle) { + // Build the path to the new dataset inside the file + std::string cycle_path = ds.getPathFor(cycle); + hid_t group_id = ds.createGroup(cycle_path); + ds.last_unique_cycle_id++; + + writeMatrices(cycle, group_id); + writeUpdates(cycle, group_id); + + H5Gclose(group_id); + return ds; + } +}// namespace randomgen diff --git a/random_generator/src/cycle_generation/update_generation/CMakeLists.txt b/random_generator/src/cycle_generation/update_generation/CMakeLists.txt new file mode 100644 index 0000000..d9197d1 --- /dev/null +++ b/random_generator/src/cycle_generation/update_generation/CMakeLists.txt @@ -0,0 +1,16 @@ + +set(CURRENT_INCLUDE_DIR ${INCLUDE_DIR}/cycle_generation/update_generation) +add_library(update_generation + UpdateDescriptor.cpp ${CURRENT_INCLUDE_DIR}/UpdateDescriptor.hpp + UpdateMatrix.cpp ${CURRENT_INCLUDE_DIR}/UpdateMatrix.hpp + UpdatePlan.cpp ${CURRENT_INCLUDE_DIR}/UpdatePlan.hpp + ${CURRENT_INCLUDE_DIR}/UpdatePlanner.hpp + UpdateMatrixGenerator.cpp ${CURRENT_INCLUDE_DIR}/UpdateGenerator.hpp + UpdateGenerator.cpp ${CURRENT_INCLUDE_DIR}/UpdateGenerator.hpp + ChainedUpdatePlanner.cpp ${CURRENT_INCLUDE_DIR}/ChainedUpdatePlanner.hpp + ) +set_target_properties(update_generation PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY ${GLOBAL_LIB_DIRECTORY} LIBRARY_OUTPUT_DIRECTORY ${GLOBAL_LIB_DIRECTORY} + ) + +target_include_directories(update_generation PUBLIC ${CURRENT_INCLUDE_DIR} ${INCLUDE_DIR}) \ No newline at end of file diff --git a/random_generator/src/cycle_generation/update_generation/ChainedUpdatePlanner.cpp b/random_generator/src/cycle_generation/update_generation/ChainedUpdatePlanner.cpp new file mode 100644 index 0000000..134b3d1 --- /dev/null +++ b/random_generator/src/cycle_generation/update_generation/ChainedUpdatePlanner.cpp @@ -0,0 +1,92 @@ + +#include "ChainedUpdatePlanner.hpp" +#include +#include + +namespace randomgen { + + + namespace { + + /** + * @brief Iterate on a plan and reorders the split to ensure the last update does not split + * @param plan + */ + void reorderSplits(UpdatePlan *plan) { + + // Find the first non-splitting updates + size_t first_non_split = -1; + for (size_t i = 0; i < plan->size(); ++i) { + if (not plan->operator[](i).isSplitting()) { + first_non_split = i; + break; + } + } + + if (first_non_split == -1) { + throw std::runtime_error("Reordering Failure: No non-splitting update available for " + "reordering was found in the plan"); + } + + // Swap the first non-splitting update with the last splitting update + plan->operator[](plan->size() - 1).setSplitting(false); + plan->operator[](first_non_split).setSplitting(true); + } + + /** + * @brief Check the order of the splitting updates in the plan, and maybe reorder them if the last update is + * a splitting update + * + * @param plan + * @param splits + */ + void maybeReorderSplits(UpdatePlan *plan, const size_t splits) { + if (plan->size() < 2 or splits == 0) { return; } + + // Ensure that the cycle is broken by the last update + // Should look something like [..., splits, doesn't splits] + bool invalid_chain = plan->at(plan->size() - 1).isSplitting(); + if (not invalid_chain) { return; } + + // If the chain is invalid, reorder the splits + reorderSplits(plan); + } + + /** + * @brief Make a plan where updates are generated left-to-right, and splitting updates are based on the right column + * This allows the colinearity of the updates to be broken by the next update, effectively ensuring that the final matrix + * is not singular + * + * @param plan + * @param splits + */ + void makeChainedUpdatePlan(UpdatePlan *plan, const size_t n_update, const size_t splits) { + + std::vector split_order(plan->size(), false); + for (size_t i = 0; i < splits; ++i) { split_order[i] = true; } + + std::shuffle(split_order.begin(), split_order.end(), std::mt19937{std::random_device{}()}); + + for (size_t i = 0; i < plan->size(); ++i) { + auto &update_desc = plan->at(i); + update_desc.setSplitting(split_order[i]); + // Always split by + update_desc.setTargetColumnIndex(i + 1); + update_desc.setColumnIndex(i); + } + + maybeReorderSplits(plan, splits); + } + + }// namespace + + UpdatePlan ChainedUpdatePlanBuilder::make(uint32_t update_count, uint32_t split_count) const { + if (split_count >= update_count) + throw std::runtime_error("Cannot have more splits than updates"); + + UpdatePlan res(update_count); + makeChainedUpdatePlan(&res, update_count, split_count); + return res; + } + +}// namespace randomgen diff --git a/random_generator/src/cycle_generation/update_generation/UpdateDescriptor.cpp b/random_generator/src/cycle_generation/update_generation/UpdateDescriptor.cpp new file mode 100644 index 0000000..a422524 --- /dev/null +++ b/random_generator/src/cycle_generation/update_generation/UpdateDescriptor.cpp @@ -0,0 +1,2 @@ + +#include "UpdateDescriptor.hpp" diff --git a/random_generator/src/cycle_generation/update_generation/UpdateGenerator.cpp b/random_generator/src/cycle_generation/update_generation/UpdateGenerator.cpp new file mode 100644 index 0000000..a6702db --- /dev/null +++ b/random_generator/src/cycle_generation/update_generation/UpdateGenerator.cpp @@ -0,0 +1,30 @@ + +#include "UpdateGenerator.hpp" + +#include + +namespace randomgen { + + UpdateGenerator::UpdateGenerator(std::shared_ptr update_planner, + std::shared_ptr update_generator) + : planner(std::move(update_planner)), generator(std::move(update_generator)) {} + + + UpdatePlan UpdateGenerator::plan(uint32_t update_count, uint32_t split_count) const { + return planner->make(update_count, split_count); + } + + UpdateMatrix UpdateGenerator::make(const Matrix &slater_matrix, uint32_t y_padding, + const UpdatePlan &update_plan) const { + return generator->make(slater_matrix, y_padding, update_plan); + } + + + UpdateMatrix UpdateGenerator::make(const Matrix &slater_matrix_t, uint32_t y_padding, + uint32_t update_count, uint32_t split_count) const { + auto new_plan = plan(update_count, split_count); + return make(slater_matrix_t, y_padding, new_plan); + } + + +}// namespace randomgen \ No newline at end of file diff --git a/random_generator/src/cycle_generation/update_generation/UpdateMatrix.cpp b/random_generator/src/cycle_generation/update_generation/UpdateMatrix.cpp new file mode 100644 index 0000000..de54a27 --- /dev/null +++ b/random_generator/src/cycle_generation/update_generation/UpdateMatrix.cpp @@ -0,0 +1,48 @@ + +#include "UpdateMatrix.hpp" + + +namespace randomgen { + + UpdateMatrix::UpdateMatrix(const UpdateMatrix &other) + : UpdateMatrix(other.n_update, other.n_splits, other.update_length) {} + + UpdateMatrix::UpdateMatrix(uint32_t n_update, uint32_t n_splits, uint32_t update_length) + : n_update(n_update), n_splits(n_splits), update_length(update_length) { + matrix = std::make_unique(n_update * update_length); + update_index = std::make_unique(n_update); + } + + UpdateMatrix &UpdateMatrix::operator=(const UpdateMatrix &other) { + if (this == &other) { return *this; } + + // Reallocate arrays if needed + if (n_update * update_length != other.n_update * other.update_length) { + matrix = std::make_unique(other.n_update * other.update_length); + } + + if (n_update != other.n_update) { update_index = std::make_unique(other.n_update); } + + n_update = other.n_update; + n_splits = other.n_splits; + update_length = other.update_length; + + std::copy(other.matrix.get(), other.matrix.get() + n_update * update_length, matrix.get()); + std::copy(other.update_index.get(), other.update_index.get() + n_update, update_index.get()); + return *this; + } + + void UpdateMatrix::setUpdate(uint32_t update_rank, const std::span &update, + uint32_t column_index) { + // Fetch the view where the update must be stored inside the update matrix + auto update_span = getUpdateSpan(update_rank); + + if (update.size() > update_span.size()) { + throw std::runtime_error("Update vector is too large"); + } + + std::copy(update.begin(), update.end(), update_span.begin()); + update_index.get()[update_rank] = column_index; + } + +}// namespace randomgen \ No newline at end of file diff --git a/random_generator/src/cycle_generation/update_generation/UpdateMatrixGenerator.cpp b/random_generator/src/cycle_generation/update_generation/UpdateMatrixGenerator.cpp new file mode 100644 index 0000000..00b8dac --- /dev/null +++ b/random_generator/src/cycle_generation/update_generation/UpdateMatrixGenerator.cpp @@ -0,0 +1,199 @@ + +#include "UpdateMatrixGenerator.hpp" + +namespace randomgen { + + namespace { + + + std::vector tryGenerateNonSplittingUpdate(const UpdateDescriptor &descriptor, + const Matrix ¤t_matrix) { + + size_t kMaxAttempts = 100; + size_t attempts = 0; + + Matrix copy = current_matrix; + std::vector update(current_matrix.x); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis(0, 5); + + while (true) { + // Generate a new update vector + std::generate(update.begin(), update.end(), [&]() { return dis(gen); }); + + // Check if the update vector is a valid update + for (int i = 0; i < current_matrix.x; i++) { + copy.array[i * current_matrix.y + descriptor.getColumnIndex()] += update[i]; + } + + // After the update is applied, the determinant of the matrix should be > 10e-3 + double absolute_det = std::abs(copy.computeDeterminant()); + if (absolute_det > 1e-3) { break; } + + // Reverse the update + for (int i = 0; i < current_matrix.x; i++) { + copy.array[i * current_matrix.y + descriptor.getColumnIndex()] -= update[i]; + } + + attempts++; + if (attempts > kMaxAttempts) { + throw UpdateMatrixGenerator::FailedUpdateGeneration(attempts); + } + } + + return update; + } + + std::vector &addRandomNoise(std::vector &update, const double noise_magnitude) { + // We add a noise of magnitude 1e-6 to the update vector so the resulting columns are not perfectly collinear + // Which ensures the matrix stays invertible. + + // First generate a random vector + std::vector noise(update.size()); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis(0, 100); + + std::generate(noise.begin(), noise.end(), [&]() { return dis(gen); }); + + // Compute the norm + double norm = 0; + for (size_t i = 0; i < update.size(); i++) { norm += noise[i] * noise[i]; } + + // Coefficient to have a magnitude of 1e-6 + // No need to update the noise vector, just multiply it in place + norm = noise_magnitude / sqrt(norm); + for (size_t i = 0; i < update.size(); i++) { update[i] += noise[i] * norm; } + return update; + } + + std::vector generate_splitting_update(const UpdateDescriptor &descriptor, + const Matrix &m, const double noise_magnitude) { + + size_t target_index = descriptor.getTargetColumnIndex(); + + constexpr double lambda = 1; + std::vector update(m.x); + for (size_t i = 0; i < m.x; i++) { + update[i] = (lambda * m.array[i * m.y + target_index]) // Target column + - m.array[i * m.y + descriptor.getColumnIndex()];// Current column + } + + // Add a small random noise to the update vector to break the singularity + update = addRandomNoise(update, noise_magnitude); + + return update; + } + + /** + * The updates may be padded with extra zeros, so we need to check the generated update size + */ + void maybePadUpdate(std::vector &update_vec, uint32_t padded_size) { + if (padded_size == update_vec.size()) return; + + if (padded_size < update_vec.size()) + throw std::runtime_error("Padded update size is smaller than the original update size"); + + size_t old_size = update_vec.size(); + update_vec.resize(padded_size); + for (size_t i = old_size; i < padded_size; i++) { update_vec[i] = 0; } + } + + void updateSlaterMatrix( + Matrix &slater_matrix, const UpdateDescriptor &descriptor, + const std::vector & + update_vec) {// We found the correct update vector, so return it + update the current matrix + for (int i = 0; i < slater_matrix.y; i++) { + slater_matrix.array[i * slater_matrix.y + descriptor.getColumnIndex()] += update_vec[i]; + } + } + + std::vector makePotentialUpdate(const UpdateDescriptor &descriptor, + const Matrix &slater_matrix, uint32_t padded_size, + double noise_magnitude) { + + std::vector res; + if (descriptor.isSplitting()) { + res = generate_splitting_update(descriptor, slater_matrix, noise_magnitude); + } else { + res = tryGenerateNonSplittingUpdate(descriptor, slater_matrix); + } + + maybePadUpdate(res, padded_size); + return res; + } + + }// namespace + + // Alias for the exception throw when the generation fails to find a correct update. + // This exception is not make for runtime error ! + using FailedUpdateGeneration = UpdateMatrixGenerator::FailedUpdateGeneration; + + FailedUpdateGeneration::FailedUpdateGeneration(size_t attempts) + : std::runtime_error("Failed to generate a valid update after " + std::to_string(attempts) + + " attempts"), + attempted_generation(attempts) {} + + Matrix UpdateMatrixGenerator::iterateOnPlan(const Matrix &m, UpdateMatrix *update_matrix, + const UpdatePlan &plan) const { + // This matrix represents the matrix after the nth update + // At the beginning, it is the same as the input matrix + Matrix current_slater_matrix = m; + + + // Iterate on the plan, generating fitting updates + for (uint32_t rank = 0; const auto &descriptor: plan) { + + // Randomly generate a potential update + const auto padded_size = update_matrix->getUpdateLength(); + auto update_vec = + makePotentialUpdate(descriptor, current_slater_matrix, padded_size, noise_magnitude); + update_matrix->setUpdate(rank, update_vec, descriptor.getColumnIndex()); + updateSlaterMatrix(current_slater_matrix, descriptor, update_vec); + + rank++; + } + return current_slater_matrix; + } + + bool UpdateMatrixGenerator::tryGenerateUpdates(UpdateMatrix *update_matrix, + const UpdatePlan &plan, const Matrix &m) const { + + Matrix resulting_matrix = iterateOnPlan(m, update_matrix, plan); + + // Check if the final matrix is invertible or not + const double det = resulting_matrix.getLastKnownDeterminant(); + if (fabs(det) < determinant_threshold) { return false; } + return true; + } + + void UpdateMatrixGenerator::attemptToGenerateUpdates(const UpdatePlan &plan, UpdateMatrix *res, + const Matrix &m) const { + size_t attempts = 0; + constexpr size_t max_attempts = 10; + + // Attempt to generate all the updates until we find a valid series, or we reached the maximum number of attempts + while (true) { + bool success = tryGenerateUpdates(res, plan, m); + + if (success) { return; } + + attempts++; + if (attempts >= max_attempts) { throw FailedUpdateGeneration(attempts); } + } + } + + UpdateMatrix UpdateMatrixGenerator::make(const Matrix &m, size_t y_padding, + const UpdatePlan &plan) const { + + size_t update_length = m.x + y_padding; + + UpdateMatrix res(plan.size(), plan.getSplitCount(), update_length); + attemptToGenerateUpdates(plan, &res, m); + + return res; + } + +}// namespace randomgen diff --git a/random_generator/src/cycle_generation/update_generation/UpdatePlan.cpp b/random_generator/src/cycle_generation/update_generation/UpdatePlan.cpp new file mode 100644 index 0000000..05654d0 --- /dev/null +++ b/random_generator/src/cycle_generation/update_generation/UpdatePlan.cpp @@ -0,0 +1,14 @@ + +#include "UpdatePlan.hpp" + +namespace randomgen { + + UpdatePlan::UpdatePlan(uint32_t updates_count) { updates_descriptors.resize(updates_count); } + + uint32_t UpdatePlan::getSplitCount() const { + uint32_t res = 0; + for (const auto &update: updates_descriptors) { res += update.isSplitting(); } + return res; + } + +}// namespace randomgen diff --git a/random_generator/src/main.cpp b/random_generator/src/main.cpp new file mode 100644 index 0000000..0d065b2 --- /dev/null +++ b/random_generator/src/main.cpp @@ -0,0 +1,10 @@ +#include "Engine.hpp" +#include "versioning.h" +#include + +int main(int argc, char *argv[]) { + + std::cout << PROJECT_NAME << " version " << PROJECT_VER << std::endl; + Engine engine(argc, argv); + return engine.exec(); +} diff --git a/random_generator/src/versioning.h.in b/random_generator/src/versioning.h.in new file mode 100644 index 0000000..5cb32f8 --- /dev/null +++ b/random_generator/src/versioning.h.in @@ -0,0 +1,9 @@ +// Private header used for versioning the project + +#pragma once + +#define PROJECT_NAME "@PROJECT_NAME@" +#define PROJECT_VER "@PROJECT_VERSION@" +#define PROJECT_VER_MAJOR "@PROJECT_VERSION_MAJOR@" +#define PROJECT_VER_MINOR "@PROJECT_VERSION_MINOR@" +#define PROJECT_VER_PATCH "@PROJECT_VERSION_PATCH@" \ No newline at end of file From 871b3e1def5b11eb9cd879ce8772e67015664673 Mon Sep 17 00:00:00 2001 From: Thukisdo Date: Tue, 30 Aug 2022 11:36:59 +0200 Subject: [PATCH 2/2] Added back compilation GitHub action --- .../workflows/random_generator_compile.yml | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .github/workflows/random_generator_compile.yml diff --git a/.github/workflows/random_generator_compile.yml b/.github/workflows/random_generator_compile.yml new file mode 100644 index 0000000..1bc6b2b --- /dev/null +++ b/.github/workflows/random_generator_compile.yml @@ -0,0 +1,33 @@ +name: Compiles + +on: + push: + branches: [ "main", "dev" ] + pull_request: + branches: [ "main", "dev" ] + +env: + BUILD_TYPE: Release + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Fetch dependencies + run: | + sudo apt-get update + sudo apt-get install -yq g++-10 cmake libomp-dev liblapacke-dev libhdf5-dev libblas-dev libopenblas-dev + + + - name: Configure CMake + run: | + cmake -B ${{github.workspace}}/random_generator/build ${{github.workspace}}/random_generator \ + -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=g++-10 -DCMAKE_C_COMPILER=gcc-10 + + - name: Build + # Build your program with the given configuration + run: | + cmake --build ${{github.workspace}}/random_generator/build --config ${{env.BUILD_TYPE}}