up version

option to rename taxonomies
proper taxonomy management, no more tags only
2023-06-23 16:47:17 +02:00 · 2023-06-23 15:11:01 +02:00 · 2023-06-23 11:47:22 +02:00 · 2023-06-23 11:14:14 +02:00 · 2023-06-23 11:12:40 +02:00 · 2023-06-23 09:59:30 +02:00
14 changed files with 1186 additions and 484 deletions
--- a/README.md
+++ b/README.md
@ -106,6 +106,9 @@ prepend_h1: false # Add title of articles as Markdown h1, looks better on certai
 #     dest: title
 #     repr: "{} _" # (this is the default repr)
 move_fields: []
 # Some taxonomies (Spip Mots types) to not export, typically specific to Spip functions
 ignore_taxonomies: ["Gestion du site", "Gestion des articles", "Mise en page"]
 rename_taxonomies: { equipes: "tag-equipes" } # Rename taxonomies (prevent conflicts)
 # Ignored data settings
 export_drafts: true # Should we export drafts
--- a/poetry.lock
+++ b/poetry.lock
@ -103,17 +103,6 @@ files = [
    {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"},
 ]
 [[package]]
 name = "typing-extensions"
 version = "4.6.3"
 description = "Backported and Experimental Type Hints for Python 3.7+"
 optional = false
 python-versions = ">=3.7"
 files = [
    {file = "typing_extensions-4.6.3-py3-none-any.whl", hash = "sha256:88a4153d8505aabbb4e13aacb7c486c2b4a33ca3b3f807914a9b4c844c471c26"},
    {file = "typing_extensions-4.6.3.tar.gz", hash = "sha256:d91d5919357fe7f681a9f2b5b4cb2a5f1ef0a1e9f59c4d8ff0d3491e05c0ffd5"},
 ]
 [[package]]
 name = "unidecode"
 version = "1.3.6"
@ -128,4 +117,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "aa57e8d6b431eddf09f5e5fe90196a965781c9b020aeb0d8ac3b7c5bd34a51a4"
+content-hash = "b2f6a06875c1c40404e891bf9765fab11ecf7fbf04a486962c27f71b3084857a"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [tool.poetry]
 name = "spip2md"
-version = "0.1.0"
+version = "0.1.1"
 description = "Generate a static website with plain Markdown+YAML files from a SPIP CMS database"
 license = "GPL-2.0"
@ -27,7 +27,6 @@ pyyaml = "^6.0"
 python-slugify = {extras = ["unidecode"], version = "^8.0.1"}
 pymysql = "^1.0.3"
 peewee = "^3.16.2"
 typing-extensions = "^4.6.3"
 [tool.poetry.scripts]
 spip2md = "spip2md.lib:cli"
--- a/spip2md/init.py
+++ b/spip2md/init.py
@ -1,31 +0,0 @@
 """
 This file is part of spip2md.
 Copyright (C) 2023 LCPQ/Guilhem Fauré
 spip2md is free software: you can redistribute it and/or modify it under the terms of
 the GNU General Public License version 2 as published by the Free Software Foundation.
 spip2md is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 without even the implied warranty of MERCHANTABILITY or
 FITNESS FOR A PARTICULAR PURPOSE.
 See the GNU General Public License for more details.
 You should have received a copy of the GNU General Public License along with spip2md.
 If not, see <https://www.gnu.org/licenses/>.
 This file main purpose is to initialise the logging infrastructure of Python.
 As the __init__.py file, this file is executed whenever the package is imported.
 """
 # pyright: strict
 import logging
 from os.path import isfile
 LOGFILE: str = "log-spip2md.log"  # File where logs will be written, relative to wd
 LOGLEVEL: str = "WARNING"  # Minimum criticity of logs written in logfile
 # Configure logging
 # __import__("os").remove(LOGFILE) # Clear log ?
 if isfile(LOGFILE):  # Break 2 lines before new log if there’s already one
    with open(LOGFILE, "a") as f:
        f.write("\n\n")
 logging.basicConfig(encoding="utf-8", filename=LOGFILE, level=LOGLEVEL)  # Init
--- a/spip2md/main.py
+++ b/spip2md/main.py
@ -1,7 +1,7 @@
 #!python
 import sys
-from spip2md.cli import main
+from spip2md.lib import cli
 # if __name__ == '__main__': # It IS main
-sys.exit(main(*sys.argv))
+sys.exit(cli())
--- a/spip2md/config.py
+++ b/spip2md/config.py
@ -20,42 +20,15 @@ from typing import Optional
 from yaml import Loader, load
 NAME: str = "spip2md"  # Name of program, notably used in logs
 # Global configuration object
 class Configuration:
    # config_file: Optional[str] = None  # Location of the config file
-    name: str = "spip2md"  # Name of program, notably used in logs
+# Searches for a configuration file from all CLI args and in standard locations
-
+# & return its path if found
-    db: str = "spip"  # DB name
+def config(*start_locations: str) -> Optional[str]:
-    db_host: str = "localhost"  # Where is the DB
+    # Search for config files in CLI arguments and function params first
-    db_user: str = "spip"  # A DB user with read access to SPIP database
+    argv = __import__("sys").argv
-    db_pass: str = "password"  # Password of db_user
+    config_locations: list[str] = argv[1:] + list(start_locations)
    data_dir: str = "IMG/"  # The directory in which SPIP images & documents are stored
    export_languages = ("fr", "en")  # Languages that will be exported
    storage_language: Optional[str] = "fr"  # Language of files and directories names
    output_dir: str = "output/"  # The directory to which DB will be exported
    prepend_h1: bool = False  # Add the title of the article as a Markdown h1
    move_fields: list[dict[str, str]] = []  # Alternative destination for fields
    prepend_id: bool = False  # Add the ID of object before slug
    prepend_lang: bool = False  # Add the lang of object before slug
    export_drafts: bool = True  # Should we export drafts as draft:true articles
    export_empty: bool = True  # Should we export empty articles
    remove_html: bool = True  # Should spip2md remove every HTML tags
    metadata_markup: bool = False  # Should spip2md keep the markup in metadata fields
    title_max_length: int = 40  # Maximum length of a single title for directory names
    unknown_char_replacement: str = "??"  # Replaces unknown characters
    clear_log: bool = True  # Clear log before every run instead of appending to
    clear_output: bool = True  # Remove eventual output dir before running
    ignore_patterns: list[str] = []  # Ignore objects of which title match
    export_filetype: str = "md"  # Extension of exported text files
    debug: bool = False  # Enable debug mode
    # Searches for a configuration file from standard locations or params
    def _find_config_file(self, *start_locations: str) -> str:
        # Search for config files in function params first
        config_locations: list[str] = list(start_locations)
    if "XDG_CONFIG_HOME" in environ:
        config_locations += [
@ -77,20 +50,45 @@ class Configuration:
        "/spip2md.yaml",
    ]
        # Return the first path that actually exists
    for path in config_locations:
        if isfile(path):
                # self.config_file = path
            return path
        # If not found, raise error
        raise FileNotFoundError
-    def __init__(self, *argv: str):
+
-        try:
+# Global configuration object
 class Configuration:
    db: str = "spip"  # DB name
    db_host: str = "localhost"  # Where is the DB
    db_user: str = "spip"  # A DB user with read access to SPIP database
    db_pass: str = "password"  # Password of db_user
    data_dir: str = "IMG/"  # The directory in which SPIP images & documents are stored
    export_languages = ("fr", "en")  # Languages that will be exported
    storage_language: Optional[str] = "fr"  # Language of files and directories names
    output_dir: str = "output/"  # The directory to which DB will be exported
    prepend_h1: bool = False  # Add the title of the article as a Markdown h1
    move_fields: list[dict[str, str]] = []  # Alternative destination for fields
    prepend_id: bool = False  # Add the ID of object before slug
    prepend_lang: bool = False  # Add the lang of object before slug
    export_drafts: bool = True  # Should we export drafts as draft:true articles
    export_empty: bool = True  # Should we export empty articles
    remove_html: bool = True  # Should spip2md remove every HTML tags
    ignore_taxonomies = ("Gestion du site", "Gestion des articles", "Mise en page")
    rename_taxonomies: dict[str, str] = {"equipes": "tag-equipes"}
    metadata_markup: bool = False  # Should spip2md keep the markup in metadata fields
    title_max_length: int = 40  # Maximum length of a single title for directory names
    unknown_char_replacement: str = "??"  # Replaces unknown characters
    clear_log: bool = True  # Clear log before every run instead of appending to
    clear_output: bool = True  # Remove eventual output dir before running
    ignore_patterns: list[str] = []  # Ignore objects of which title match
    logfile: str = "log-spip2md.log"  # File where logs will be written, relative to wd
    loglevel: str = "WARNING"  # Minimum criticity of logs written in logfile
    export_filetype: str = "md"  # Extension of exported text files
    debug_meta: bool = False  # Include more metadata from SPIP DB in frontmatters
    def __init__(self, config_file: Optional[str] = None):
        if config_file is not None:
            # Read config from config file
-            with open(self._find_config_file(*argv[1:])) as f:
+            with open(config_file) as f:
                # Tell user about config
                print(f"Read configuration file from {f.name}")
                config = load(f.read(), Loader=Loader)
            # Assign configuration for each attribute in config file
            for attr in config:
@ -102,5 +100,6 @@ class Configuration:
                    setattr(self, attr, directory)
                else:
                    setattr(self, attr, config[attr])
-        except FileNotFoundError:
+
-            print("No configuration file found, using defaults")
+
 CFG = Configuration(config())
--- a/spip2md/convert.py
+++ b/spip2md/convert.py
@ -1,258 +0,0 @@
 """
 This file is part of spip2md.
 Copyright (C) 2023 LCPQ/Guilhem Fauré
 spip2md is free software: you can redistribute it and/or modify it under the terms of
 the GNU General Public License version 2 as published by the Free Software Foundation.
 spip2md is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 without even the implied warranty of MERCHANTABILITY or
 FITNESS FOR A PARTICULAR PURPOSE.
 See the GNU General Public License for more details.
 You should have received a copy of the GNU General Public License along with spip2md.
 If not, see <https://www.gnu.org/licenses/>.
 This file contains the core classes of spip2md that models internal objects of spip
 and methods to convert them to Markdown + YAML, static site structure
 """
 import logging
 from os.path import basename, splitext
 from typing_extensions import Self
 from slugify import slugify
 from spip2md.config import Configuration
 from spip2md.spip_models import (
    SpipArticles,
    SpipAuteurs,
    SpipAuteursLiens,
    SpipDocuments,
    SpipDocumentsLiens,
    SpipMots,
    SpipMotsLiens,
    SpipRubriques,
 )
 class ConvertableDocument:
    _log_c: logging.Logger  # Logger for conversion operations
    _cfg: Configuration  # Global configuration
    _spip_obj: SpipDocuments  # The Spip Article this is representing
    # Converted fields
    _src: str  # URL
    _slug: str = ""  # URL
    _id: int
    class Meta:
        table_name: str = "spip_document"  # Define the name of the Spip DB table
    def __init__(self, spip_obj: SpipDocuments, cfg: Configuration):
        self._log_c = logging.getLogger(cfg.name + ".convert.document")
        self._cfg = cfg
        self._spip_obj = spip_obj
        self._id = int(spip_obj.id_document)  # type: ignore
        # Define source name of this file
        self._src = cfg.data_dir + spip_obj.fichier
        # Define destination name of this file
        name, filetype = splitext(basename(str(spip_obj.fichier)))
        prepend: str = str(spip_obj.id_document) + "-" if self._cfg.prepend_id else ""
        self._slug = slugify(prepend + name, max_length=cfg.title_max_length) + filetype
 class ConvertableRedactional:
    _log_c: logging.Logger  # Logger for conversion operations
    _cfg: Configuration  # Global configuration
    _spip_obj: SpipArticles | SpipRubriques  # The Spip Article this is representing
    _depth: int  # Depth
    _children: dict[tuple[str, int], ConvertableDocument] = {}  # Children
    _id: int
    _lang: str
    _authors: tuple[SpipAuteurs, ...]
    _tags: tuple[SpipMots, ...]
    # Initialize documents related to self
    def documents(
        self, limit: int = 10**3
    ) -> dict[tuple[str, int], ConvertableDocument]:
        self._log_c.debug(
            "Initialize documents.\n"
            + f"Section: {self._spip_obj.titre}, Depth : {self._depth}"
        )
        documents = [
            ConvertableDocument(doc, self._cfg)
            for doc in (
                SpipDocuments.select()
                .join(
                    SpipDocumentsLiens,
                    on=(SpipDocuments.id_document == SpipDocumentsLiens.id_document),
                )
                .where(SpipDocumentsLiens.id_objet == self._id)
                .limit(limit)
            )
        ]
        # Store them mutably
        return {("document", d._id): d for d in documents}
    # Initialize self authors
    def authors(self) -> tuple[SpipAuteurs, ...]:
        self._log_c.debug("Initialize authors")
        return (
            SpipAuteurs.select()
            .join(
                SpipAuteursLiens,
                on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur),
            )
            .where(SpipAuteursLiens.id_objet == self._id)
        )
    # Initialize self tags
    def tags(self) -> tuple[SpipMots]:
        self._log_c.debug("Initialize tags")
        return (
            SpipMots.select()
            .join(
                SpipMotsLiens,
                on=(SpipMots.id_mot == SpipMotsLiens.id_mot),
            )
            .where(SpipMotsLiens.id_objet == self._id)
        )
 class ConvertableArticle(ConvertableRedactional):
    _fileprefix: str = "index"
    # Converted fields
    _surtitle: str  # Content
    _title: str  # Content
    _subtitle: str  # Content
    _description: str  # Content
    _caption: str  # Content
    _extra: str  # Content
    _text: str  # Content
    _slug: str  # URL
    class Meta:
        table_name: str = "spip_articles"  # Define the name of the Spip DB table
    def __init__(self, spip_obj: SpipArticles, cfg: Configuration, depth: int):
        self._log_c = logging.getLogger(cfg.name + ".convert.article")
        self._cfg = cfg
        self._spip_obj = spip_obj
        self._id = int(spip_obj.id_article)  # type: ignore # Peewee types not defined
        self._lang = str(spip_obj.lang)
        self._depth = depth
        self._draft = spip_obj.statut != "publie"
        self._children |= self.documents()  # Retreive documents & add them to the index
    # Return children and itself in order to be indexed by the parent
    def index(
        self,
    ) -> dict[tuple[str, int], tuple[str, int]]:
        return {child_key: ("article", self._id) for child_key in self._children}
 # Define Section as an Article that can contain other Articles or Sections
 class ConvertableSection(ConvertableRedactional):
    _fileprefix: str = "_index"  # Prefix of written Markdown files
    # sub-sections, documents, articles
    _children: dict[
        tuple[str, int], "ConvertableSection | ConvertableArticle | ConvertableDocument"
    ] = {}
    # Routing table to objects
    _index: dict[tuple[str, int], tuple[str, int]] = {}
    class Meta:
        table_name: str = "spip_rubriques"  # Define the name of the Spip DB table
    # Get articles of this section
    def articles(self, limit: int = 10**6):
        self._log_c.debug(
            "Initialize articles.\n"
            + f"Section: {self._spip_obj.titre}, Depth : {self._depth}"
        )
        articles = [
            ConvertableArticle(art, self._cfg, self._depth)
            for art in (
                SpipArticles.select()
                .where(SpipArticles.id_rubrique == self._id)
                .order_by(SpipArticles.date.desc())
                .limit(limit)
            )
        ]
        # Add these articles and their children to self index
        for article in articles:
            self._index |= article.index()
        # Store them mutably
        return {("article", art._id): art for art in articles}
    # Get subsections of this section
    def sections(self, limit: int = 10**6):
        self._log_c.debug(
            "Initialize subsections of\n"
            + f"section {self._spip_obj.titre} of depth {self._depth}"
        )
        sections = [
            ConvertableSection(sec, self._cfg, self._depth)
            for sec in (
                SpipRubriques.select()
                .where(SpipRubriques.id_parent == self._id)
                .order_by(SpipRubriques.date.desc())
                .limit(limit)
            )
        ]
        # Add these sections’s indexes to self index, replacing next hop with section
        for section in sections:
            self._index |= {
                obj_key: ("section", section._id) for obj_key in section._index
            }
        # Store them mutably
        return {("section", sec._id): sec for sec in sections}
    def __init__(self, spip_obj: SpipRubriques, cfg: Configuration, parent_depth: int):
        self._log_c = logging.getLogger(cfg.name + ".convert.section")
        self._cfg = cfg
        self._spip_obj = spip_obj
        self._id = int(spip_obj.id_rubrique)  # type: ignore
        self._lang = str(spip_obj.lang)
        self._depth = parent_depth + 1
        self._children |= self.documents()
        self._children |= self.articles()
        self._children |= self.sections()
 # The "root" element representing the whole converted site
 class ConvertableSite:
    _log_c: logging.Logger  # Logger for conversion operations
    _cfg: Configuration  # Global configuration
    _children: dict[tuple[str, int], ConvertableSection] = {}  # Root sections
    _index: dict[tuple[str, int], tuple[str, int]] = {}  # Routing table to objects
    _id: int = 0  # Parent ID of root sections
    _depth: int = 0  # Depth
    def sections(self) -> dict[tuple[str, int], ConvertableSection]:
        self._log_c.debug("Initialize ROOT sections")
        # Get all sections of parentID root_id
        sections = [
            ConvertableSection(sec, self._cfg, self._depth)
            for sec in (
                SpipRubriques.select()
                .where(SpipRubriques.id_parent == self._id)
                .order_by(SpipRubriques.date.desc())
            )
        ]
        # Add these sections’s indexes to self index, replacing next hop with section
        # do this while outputting it as the children
        def sec_to_index(section: ConvertableSection):
            for obj_key in section._index:
                self._index[obj_key] = ("section", section._id)
            return ("section", section._id)
        return {sec_to_index(subsection): subsection for subsection in sections}
    def __init__(self, cfg: Configuration) -> None:
        self._log_c = logging.getLogger(cfg.name + ".convert.site")
        self._cfg = cfg
        self._children |= self.sections()
--- a/spip2md/extended_models.py
+++ b/spip2md/extended_models.py
@ -0,0 +1,934 @@
 """
 This file is part of spip2md.
 Copyright (C) 2023 LCPQ/Guilhem Fauré
 spip2md is free software: you can redistribute it and/or modify it under the terms of
 the GNU General Public License version 2 as published by the Free Software Foundation.
 spip2md is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 without even the implied warranty of MERCHANTABILITY or
 FITNESS FOR A PARTICULAR PURPOSE.
 See the GNU General Public License for more details.
 You should have received a copy of the GNU General Public License along with spip2md.
 If not, see <https://www.gnu.org/licenses/>.
 """
 import logging
 from os import listdir, mkdir
 from os.path import basename, isfile, splitext
 from re import I, Match, Pattern, finditer, match, search
 from re import error as re_error
 from shutil import copyfile
 from typing import Any, Optional
 from peewee import (
    BigAutoField,
    BigIntegerField,
    DateTimeField,
    DoesNotExist,
 )
 from slugify import slugify
 from yaml import dump
 from spip2md.config import CFG, NAME
 from spip2md.regexmaps import (
    ARTICLE_LINK,
    BLOAT,
    CONFIG_LANGS,
    DOCUMENT_LINK,
    HTMLTAGS,
    IMAGE_LINK,
    ISO_UTF,
    MULTILANG_BLOCK,
    SECTION_LINK,
    SPECIAL_OUTPUT,
    SPIP_MARKDOWN,
    UNKNOWN_ISO,
    WARNING_OUTPUT,
 )
 from spip2md.spip_models import (
    SpipArticles,
    SpipAuteurs,
    SpipAuteursLiens,
    SpipDocuments,
    SpipDocumentsLiens,
    SpipMots,
    SpipMotsLiens,
    SpipRubriques,
 )
 from spip2md.style import BOLD, CYAN, GREEN, WARNING_STYLE, YELLOW, esc
 DeepDict = dict[str, "list[DeepDict] | list[str] | str"]
 # Define logger for this file’s logs
 LOG = logging.getLogger(NAME + ".models")
 # Define type that images can have
 IMG_TYPES = ("jpg", "png", "jpeg", "gif", "webp", "ico")
 class SpipWritable:
    # From SPIP database
    texte: str
    lang: str
    titre: str
    descriptif: str
    statut: str
    profondeur: int
    # Converted fields
    _storage_title: str  # Title with which directories names are built
    _draft: bool
    # Additional fields
    _id: BigAutoField | int = 0  # same ID attribute name for all objects
    _depth: int  # Equals `profondeur` for sections
    _fileprefix: str  # String to prepend to written files
    _storage_parentdir: str  # Path from output dir to direct parent
    _style: tuple[int, ...]  # _styles to apply to some elements of printed output
    _storage_title_append: int = 0  # Append a number to storage title if > 0
    # Apply a mapping from regex maps
    @staticmethod
    def apply_mapping(text: str, mapping: tuple, keep_markup: bool = True) -> str:
        if type(mapping) == tuple and len(mapping) > 0:
            if type(mapping[0]) == tuple and len(mapping[0]) > 0:
                if type(mapping[0][0]) == Pattern:  # Mostly for syntax conversion
                    for old, new in mapping:
                        if keep_markup:
                            text = old.sub(new, text)
                        else:
                            try:
                                text = old.sub(r"\1", text)
                            except re_error:
                                text = old.sub("", text)
                else:
                    for old, new in mapping:  # Mostly for broken encoding
                        text = text.replace(old, new)
            elif type(mapping[0]) == Pattern:
                for old in mapping:
                    text = old.sub("", text)
            else:
                for old in mapping:
                    text = text.replace(old, "")
        return text
    # Warn about unknown chars & replace them with config defined replacement
    def warn_unknown(self, text: str, unknown_mapping: tuple) -> str:
        """Log every run of unknown characters found in `text` and replace it.

        Each element of `unknown_mapping` is interpolated into a regex; every run
        of it found in `text` is logged together with some surrounding text, then
        replaced by `CFG.unknown_char_replacement` when that setting is not None.
        Returns the text with the replacements applied.
        """
        # NOTE(review): `char` is interpolated unescaped into the patterns below,
        # assumes the entries of the mapping are regex-safe — confirm

        # Return unknown char surrounded by at most context_len chars on each side
        def unknown_chars_context(text: str, char: str, context_len: int = 24) -> str:
            context: str = r".{0," + str(context_len) + r"}"
            m = search(
                context + r"(?=" + char + r")" + char + context,
                text,
            )
            if m is not None:
                return m.group()
            return char

        for char in unknown_mapping:
            # Match offsets refer to this snapshot, not to the progressively
            # edited `text`, so the logged context has to be sliced from it
            scanned: str = text
            lastend: int = 0
            for m in finditer("(" + char + ")+", scanned):
                context: str = unknown_chars_context(scanned[lastend:], char)
                # Logger.warn() is a deprecated alias of Logger.warning()
                LOG.warning(
                    f"Unknown char {char} found in {self.titre[:40]} at: {context}"
                )
                if CFG.unknown_char_replacement is not None:
                    LOG.warning(
                        f"Replacing {m.group()} with {CFG.unknown_char_replacement}"
                    )
                    text = text.replace(m.group(), CFG.unknown_char_replacement, 1)
                lastend = m.end()
        return text
    # Apply needed methods on text fields
    def convert_field(self, field: Optional[str], keep_markup: bool = True) -> str:
        if field is None:
            return ""
        if len(field) == 0:
            return ""
        # Convert SPIP syntax to Markdown
        field = self.apply_mapping(field, SPIP_MARKDOWN, keep_markup)
        # Remove useless text
        field = self.apply_mapping(field, BLOAT)
        # Convert broken ISO encoding to UTF
        field = self.apply_mapping(field, ISO_UTF)
        if CFG.remove_html:
            # Delete remaining HTML tags in body WARNING
            field = self.apply_mapping(field, HTMLTAGS)
        # Warn about unknown chars
        field = self.warn_unknown(field, UNKNOWN_ISO)
        return field.strip()  # Strip whitespaces around text
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Initialize converted fields beginning with underscore
        self._description: str = self.convert_field(self.descriptif)
        self._draft = self.statut != "publie"
    # Apply post-init conversions and cancel the export if self is an unwanted draft
    def convert(self) -> None:
        """Compute `_storage_title` from `titre`, then refuse unwanted drafts.

        Raises DontExportDraftError when the object is a draft and
        `CFG.export_drafts` is disabled.
        """
        self._storage_title = self.convert_field(self.titre)
        if self._draft and not CFG.export_drafts:
            raise DontExportDraftError(f"{self.titre} is a draft, cancelling export")
    def dest_directory(self) -> str:
        raise NotImplementedError("Subclasses need to implement directory()")
    def dest_filename(self, prepend: str = "", append: str = "") -> str:
        raise NotImplementedError(
            f"Subclasses need to implement dest_filename(), params:{prepend}{append}"
        )
    def dest_path(self) -> str:
        return self.dest_directory() + self.dest_filename()
    # Print one or more line(s) in which special elements are stylized
    def style_print(
        self, string: str, indent: Optional[str] = "  ", end: str = "\n"
    ) -> str:
        """Print `string` with its special elements stylized and return the result.

        Matches of every SPECIAL_OUTPUT pattern are wrapped with this object's
        `_style`, matches of every WARNING_OUTPUT pattern with WARNING_STYLE.
        A non-empty `indent` is repeated `_depth` times in front of the line.
        `end` is handed to print() untouched.
        """
        line: str = string
        for special in SPECIAL_OUTPUT:
            # esc() without argument presumably resets the style — TODO confirm
            line = special.sub(esc(*self._style) + r"\1" + esc(), line)
        for warning in WARNING_OUTPUT:
            line = warning.sub(esc(*WARNING_STYLE) + r"\1" + esc(), line)
        if indent:  # Neither None nor empty
            line = indent * self._depth + line
        print(line, end=end)
        # Hand the stylized string back in case the caller needs it
        return line
    # Print the message telling what is going to be done
    def begin_message(self, index: int, limit: int, step: int = 100) -> str:
        # Output the remaining number of objects to export every step object
        if index % step == 0 and limit > 0:
            counter: str = f"Exporting {limit-index} level {self._depth}"
            s: str = "s" if limit - index > 1 else ""
            if hasattr(self, "lang"):
                counter += f" {self.lang}"
            counter += f" {type(self).__name__}{s}"
            # Print the output as the program goes
            self.style_print(counter)
        # Output the counter & title of the object being exported
        msg: str = f"{index + 1}. "
        if len(self._storage_title) == 0:
            msg += "EMPTY NAME"
        else:
            msg += self._storage_title
        # Print the output as the program goes
        # LOG.debug(f"Begin exporting {type(self).__name__} {output[-1]}")
        self.style_print(msg, end="")
        return msg
    # Write object to output destination
    def write(self) -> str:
        raise NotImplementedError("Subclasses need to implement write()")
    # Output information about file that was just exported
    def end_message(self, message: str | Exception) -> str:
        """Print the outcome of an export and return the printed line.

        `message` is either the string returned by a successful write or the
        exception that cancelled it. Exceptions get an "ERROR: ..." label chosen
        from their exact type (subclasses fall under UNKNOWN), strings get none.
        """
        labels: dict[type, str] = {
            FileNotFoundError: "ERROR: NOT FOUND: ",
            DoesNotExist: "ERROR: NO DESTINATION DIR: ",
            DontExportDraftError: "ERROR: NOT EXPORTING DRAFT: ",
            DontExportEmptyError: "ERROR: NOT EXPORTING EMPTY: ",
        }
        kind: type = type(message)
        if kind in labels:
            label: str = labels[kind]
        elif kind is not str:
            label = "ERROR: UNKNOWN: "
        else:
            label = ""
        output: str = " -> " + label + str(message)
        # Printed as the program goes, without indentation as it ends a line
        self.style_print(output, indent=None)
        return output
    # Perform all the write steps of this object
    def write_all(
        self,
        parentdepth: int,
        storage_parentdir: str,
        index: int,
        total: int,
        parenturl: str,
    ) -> str:
        """Perform all the write steps of this object and return what was printed.

        Stores the depth (one more than `parentdepth`), the parent directory and
        the parent URL on self, prints the begin message, calls write() and
        prints the end message. The exceptions that cancel an export are
        reported through the end message instead of being propagated.
        """
        LOG.debug(f"Writing {type(self).__name__} `{self._storage_title}`")
        self._depth = parentdepth + 1
        self._storage_parentdir = storage_parentdir
        self._parenturl = parenturl
        begin: str = self.begin_message(index, total)
        cancelling = (
            LangNotFoundError,
            DontExportDraftError,
            DontExportEmptyError,
            IgnoredPatternError,
            FileNotFoundError,
        )
        try:
            outcome: str | Exception = self.write()
        except cancelling as err:
            outcome = err
        return begin + self.end_message(outcome)
 class Document(SpipWritable, SpipDocuments):
    _fileprefix: str = ""
    _style = (BOLD, CYAN)  # Documents accent color is blue
    class Meta:
        table_name: str = "spip_documents"
    def __init__(self, *args, **kwargs):
        """Build the document, then mirror `id_document` into the common `_id`."""
        super().__init__(*args, **kwargs)
        # `_id` is the ID attribute name shared by all objects
        self._id = self.id_document
    # Get source name of this file
    def src_path(self, data_dir: Optional[str] = None) -> str:
        """Return the source path of this file: data directory + `fichier`.

        `data_dir` overrides `CFG.data_dir` when given; it is concatenated as is,
        so it has to end with a path separator.
        """
        base_dir: str = CFG.data_dir if data_dir is None else data_dir
        return base_dir + self.fichier
    # Get directory of this object
    def dest_directory(self, prepend: str = "", append: str = "") -> str:
        """Return the directory of this object, built under its parent directory.

        The last component is the slugified storage title (at most 100 chars),
        preceded by "<id>-" when `CFG.prepend_id` is set. `prepend` and `append`
        are placed around that slug without being slugified.
        """
        id_prefix: str = (str(self._id) + "-") if CFG.prepend_id else ""
        slug: str = slugify(id_prefix + self._storage_title, max_length=100)
        return self._storage_parentdir + prepend + slug + append
    # Get destination slugified name of this file
    def dest_filename(self, prepend: str = "", append: str = "") -> str:
        """Return the slugified destination name of this file.

        Only `prepend` + the base name of `fichier` is slugified (at most 100
        chars); `append` and the original extension are added after it untouched.
        """
        source_name: str = basename(str(self.fichier))
        stem, extension = splitext(source_name)
        slug: str = slugify(prepend + stem, max_length=100)
        return slug + append + extension
    # Write document to output destination
    def write(self) -> str:
        """Copy the document to its destination and return copyfile()'s result."""
        # From its SPIP location to the new location
        source: str = self.src_path()
        destination: str = self.dest_path()
        return copyfile(source, destination)
    # Perform all the write steps of this object
    def write_all(
        self,
        parentdepth: int,
        storage_parentdir: str,
        index: int,
        total: int,
        forcedlang: Optional[str] = None,
        parenturl: str = "",
    ) -> str:
        """Convert this document, then perform all the inherited write steps.

        `forcedlang` is only logged; `parenturl` is logged and then forwarded to
        the parent implementation along with the other arguments.
        """
        self.convert()  # Post-init conversions
        for name, value in (("forcedlang", forcedlang), ("parenturl", parenturl)):
            LOG.debug(
                f"Document {self._storage_title} doesn’t care about {name} {value}"
            )
        written: str = super().write_all(
            parentdepth, storage_parentdir, index, total, parenturl
        )
        return written
 class IgnoredPatternError(Exception):
    """Export cancelled, presumably because the title matched an ignored pattern."""
 class LangNotFoundError(Exception):
    """Export cancelled, presumably because the wanted language was not found."""
 class DontExportDraftError(Exception):
    """Export cancelled because the object is a draft and drafts are not exported."""
 class DontExportEmptyError(Exception):
    """Export cancelled, presumably because the object is empty."""
 class SpipRedactional(SpipWritable):
    id_trad: BigIntegerField | BigAutoField | int
    id_rubrique: BigAutoField | int
    # date: DateTimeField | str
    date: DateTimeField
    maj: str
    id_secteur: BigIntegerField | int
    extra: str
    langue_choisie: str
    # Converted
    _text: str
    _taxonomies: dict[str, list[str]] = {}
    _url_title: str  # Title in metadata of articles
    _parenturl: str  # URL relative to lang to direct parent
    _static_img_path: Optional[str] = None  # Path to the static img of this article
    # Get rid of other lang than forced in text and modify lang to forced if found
    def translate_multi(
        self, forced_lang: str, text: str, change_lang: bool = True
    ) -> str:
        """Keep only the `forced_lang` translation of each <multi> block of `text`.

        Every <multi> block is replaced by its `forced_lang` translation when
        it has one, otherwise by its whole inner text. When a translation is
        found and `change_lang` is true, `self.lang` is set to `forced_lang`.

        Raises KeyError if `forced_lang` has no entry in CONFIG_LANGS.
        Returns the text with its <multi> blocks replaced.
        """
        # LOG.debug(f"Translating <multi> blocks of `{self._url_title}`")
        # Track success over ALL blocks, not only the last one examined
        found: bool = False
        for block in MULTILANG_BLOCK.finditer(text):
            lang: Optional[Match[str]] = CONFIG_LANGS[forced_lang].search(
                block.group(1)
            )
            if lang is None:
                # Replace the multi block with the text inside
                text = text.replace(block.group(), block.group(1))
                continue
            found = True
            # Log the translation
            trans: str = lang.group(1)[:50].strip()
            LOG.debug(
                f"Keeping {forced_lang} translation of `{self._url_title}`: "
                + f"`{trans}`"
            )
            if change_lang:
                self.lang = forced_lang  # So write-all will not be cancelled
            # Replace the multi block with the text in the proper lang
            text = text.replace(block.group(), lang.group(1))
        if not found:
            LOG.debug(f"{forced_lang} not found in `{self._url_title}`")
        return text
    def replace_links(self, text: str) -> str:
        """Rewrite SPIP internal links found in `text` as Markdown links.

        Image, document, section and article link patterns are tried in that
        order. Each match is replaced by a Markdown link to the destination
        file name of the linked object (prefixed with "!" for images), using
        the link text when present, otherwise the object storage title.
        Links whose object does not exist are replaced by "[](NOT FOUND)".

        Exceptions raised while converting a linked object are not caught
        here. Returns the text with its links rewritten.
        """

        # Plain local functions instead of staticmethod objects stored in a
        # class body: those are not callable (and have no __name__) before
        # Python 3.10
        def getdocument(obj_id: int) -> Document:
            doc: Document = Document.get(Document.id_document == obj_id)
            doc.convert()
            return doc

        def getsection(obj_id: int) -> Section:
            sec: Section = Section.get(Section.id_rubrique == obj_id)
            sec.convert(self.lang)
            return sec

        def getarticle(obj_id: int) -> Article:
            art: Article = Article.get(Article.id_article == obj_id)
            art.convert(self.lang)
            return art

        # (link patterns, object getter, string to prepend to the Markdown link)
        link_mappings = (
            (IMAGE_LINK, getdocument, "!"),
            (DOCUMENT_LINK, getdocument, ""),
            (SECTION_LINK, getsection, ""),
            (ARTICLE_LINK, getarticle, ""),
        )
        for links, getobj, prepend in link_mappings:
            # Nested iteration also copes with an empty group of patterns
            for link in links:
                # LOG.debug(f"Looking for {link} in {text}")
                for m in link.finditer(text):
                    LOG.debug(f"Found internal link {m.group()} in {self._url_title}")
                    try:
                        LOG.debug(
                            f"Searching for object of id {m.group(2)} with "
                            + getobj.__name__
                        )
                        o: "Document | Article | Section" = getobj(int(m.group(2)))
                        # TODO get full relative path for sections and articles
                        # TODO rewrite links markup (bold/italic) after stripping
                        if len(m.group(1)) > 0:
                            repl = f"{prepend}[{m.group(1)}]({o.dest_filename()})"
                        else:
                            repl = (
                                f"{prepend}[{o._storage_title}]({o.dest_filename()})"
                            )
                        LOG.debug(
                            f"Translate link {m.group()} to {repl} in {self._url_title}"
                        )
                        text = text.replace(m.group(), repl)
                    except DoesNotExist:
                        # Logger.warn is a deprecated alias of Logger.warning
                        LOG.warning(
                            f"No object for link {m.group()} in {self._url_title}"
                        )
                        text = text.replace(m.group(), prepend + "[](NOT FOUND)", 1)
        return text
    # Get this object url, or none if it’s the same as directory
    def url(self) -> str:
        """Build the URL of this object, appended to the URL of its parent.

        The URL is the parent URL, the slug of the optional "<id>-" prefix and
        the URL title (limited to CFG.title_max_length), an optional
        "_<counter>" suffix when the storage title counter is positive, and a
        trailing slash.
        """
        id_prefix: str = ""
        if CFG.prepend_id:
            id_prefix = str(self._id) + "-"
        suffix: str = ""
        if self._storage_title_append > 0:
            suffix = "_" + str(self._storage_title_append)
        slug: str = slugify(
            id_prefix + self._url_title, max_length=CFG.title_max_length
        )
        return self._parenturl + slug + suffix + r"/"
    # Get slugified directory of this object
    def dest_directory(self) -> str:
        """Build the slugified output directory of this object.

        The directory is the storage parent directory, the slug of the
        optional "<id>-" prefix and the storage title (limited to
        CFG.title_max_length), an optional "_<counter>" suffix when the storage
        title counter is positive, and a trailing slash.
        """
        id_prefix: str = ""
        if CFG.prepend_id:
            id_prefix = str(self._id) + "-"
        suffix: str = ""
        if self._storage_title_append > 0:
            suffix = "_" + str(self._storage_title_append)
        slug: str = slugify(
            id_prefix + self._storage_title,
            max_length=CFG.title_max_length,
        )
        return self._storage_parentdir + slug + suffix + r"/"
    # Get filename of this object
    def dest_filename(self) -> str:
        """Return the file name: "<file prefix>.<lang>.<export file type>"."""
        return ".".join((self._fileprefix, self.lang, CFG.export_filetype))
    def convert_title(self, forced_lang: str) -> None:
        """Compute the URL title and the storage title from the SPIP title.

        A None or empty SPIP title gives an empty URL title and nothing else
        is set. Otherwise the storage title is translated to
        CFG.storage_language (or `forced_lang` when unset) and the URL title
        to `forced_lang`; both then get their internal links replaced and
        their markup converted.

        Raises IgnoredPatternError when either title matches one of
        CFG.ignore_patterns.
        """
        kind: str = type(self).__name__
        LOG.debug(f"Convert title of currently untitled {kind}")
        # NOTE(review): nothing visible here sets `_title` (this method sets
        # `_url_title`), so this guard looks like it never triggers — confirm
        if hasattr(self, "_title"):
            LOG.debug(f"{kind} {self._url_title} _title is already set")
            return
        raw = self.titre
        if raw is None:
            LOG.debug(f"{kind} title is None")
            self._url_title = ""
            return
        if len(raw) == 0:
            LOG.debug(f"{kind} title is empty")
            self._url_title = ""
            return
        self._url_title = raw.strip()
        # Storage title uses the storage language, falling back to forced lang
        storage_lang: str = forced_lang
        if CFG.storage_language is not None:
            storage_lang = CFG.storage_language
        LOG.debug(
            f"Searching for {storage_lang} in <multi> blocks of `{self._url_title}`"
            + " storage title"
        )
        # change_lang is False: the storage title must not alter self.lang
        self._storage_title = self.translate_multi(
            storage_lang, self._url_title, False
        )
        LOG.debug(
            f"Searching for {forced_lang} in <multi> blocks of `{self._url_title}`"
            + " URL title"
        )
        self._url_title = self.translate_multi(forced_lang, self._url_title)
        LOG.debug(f"Convert internal links of {self.lang} `{self._url_title}` title")
        self._storage_title = self.replace_links(self._storage_title)
        self._url_title = self.replace_links(self._url_title)
        LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` title")
        self._storage_title = self.convert_field(self._storage_title)
        self._url_title = self.convert_field(self._url_title, CFG.metadata_markup)
        # Cancel the export as soon as one title matches an ignore pattern
        for pattern in CFG.ignore_patterns:
            titles = (self._storage_title, self._url_title)
            if any(match(pattern, title, I) is not None for title in titles):
                raise IgnoredPatternError(
                    f"{self._url_title} matches with ignore pattern {pattern}, ignoring"
                )
    def convert_text(self, forced_lang: str) -> None:
        """Compute `self._text` from the SPIP text, unless it is already set.

        A None or empty SPIP text gives an empty `_text`. Otherwise the
        stripped text is translated to `forced_lang`, its internal links are
        replaced and its markup is converted.
        """
        kind: str = type(self).__name__
        LOG.debug(f"Convert text of `{self._url_title}`")
        if hasattr(self, "_text"):
            LOG.debug(f"{kind} {self._url_title} _text is already set")
            return
        raw = self.texte
        if raw is None:
            LOG.debug(f"{kind} {self._url_title} text is None")
            self._text = ""
        elif len(raw) == 0:
            LOG.debug(f"{kind} {self._url_title} text is empty")
            self._text = ""
        else:
            self._text = self.translate_multi(forced_lang, raw.strip())
            LOG.debug(f"Convert internal links of {self.lang} `{self._url_title}` text")
            self._text = self.replace_links(self._text)
            LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` text")
            self._text = self.convert_field(self._text)
    def convert_extra(self) -> None:
        """Compute `self._extra`, unless it is already set.

        A None or empty SPIP `extra` gives an empty `_extra`. Otherwise the
        internal links of the current `_extra` value are replaced and its
        markup is converted with CFG.metadata_markup.
        """
        kind: str = type(self).__name__
        LOG.debug(f"Convert extra of `{self._url_title}`")
        if hasattr(self, "_extra"):
            LOG.debug(f"{kind} {self._url_title} _extra is already set")
            return
        raw = self.extra
        if raw is None:
            LOG.debug(f"{kind} {self._url_title} extra is None")
            self._extra = ""
        elif len(raw) == 0:
            LOG.debug(f"{kind} {self._url_title} extra is empty")
            self._extra = ""
        else:
            LOG.debug(
                f"Convert internal links of {self.lang} `{self._url_title}` extra"
            )
            self._extra = self.replace_links(self._extra)
            LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` extra")
            self._extra = self.convert_field(self._extra, CFG.metadata_markup)
    def convert_taxonomies(self, forcedlang: str) -> None:
        """Group the converted tags of this object by taxonomy.

        `self._taxonomies` is rebuilt as a mapping from taxonomy name to the
        list of its tag descriptions, each translated to `forcedlang` and
        converted. Taxonomies listed in CFG.ignore_taxonomies are skipped and
        those found in CFG.rename_taxonomies are stored under their new name.
        """
        self._taxonomies = {}
        for tag in self.taxonomies():
            taxonomy = str(tag.type)
            if taxonomy in CFG.ignore_taxonomies:
                continue
            LOG.debug(f"Translate taxonomy of `{self._url_title}`: {tag.descriptif}")
            if taxonomy in CFG.rename_taxonomies:
                LOG.debug(
                    f"Rename taxonomy {taxonomy}: {CFG.rename_taxonomies[taxonomy]}"
                )
                taxonomy = CFG.rename_taxonomies[taxonomy]
            # change_lang is False: a tag must not alter the object language
            label = self.convert_field(
                self.translate_multi(forcedlang, str(tag.descriptif), False)
            )
            # Look up and store under the very same key, so that tags of one
            # taxonomy always accumulate instead of overwriting each other
            self._taxonomies.setdefault(taxonomy, []).append(label)
        LOG.debug(
            f"After translation, taxonomies of `{self._url_title}`: {self._taxonomies}"
        )
    def __init__(self, *args, **kwargs):
        """Initialize the parent class, then the converted language flag."""
        super().__init__(*args, **kwargs)
        # Converted fields begin with an underscore
        language_is_chosen = self.langue_choisie == "oui"
        self._choosen_language = language_is_chosen
    # Get related documents
    def documents(self) -> tuple[Document]:
        """Select the documents linked to this object through SpipDocumentsLiens."""
        LOG.debug(f"Initialize documents of `{self._url_title}`")
        same_document = Document.id_document == SpipDocumentsLiens.id_document
        # NOTE(review): only the object id is compared, not the kind of linked
        # object — confirm that ids cannot collide between object kinds
        linked_to_self = SpipDocumentsLiens.id_objet == self._id
        query = Document.select().join(SpipDocumentsLiens, on=same_document)
        return query.where(linked_to_self)
    # Get the YAML frontmatter string
    def frontmatter(self, append: Optional[dict[str, Any]] = None) -> str:
        """Dump the metadata of this object as a YAML string.

        Debugging identifiers are added when CFG.debug_meta is set, and the
        URL is added when it is not contained in the destination directory.
        Entries of `append`, if given, are merged last and win over the
        metadata built here.
        """
        # LOG.debug(f"Write frontmatter of `{self._title}`")
        meta: dict[str, Any] = {
            "lang": self.lang,
            "translationKey": self.id_trad if self.id_trad != 0 else self._id,
            "title": self._url_title,
            "publishDate": self.date,
            "lastmod": self.maj,
            "draft": self._draft,
            "description": self._description,
        }
        if CFG.debug_meta:
            meta["spip_id"] = self._id
            meta["spip_id_secteur"] = self.id_secteur
        url: str = self.url()
        if url not in self.dest_directory():
            meta["url"] = url
        if append is not None:
            meta = meta | append
        return dump(meta, allow_unicode=True)
    # Get file text content
    def content(self) -> str:
        """Assemble the text content of the file to write.

        The parts, separated by blank lines, are: the frontmatter between
        "---" lines, the title as a Markdown h1 when CFG.prepend_h1 is set and
        the title is not empty, the text, and an "EXTRA" section when there is
        one.

        Raises DontExportEmptyError when the text is empty and
        CFG.export_empty is not set.
        """
        # LOG.debug(f"Write content of `{self._title}`")
        parts: list[str] = ["---\n" + self.frontmatter() + "---"]
        title = self._url_title
        if title is not None and len(title) > 0 and CFG.prepend_h1:
            parts.append("# " + title)
        if len(self._text) > 0:
            parts.append(self._text)
        elif not CFG.export_empty:
            raise DontExportEmptyError
        if len(self._extra) > 0:
            parts.append("# EXTRA\n\n" + self._extra)
        return "\n\n".join(parts)
    def authors(self) -> tuple[SpipAuteurs, ...]:
        """Select the authors linked to this object through SpipAuteursLiens."""
        LOG.debug(f"Initialize authors of `{self._url_title}`")
        same_author = SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur
        # NOTE(review): only the object id is compared, not the kind of linked
        # object — confirm that ids cannot collide between object kinds
        linked_to_self = SpipAuteursLiens.id_objet == self._id
        query = SpipAuteurs.select().join(SpipAuteursLiens, on=same_author)
        return query.where(linked_to_self)
    def taxonomies(self) -> tuple[SpipMots, ...]:
        """Select the tags (SpipMots) linked to this object through SpipMotsLiens."""
        LOG.debug(f"Initialize taxonomies of `{self._url_title}`")
        same_tag = SpipMots.id_mot == SpipMotsLiens.id_mot
        # NOTE(review): only the object id is compared, not the kind of linked
        # object — confirm that ids cannot collide between object kinds
        linked_to_self = SpipMotsLiens.id_objet == self._id
        query = SpipMots.select().join(SpipMotsLiens, on=same_tag)
        return query.where(linked_to_self)
    # Write all the documents of this object
    def write_children(
        self,
        children: tuple[Document] | tuple[Any],
        forcedlang: str,
    ) -> list[str]:
        """Write every child of this object and collect what they return.

        Each child is written below the destination directory and URL of this
        object. Children that raise LangNotFoundError, DontExportDraftError,
        DontExportEmptyError or IgnoredPatternError are logged and skipped;
        the index handed to the children only counts the written ones.
        """
        LOG.debug(f"Writing documents of {type(self).__name__} `{self._url_title}`")
        skippable = (
            LangNotFoundError,
            DontExportDraftError,
            DontExportEmptyError,
            IgnoredPatternError,
        )
        written: list[str] = []
        total = len(children)
        done = 0
        for child in children:
            try:
                result = child.write_all(
                    self._depth,
                    self.dest_directory(),
                    done,
                    total,
                    forcedlang,
                    self.url(),
                )
            except skippable as err:
                LOG.debug(err)
            else:
                written.append(result)
                done += 1
        return written
    # Write object to output destination
    def write(self) -> str:
        """Write this object into its own directory and return the file path.

        The destination directory is created. If it already exists, it is
        reused unless it contains the destination file itself or a file of the
        export file type with another file prefix; in that case the storage
        title counter is incremented and the next directory name is tried.
        The content is then written as UTF-8, and the static image found for
        this object, if any, is copied next to it.
        """
        # Make a directory for this object if there isn’t
        # If it cannot for incompatibility, try until it can
        incompatible: bool = True
        while incompatible:
            directory: str = self.dest_directory()
            try:
                mkdir(directory)
                break
            except FileExistsError:
                # If not stated incompatible with the following, will write in this dir
                incompatible = False
                # Create a new directory if write is about to overwrite an existing file
                # or to write into a directory without the same fileprefix
                for file in listdir(directory):
                    if isfile(directory + file):
                        LOG.debug(
                            f"Can {type(self).__name__} `{self.dest_path()}` of prefix "
                            + f"{self._fileprefix} and suffix {CFG.export_filetype}"
                            + f" be written along with `{file}` of prefix "
                            + f"`{file.split('.')[0]}` and suffix {file.split('.')[-1]}"
                            + f"` in {self.dest_directory()}` ?"
                        )
                        # Resolve conflict at first incompatible file encountered
                        if directory + file == self.dest_path() or (
                            file.split(".")[-1] == CFG.export_filetype
                            and file.split(".")[0] != self._fileprefix
                        ):
                            LOG.debug(
                                f"No, incrementing counter of {self.dest_directory()}"
                            )
                            self._storage_title_append += 1
                            incompatible = True
                            break
        # Write the content of this object into the destination file. The
        # encoding is explicit because the content holds raw Unicode, which the
        # locale default encoding may be unable to encode
        with open(self.dest_path(), "w", encoding="utf-8") as f:
            f.write(self.content())
        # Write the eventual static image of this object
        if self._static_img_path:
            copyfile(
                self._static_img_path,
                self.dest_directory() + basename(self._static_img_path),
            )
        return self.dest_path()
    # Append static images based on filename instead of DB to objects texts
    def append_static_images(self, obj_str: str = "art", load_str: str = "on"):
        """Attach the first static image found on disk for this object.

        Candidate paths are CFG.data_dir + `obj_str` + `load_str` + the object
        id + "." + each extension of IMG_TYPES, in order. For the first
        existing file, a Markdown image is appended to the text and the path
        is kept so that the file gets copied when the object is written.
        """
        stem: str = CFG.data_dir + obj_str + load_str + str(self._id) + "."
        for extension in IMG_TYPES:
            candidate: str = stem + extension
            LOG.debug(f"Search static image of `{self._url_title}` at: {candidate}")
            if not isfile(candidate):
                continue
            LOG.debug(f"Found static image of `{self._url_title}` at: {candidate}")
            # Reference the image in the content and remember where it is
            self._text += f"\n\n![]({basename(candidate)})"
            self._static_img_path = candidate
            break
    # Apply post-init conversions and cancel the export if self not of the right lang
    def convert(self, forced_lang: str) -> None:
        """Apply the post-init conversions, then look for a static image.

        Title, text, extra and taxonomies are converted in that order.

        Raises LangNotFoundError when, after conversion, the language of this
        object is not `forced_lang`.
        """
        self.convert_title(forced_lang)
        self.convert_text(forced_lang)
        self.convert_extra()
        self.convert_taxonomies(forced_lang)
        if self.lang == forced_lang:
            self.append_static_images()
            return
        raise LangNotFoundError(
            f"`{self._url_title}` lang is {self.lang} instead of the wanted"
            + f" {forced_lang} and it don’t contains"
            + f" {forced_lang} translation in Markup either"
        )
 class Article(SpipRedactional, SpipArticles):
    _fileprefix: str = "index"
    _style = (BOLD, YELLOW)  # Articles accent color is yellow
    # Model options of Article
    class Meta:
        # Name of the database table holding the articles
        table_name: str = "spip_articles"
    def __init__(self, *args, **kwargs):
        """Initialize the parent classes, the id and the converted fields."""
        super().__init__(*args, **kwargs)
        self._id = self.id_article
        # Converted fields begin with an underscore
        self._accept_forum = self.accepter_forum == "oui"
        # (converted attribute, SPIP field it is computed from)
        converted_fields = (
            ("_surtitle", "surtitre"),
            ("_subtitle", "soustitre"),
            ("_caption", "chapo"),
            ("_ps", "ps"),
            ("_microblog", "microblog"),
        )
        for target, source in converted_fields:
            setattr(self, target, self.convert_field(str(getattr(self, source))))
    def frontmatter(self, append: Optional[dict[str, Any]] = None) -> str:
        """Dump the metadata of this article as a YAML string.

        Article specific entries (summary, surtitle, subtitle, date, authors,
        taxonomies, and the section id when CFG.debug_meta is set) are merged
        with `append`, if given, and handed to the parent implementation.
        """
        meta: dict[str, Any] = {
            # Article specific
            "summary": self.chapo,
            "surtitle": self.surtitre,
            "subtitle": self.soustitre,
            "date": self.date_redac,
            "authors": [author.nom for author in self.authors()],
        }
        if CFG.debug_meta:
            meta["spip_id_rubrique"] = self.id_rubrique
        if self._taxonomies:
            meta.update(self._taxonomies)
        if append is not None:
            meta = meta | append
        return super().frontmatter(meta)
    def content(self) -> str:
        """Assemble the text content of the article file.

        The parent content is followed, when not empty, by the caption and a
        "***" rule, a "POST-SCRIPTUM" section and a "MICROBLOGGING" section,
        all separated by blank lines.
        """
        parts: list[str] = [super().content()]
        if len(self._caption) > 0:
            parts.append(self._caption + "\n\n***")
        if len(self._ps) > 0:
            parts.append("# POST-SCRIPTUM\n\n" + self._ps)
        if len(self._microblog) > 0:
            parts.append("# MICROBLOGGING\n\n" + self._microblog)
        return "\n\n".join(parts)
    # Perform all the write steps of this object
    def write_all(
        self,
        parentdepth: int,
        storage_parentdir: str,
        index: int,
        total: int,
        forced_lang: str,
        parenturl: str,
    ) -> DeepDict:
        """Convert and write this article, then its documents.

        Returns a dict with the result of the parent write steps under "msg"
        and the results of the written documents under "documents".
        """
        self.convert(forced_lang)
        # The article is written before its documents
        message = super().write_all(
            parentdepth, storage_parentdir, index, total, parenturl
        )
        documents = self.write_children(self.documents(), forced_lang)
        return {"msg": message, "documents": documents}
 class Section(SpipRedactional, SpipRubriques):
    _fileprefix: str = "_index"
    _style = (BOLD, GREEN)  # Sections accent color is green
    # Model options of Section
    class Meta:
        # Name of the database table holding the sections
        table_name: str = "spip_rubriques"
    def frontmatter(self, add: Optional[dict[str, Any]] = None) -> str:
        """Dump the metadata of this section as a YAML string.

        The parent id and depth are included when CFG.debug_meta is set; they
        are merged with `add`, if given, and handed to the parent
        implementation.
        """
        meta: dict[str, Any] = {}
        if CFG.debug_meta:
            meta = {
                "spip_id_parent": self.id_parent,
                "spip_profondeur": self.profondeur,
            }
        if add is not None:
            meta = meta | add
        return super().frontmatter(meta)
    # Get articles of this section
    def articles(self, limit: int = 10**6) -> tuple[Article]:
        """Select at most `limit` articles of this section, most recent first."""
        LOG.debug(f"Initialize articles of `{self._url_title}`")
        in_this_section = Article.id_rubrique == self._id
        query = Article.select().where(in_this_section)
        return query.order_by(Article.date.desc()).limit(limit)
    # Get subsections of this section
    def sections(self, limit: int = 10**6) -> tuple["Section"]:
        """Select at most `limit` subsections of this section, most recent first."""
        LOG.debug(f"Initialize subsections of `{self._url_title}`")
        child_of_this_section = Section.id_parent == self._id
        query = Section.select().where(child_of_this_section)
        return query.order_by(Section.date.desc()).limit(limit)
    def __init__(self, *args, **kwargs):
        """Initialize the parent classes, then the id and depth of this section."""
        super().__init__(*args, **kwargs)
        # The generic id and depth mirror the SPIP section fields
        self._id, self._depth = self.id_rubrique, self.profondeur
    # Perform all the write steps of this object
    def write_all(
        self,
        parentdepth: int,
        storage_parentdir: str,
        index: int,
        total: int,
        forced_lang: str,
        parenturl: str = "",
    ) -> DeepDict:
        """Convert and write this section, then all of its children.

        Returns a dict with the result of the parent write steps under "msg",
        and the results of the written documents, articles and subsections
        under "documents", "articles" and "sections".
        """
        self.convert(forced_lang)
        # The section is written first, then its children, kind by kind
        message = super().write_all(
            parentdepth, storage_parentdir, index, total, parenturl
        )
        documents = self.write_children(self.documents(), forced_lang)
        articles = self.write_children(self.articles(), forced_lang)
        sections = self.write_children(self.sections(), forced_lang)
        return {
            "msg": message,
            "documents": documents,
            "articles": articles,
            "sections": sections,
        }
    # Append static images based on filename instead of DB to objects texts
    def append_static_images(self, obj_str: str = "rub", load_str: str = "on"):
        """Run the parent lookup with "rub" as the default `obj_str`."""
        super().append_static_images(obj_str=obj_str, load_str=load_str)
--- a/spip2md/lib.py
+++ b/spip2md/lib.py
@ -0,0 +1,147 @@
 """
 This file is part of spip2md.
 Copyright (C) 2023 LCPQ/Guilhem Fauré
 spip2md is free software: you can redistribute it and/or modify it under the terms of
 the GNU General Public License version 2 as published by the Free Software Foundation.
 spip2md is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 without even the implied warranty of MERCHANTABILITY or
 FITNESS FOR A PARTICULAR PURPOSE.
 See the GNU General Public License for more details.
 You should have received a copy of the GNU General Public License along with spip2md.
 If not, see <https://www.gnu.org/licenses/>.
 """
 import logging
 from os import makedirs, remove
 from os.path import isfile
 from shutil import rmtree
 from typing import Optional
 from spip2md.config import CFG, NAME
 from spip2md.extended_models import (
    DeepDict,
    DontExportDraftError,
    IgnoredPatternError,
    LangNotFoundError,
    Section,
 )
 from spip2md.spip_models import DB
 from spip2md.style import BOLD, esc
 # Define loggers for this file
 ROOTLOG = logging.getLogger(NAME + ".root")
 TREELOG = logging.getLogger(NAME + ".tree")
 # Initialize the database with settings from CFG
 DB.init(CFG.db, host=CFG.db_host, user=CFG.db_user, password=CFG.db_pass)
 # Write the root sections and their subtrees
 def write_root(parent_dir: str, parent_id: int = 0) -> DeepDict:
    """Write the sections of parent `parent_id` and their whole subtrees.

    The sections are written into `parent_dir`, once per language of
    CFG.export_languages. Sections that raise LangNotFoundError,
    DontExportDraftError or IgnoredPatternError are logged and skipped.

    Returns a dict holding, under "sections", the results of the written
    sections.
    """
    # Print starting message
    print(
        f"""\
 Begin exporting {esc(BOLD)}{CFG.db}@{CFG.db_host}{esc()} SPIP database to plain \
 Markdown+YAML files,
 into the directory {esc(BOLD)}{parent_dir}{esc()}, \
 as database user {esc(BOLD)}{CFG.db_user}{esc()}
 """
    )
    buffer: list[DeepDict] = []  # Define temporary storage for output
    # Write each section (with its entire subtree) for each export language
    # Language specified in database can differ from markup, so we force a
    #   language and remove irrelevant ones at each looping
    for lang in CFG.export_languages:
        ROOTLOG.debug("Initialize root sections")
        # Get all sections whose parent is parent_id
        child_sections: tuple[Section, ...] = (
            Section.select()
            .where(Section.id_parent == parent_id)
            .order_by(Section.date.desc())
        )
        nb: int = len(child_sections)
        for i, s in enumerate(child_sections):
            ROOTLOG.debug(f"Begin exporting {lang} root section {i}/{nb}")
            try:
                # Write into the directory announced above: the parent_dir
                # argument, not unconditionally CFG.output_dir
                buffer.append(s.write_all(-1, parent_dir, i, nb, lang))
            except (
                LangNotFoundError,
                DontExportDraftError,  # Will happen if not CFG.export_drafts
                IgnoredPatternError,
            ) as err:
                ROOTLOG.debug(err)  # Log the message
            print()  # Break line between level 0 sections in output
            ROOTLOG.debug(
                f"Finished exporting {lang} root section {i}/{nb} {s._url_title}"
            )
    return {"sections": buffer}
 # Count on outputted tree & print results if finished
 def summarize(
    tree: DeepDict | list[DeepDict] | list[str],
    depth: int = -1,
    prevkey: Optional[str] = None,
    counter: Optional[dict[str, int]] = None,
 ) -> dict[str, int]:
    """Count the elements of an exported tree, by the key they are listed under.

    Every element of a list found under a dict key adds one to the count of
    that key; dict elements are explored recursively, and dict values that
    are not lists are ignored. Subclasses of dict and list are handled too.

    At the root call (`depth` of -1) the tree is logged, the totals are
    printed, and a note about the log file is printed if that file exists.

    Returns the mapping from key to number of elements.
    """
    if counter is None:
        counter = {}
        # __import__("pprint").pprint(tree)  # DEBUG
    if isinstance(tree, dict):
        for key, sub in tree.items():
            # If sub is a str, it’s just the name: don’t count
            if isinstance(sub, list):
                counter = summarize(sub, depth + 1, key, counter)
    elif isinstance(tree, list):
        for sub in tree:
            if prevkey is not None:
                counter[prevkey] = counter.get(prevkey, 0) + 1
            if isinstance(sub, dict):
                counter = summarize(sub, depth + 1, None, counter)
    # End message only if it’s the root one
    if depth == -1:
        TREELOG.debug(tree)
        totals: str = ", ".join(
            f"{esc(BOLD)}{val}{esc()} {key}" for key, val in counter.items()
        )
        print(f"Exported a total of {totals}")
        # Warn about issued warnings in log file
        if isfile(CFG.logfile):
            print(
                f"Logging level was set to {esc(BOLD)}{CFG.loglevel}{esc()}, there are"
                + f" warnings and informations in {esc(BOLD)}{CFG.logfile}{esc()}"
            )
    return counter
 # Clear the previous log file if needed, then configure logging
 def init_logging(**kwargs) -> None:
    """Remove the previous log file if configured to, then configure logging.

    Logging goes to CFG.logfile, UTF-8 encoded, at level CFG.loglevel; extra
    keyword arguments are passed on to `logging.basicConfig`.
    """
    stale_log: bool = CFG.clear_log and isfile(CFG.logfile)
    if stale_log:
        remove(CFG.logfile)
    logging.basicConfig(
        encoding="utf-8", filename=CFG.logfile, level=CFG.loglevel, **kwargs
    )
 # Clear the output dir if needed & create a new
 def clear_output() -> None:
    """Remove the output directory if configured to, then make sure it exists."""
    if CFG.clear_output:
        # Errors are ignored by rmtree
        rmtree(CFG.output_dir, ignore_errors=True)
    makedirs(CFG.output_dir, exist_ok=True)
 # When directly executed as a script
 def cli():
    """Entry point: set up logging and output, export, then print a summary."""
    init_logging()  # Initialize logging and logfile
    clear_output()  # Eventually remove already existing output dir
    # The database holding the SPIP site is only connected inside this block
    with DB:
        exported_tree = write_root(CFG.output_dir)
        summarize(exported_tree)
--- a/spip2md/regexmaps.py
+++ b/spip2md/regexmaps.py
@ -16,7 +16,7 @@ If not, see <https://www.gnu.org/licenses/>.
 # pyright: strict
 from re import I, S, compile
-LANGCODES = ("en", "fr", "es")  # And more …
+from spip2md.config import CFG
 # ((SPIP syntax, Replacement Markdown syntax), …)
 SPIP_MARKDOWN = (
@ -167,7 +167,7 @@ SECTION_LINK = (
 MULTILANG_BLOCK = compile(r"<multi>(.+?)<\/multi>", S | I)
 CONFIG_LANGS = {  # lang of configuration: (match against this lang)
    lang: compile(r"\[ *" + lang + r" *\]\s*(.+?)\s*(?=\[[a-zA-Z\-]{2,6}\]|$)", S | I)
-    for lang in LANGCODES
+    for lang in CFG.export_languages
 }
 # MULTILANGS = compile(  # Matches against all langs of multi blocks
 #     r"\[([a-zA-Z\-]{2,6})\]\s*(.+?)\s*(?=\[[a-zA-Z\-]{2,6}\]|$)", S | I
@ -316,7 +316,10 @@ SPECIAL_OUTPUT = (
    compile(r"^([0-9]+?\.)(?= )"),  # Counter
    compile(r"(?<= )(->)(?= )"),  # Arrow
    compile(r"(?<=^Exporting )([0-9]+?)(?= )"),  # Total
-) + tuple(compile(r"(?<=level [0-9] )(" + language + r" )") for language in LANGCODES)
+) + tuple(
    compile(r"(?<=level [0-9] )(" + language + r" )")
    for language in CFG.export_languages
 )
 # Warning elements in terminal output to highlight
 WARNING_OUTPUT = (
--- a/spip2md/spip_models.py
+++ b/spip2md/spip_models.py
@ -12,9 +12,6 @@ See the GNU General Public License for more details.
 You should have received a copy of the GNU General Public License along with spip2md.
 If not, see <https://www.gnu.org/licenses/>.
 This file defines Spip database as objects, attributes and methods with Peewee
 """
 # type: ignore
 from peewee import (
@ -32,12 +29,17 @@ from peewee import (
    TextField,
 )
-DB: MySQLDatabase = MySQLDatabase(None)
+DB = MySQLDatabase(None)
 # class UnknownField(object):
 #     def __init__(self, *_, **__):
 #         pass
 class BaseModel(Model):
    class Meta:
-        database = DB
+        database: MySQLDatabase = DB
 class SpipArticles(BaseModel):
@ -72,7 +74,7 @@ class SpipArticles(BaseModel):
    visites = IntegerField(constraints=[SQL("DEFAULT 0")])
    class Meta:
-        table_name = "spip_articles"
+        table_name: str = "spip_articles"
        indexes = ((("statut", "date"), False),)
@ -105,7 +107,7 @@ class SpipAuteurs(BaseModel):
    webmestre = CharField(constraints=[SQL("DEFAULT 'non'")])
    class Meta:
-        table_name = "spip_auteurs"
+        table_name: str = "spip_auteurs"
 class SpipAuteursLiens(BaseModel):
@ -115,7 +117,7 @@ class SpipAuteursLiens(BaseModel):
    vu = CharField(constraints=[SQL("DEFAULT 'non'")])
    class Meta:
-        table_name = "spip_auteurs_liens"
+        table_name: str = "spip_auteurs_liens"
        indexes = ((("id_auteur", "id_objet", "objet"), True),)
        primary_key = CompositeKey("id_auteur", "id_objet", "objet")
@ -135,7 +137,7 @@ class SpipBreves(BaseModel):
    titre = TextField()
    class Meta:
-        table_name = "spip_breves"
+        table_name: str = "spip_breves"
 class SpipDepots(BaseModel):
@ -155,7 +157,7 @@ class SpipDepots(BaseModel):
    xml_paquets = CharField(constraints=[SQL("DEFAULT ''")])
    class Meta:
-        table_name = "spip_depots"
+        table_name: str = "spip_depots"
 class SpipDepotsPlugins(BaseModel):
@ -163,7 +165,7 @@ class SpipDepotsPlugins(BaseModel):
    id_plugin = BigIntegerField()
    class Meta:
-        table_name = "spip_depots_plugins"
+        table_name: str = "spip_depots_plugins"
        indexes = ((("id_depot", "id_plugin"), True),)
        primary_key = CompositeKey("id_depot", "id_plugin")
@ -189,7 +191,7 @@ class SpipDocuments(BaseModel):
    titre = TextField()
    class Meta:
-        table_name = "spip_documents"
+        table_name: str = "spip_documents"
 class SpipDocumentsLiens(BaseModel):
@ -199,7 +201,7 @@ class SpipDocumentsLiens(BaseModel):
    vu = CharField(constraints=[SQL("DEFAULT 'non'")])
    class Meta:
-        table_name = "spip_documents_liens"
+        table_name: str = "spip_documents_liens"
        indexes = ((("id_document", "id_objet", "objet"), True),)
        primary_key = CompositeKey("id_document", "id_objet", "objet")
@ -229,7 +231,7 @@ class SpipEvenements(BaseModel):
    titre = TextField()
    class Meta:
-        table_name = "spip_evenements"
+        table_name: str = "spip_evenements"
 class SpipEvenementsParticipants(BaseModel):
@ -242,7 +244,7 @@ class SpipEvenementsParticipants(BaseModel):
    reponse = CharField(constraints=[SQL("DEFAULT '?'")])
    class Meta:
-        table_name = "spip_evenements_participants"
+        table_name: str = "spip_evenements_participants"
 class SpipForum(BaseModel):
@ -265,7 +267,7 @@ class SpipForum(BaseModel):
    url_site = TextField()
    class Meta:
-        table_name = "spip_forum"
+        table_name: str = "spip_forum"
        indexes = ((("statut", "id_parent", "id_objet", "objet", "date_heure"), False),)
@ -283,7 +285,7 @@ class SpipGroupesMots(BaseModel):
    unseul = CharField(constraints=[SQL("DEFAULT ''")])
    class Meta:
-        table_name = "spip_groupes_mots"
+        table_name: str = "spip_groupes_mots"
 class SpipJobs(BaseModel):
@ -298,7 +300,7 @@ class SpipJobs(BaseModel):
    status = IntegerField(constraints=[SQL("DEFAULT 1")], index=True)
    class Meta:
-        table_name = "spip_jobs"
+        table_name: str = "spip_jobs"
 class SpipJobsLiens(BaseModel):
@ -307,7 +309,7 @@ class SpipJobsLiens(BaseModel):
    objet = CharField(constraints=[SQL("DEFAULT ''")])
    class Meta:
-        table_name = "spip_jobs_liens"
+        table_name: str = "spip_jobs_liens"
        indexes = ((("id_job", "id_objet", "objet"), True),)
        primary_key = CompositeKey("id_job", "id_objet", "objet")
@ -321,7 +323,7 @@ class SpipMeslettres(BaseModel):
    url_txt = CharField()
    class Meta:
-        table_name = "spip_meslettres"
+        table_name: str = "spip_meslettres"
 class SpipMessages(BaseModel):
@ -338,7 +340,7 @@ class SpipMessages(BaseModel):
    type = CharField(constraints=[SQL("DEFAULT ''")])
    class Meta:
-        table_name = "spip_messages"
+        table_name: str = "spip_messages"
 class SpipMeta(BaseModel):
@ -348,7 +350,7 @@ class SpipMeta(BaseModel):
    valeur = TextField(null=True)
    class Meta:
-        table_name = "spip_meta"
+        table_name: str = "spip_meta"
 class SpipMots(BaseModel):
@ -362,7 +364,7 @@ class SpipMots(BaseModel):
    type = TextField()
    class Meta:
-        table_name = "spip_mots"
+        table_name: str = "spip_mots"
 class SpipMotsLiens(BaseModel):
@ -371,7 +373,7 @@ class SpipMotsLiens(BaseModel):
    objet = CharField(constraints=[SQL("DEFAULT ''")], index=True)
    class Meta:
-        table_name = "spip_mots_liens"
+        table_name: str = "spip_mots_liens"
        indexes = ((("id_mot", "id_objet", "objet"), True),)
        primary_key = CompositeKey("id_mot", "id_objet", "objet")
@ -384,7 +386,7 @@ class SpipOrthoCache(BaseModel):
    suggest = TextField()
    class Meta:
-        table_name = "spip_ortho_cache"
+        table_name: str = "spip_ortho_cache"
        indexes = ((("lang", "mot"), True),)
        primary_key = CompositeKey("lang", "mot")
@ -396,7 +398,7 @@ class SpipOrthoDico(BaseModel):
    mot = CharField()
    class Meta:
-        table_name = "spip_ortho_dico"
+        table_name: str = "spip_ortho_dico"
        indexes = ((("lang", "mot"), True),)
        primary_key = CompositeKey("lang", "mot")
@ -441,7 +443,7 @@ class SpipPaquets(BaseModel):
    version_base = CharField(constraints=[SQL("DEFAULT ''")])
    class Meta:
-        table_name = "spip_paquets"
+        table_name: str = "spip_paquets"
 class SpipPetitions(BaseModel):
@ -456,7 +458,7 @@ class SpipPetitions(BaseModel):
    texte = TextField()
    class Meta:
-        table_name = "spip_petitions"
+        table_name: str = "spip_petitions"
 class SpipPlugins(BaseModel):
@ -473,7 +475,7 @@ class SpipPlugins(BaseModel):
    vmax = CharField(constraints=[SQL("DEFAULT ''")])
    class Meta:
-        table_name = "spip_plugins"
+        table_name: str = "spip_plugins"
 class SpipReferers(BaseModel):
@ -486,7 +488,7 @@ class SpipReferers(BaseModel):
    visites_veille = IntegerField(constraints=[SQL("DEFAULT 0")])
    class Meta:
-        table_name = "spip_referers"
+        table_name: str = "spip_referers"
 class SpipReferersArticles(BaseModel):
@ -497,7 +499,7 @@ class SpipReferersArticles(BaseModel):
    visites = IntegerField()
    class Meta:
-        table_name = "spip_referers_articles"
+        table_name: str = "spip_referers_articles"
        indexes = ((("id_article", "referer_md5"), True),)
        primary_key = CompositeKey("id_article", "referer_md5")
@ -511,7 +513,7 @@ class SpipResultats(BaseModel):
    table_objet = CharField(constraints=[SQL("DEFAULT ''")])
    class Meta:
-        table_name = "spip_resultats"
+        table_name: str = "spip_resultats"
        primary_key = False
@ -535,7 +537,7 @@ class SpipRubriques(BaseModel):
    titre = TextField()
    class Meta:
-        table_name = "spip_rubriques"
+        table_name: str = "spip_rubriques"
 class SpipSignatures(BaseModel):
@ -551,7 +553,7 @@ class SpipSignatures(BaseModel):
    url_site = TextField()
    class Meta:
-        table_name = "spip_signatures"
+        table_name: str = "spip_signatures"
 class SpipSyndic(BaseModel):
@ -575,7 +577,7 @@ class SpipSyndic(BaseModel):
    url_syndic = TextField()
    class Meta:
-        table_name = "spip_syndic"
+        table_name: str = "spip_syndic"
        indexes = ((("statut", "date_syndic"), False),)
@ -595,14 +597,14 @@ class SpipSyndicArticles(BaseModel):
    url_source = TextField()
    class Meta:
-        table_name = "spip_syndic_articles"
+        table_name: str = "spip_syndic_articles"
 class SpipTest(BaseModel):
    a = IntegerField(null=True)
    class Meta:
-        table_name = "spip_test"
+        table_name: str = "spip_test"
        primary_key = False
@ -617,7 +619,7 @@ class SpipTypesDocuments(BaseModel):
    upload = CharField(constraints=[SQL("DEFAULT 'oui'")])
    class Meta:
-        table_name = "spip_types_documents"
+        table_name: str = "spip_types_documents"
 class SpipUrls(BaseModel):
@ -630,7 +632,7 @@ class SpipUrls(BaseModel):
    url = CharField()
    class Meta:
-        table_name = "spip_urls"
+        table_name: str = "spip_urls"
        indexes = (
            (("id_parent", "url"), True),
            (("type", "id_objet"), False),
@ -649,7 +651,7 @@ class SpipVersions(BaseModel):
    titre_version = TextField()
    class Meta:
-        table_name = "spip_versions"
+        table_name: str = "spip_versions"
        indexes = ((("id_version", "id_objet", "objet"), True),)
        primary_key = CompositeKey("id_objet", "id_version", "objet")
@ -664,7 +666,7 @@ class SpipVersionsFragments(BaseModel):
    version_min = IntegerField(constraints=[SQL("DEFAULT 0")])
    class Meta:
-        table_name = "spip_versions_fragments"
+        table_name: str = "spip_versions_fragments"
        indexes = ((("id_objet", "objet", "id_fragment", "version_min"), True),)
        primary_key = CompositeKey("id_fragment", "id_objet", "objet", "version_min")
@ -675,7 +677,7 @@ class SpipVisites(BaseModel):
    visites = IntegerField(constraints=[SQL("DEFAULT 0")])
    class Meta:
-        table_name = "spip_visites"
+        table_name: str = "spip_visites"
 class SpipVisitesArticles(BaseModel):
@ -685,7 +687,7 @@ class SpipVisitesArticles(BaseModel):
    visites = IntegerField(constraints=[SQL("DEFAULT 0")])
    class Meta:
-        table_name = "spip_visites_articles"
+        table_name: str = "spip_visites_articles"
        indexes = ((("date", "id_article"), True),)
        primary_key = CompositeKey("date", "id_article")
@ -699,7 +701,7 @@ class SpipZones(BaseModel):
    titre = CharField(constraints=[SQL("DEFAULT ''")])
    class Meta:
-        table_name = "spip_zones"
+        table_name: str = "spip_zones"
 class SpipZonesLiens(BaseModel):
@ -708,11 +710,6 @@ class SpipZonesLiens(BaseModel):
    objet = CharField(constraints=[SQL("DEFAULT ''")])
    class Meta:
-        table_name = "spip_zones_liens"
+        table_name: str = "spip_zones_liens"
        indexes = ((("id_zone", "id_objet", "objet"), True),)
        primary_key = CompositeKey("id_objet", "id_zone", "objet")
 # class UnknownField(object):
 #     def __init__(self, *_, **__):
 #         pass
--- a/spip2md/style.py
+++ b/spip2md/style.py
@ -12,19 +12,7 @@ See the GNU General Public License for more details.
 You should have received a copy of the GNU General Public License along with spip2md.
 If not, see <https://www.gnu.org/licenses/>.
 This file contains functions needed to control this package from the command line and to
 define printable classes which add terminal printing capabilities to Spip objects
 """
 # pyright: strict
 from os import makedirs
 from shutil import rmtree
 from spip2md.config import Configuration
 from spip2md.spip_models import DB
 from spip2md.write import WritableSite
 # Define styles for terminal printing
 BOLD = 1  # Bold
 ITALIC = 3  # Italic
@ -52,31 +40,3 @@ def esc(*args: int) -> str:
        params += str(a) + ";"
    # Base terminal escape sequence that needs to be closed by "m"
    return "\033[" + params[:-1] + "m"
 # Extend WritableSite to add terminal output capabilities
 class PrintableSite(WritableSite):
    # NOTE(review): for now this only returns the constant string "write path" and
    # prints nothing; presumably a placeholder for the real output, to be confirmed
    def write(self) -> str:
        return "write path"
 # Initialize DB database connection from config
 # DB is created in spip_models with MySQLDatabase(None), so it has no database name
 # until this call provides it (cfg.db) along with the host, user and password
 def init_db(cfg: Configuration):
    DB.init(  # type: ignore
        cfg.db, host=cfg.db_host, user=cfg.db_user, password=cfg.db_pass
    )
 # Command line entry point: argv is forwarded unchanged to Configuration
 def main(*argv: str):
    cfg = Configuration(*argv)  # Get the configuration
    init_db(cfg)
    # Remove an already existing output dir, only if the configuration asks for it
    if cfg.clear_output:
        rmtree(cfg.output_dir, True)  # True is ignore_errors: removal failures are ignored
    # Create the output dir, without failing if it already exists
    makedirs(cfg.output_dir, exist_ok=True)
    with DB:  # Connect to the database where SPIP site is stored in this block
        # Write everything while printing the output human-readably
        # (the string returned by write() is not used here)
        PrintableSite(cfg).write()
--- a/spip2md/test.py
+++ b/spip2md/test.py
@ -1,16 +0,0 @@
 # File for test purposes, mainly in interactive python
 from spip2md.config import Configuration
 from spip2md.convert import (
    ConvertableSite,
 )
 from spip2md.spip_models import DB
 cfg = Configuration()  # Get the configuration
 # Give the shared DB handle its database name and credentials from the configuration
 DB.init(  # type: ignore
    cfg.db, host=cfg.db_host, user=cfg.db_user, password=cfg.db_pass
 )
 # Site object built from the configuration, kept as a global for interactive use
 SITE = ConvertableSite(cfg)
 # NOTE(review): looks like an (object type, object id) pair kept as a handy test value
 # TODO confirm, nothing in this file reads it
 ID = ("document", 1293)
--- a/spip2md/write.py
+++ b/spip2md/write.py
@ -1,24 +0,0 @@
 """
 This file is part of spip2md.
 Copyright (C) 2023 LCPQ/Guilhem Fauré
 spip2md is free software: you can redistribute it and/or modify it under the terms of
 the GNU General Public License version 2 as published by the Free Software Foundation.
 spip2md is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 without even the implied warranty of MERCHANTABILITY or
 FITNESS FOR A PARTICULAR PURPOSE.
 See the GNU General Public License for more details.
 You should have received a copy of the GNU General Public License along with spip2md.
 If not, see <https://www.gnu.org/licenses/>.
 This file adds write to disk capabilities to spip objects
 """
 from spip2md.convert import ConvertableSite
 # Subclass of ConvertableSite meant to add write to disk capabilities
 class WritableSite(ConvertableSite):
    # NOTE(review): writes nothing yet, it only returns the constant string
    # "write path"; presumably a placeholder, to be confirmed
    def write(self) -> str:
        return "write path"
Author	SHA1	Message	Date
Guilhem Fauré	55e19d4825	up version	2023-06-23 16:47:17 +02:00
Guilhem Fauré	b245ff75c5	option to rename taxonomies	2023-06-23 15:11:01 +02:00
Guilhem Fauré	8769185b8d	proper taxonomy management, no more tags only	2023-06-23 11:47:22 +02:00
Guilhem Fauré	164a1e8228	tags conversion	2023-06-23 11:14:14 +02:00
Guilhem Fauré	3cc90f6dc2	fixed tags, use translated description instead of id	2023-06-23 11:12:40 +02:00
Guilhem Fauré	00f3ed6cf9	added static images	2023-06-23 09:59:30 +02:00
Guilhem Fauré	3ab94aa388	add tags IDs	2023-06-23 09:34:37 +02:00