refactor to use Peewee objects extension in place of redefining every SPIP atribute
This commit is contained in:
parent
4d269357de
commit
13fa720562
@ -3,7 +3,7 @@ from re import I, S, compile, finditer, sub
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
# SPIP syntax to Markdown
|
# SPIP syntax to Markdown
|
||||||
spip_to_markdown = (
|
SPIP_TO_MARKDOWN = (
|
||||||
( # horizontal rule
|
( # horizontal rule
|
||||||
compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I),
|
compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I),
|
||||||
# r"---",
|
# r"---",
|
||||||
@ -40,6 +40,14 @@ spip_to_markdown = (
|
|||||||
),
|
),
|
||||||
r"~\1~",
|
r"~\1~",
|
||||||
),
|
),
|
||||||
|
( # images
|
||||||
|
compile(r"<(img|image)([0-9]+)(\|.*?)*>", S | I),
|
||||||
|
r"![](\1\2)",
|
||||||
|
),
|
||||||
|
( # documents & embeds
|
||||||
|
compile(r"<(doc|document|emb)([0-9]+)(\|.*?)*>", S | I),
|
||||||
|
r"[](\1\2)",
|
||||||
|
),
|
||||||
( # anchor
|
( # anchor
|
||||||
compile(r"\[ *(.*?) *-> *(.*?) *\]", S | I),
|
compile(r"\[ *(.*?) *-> *(.*?) *\]", S | I),
|
||||||
r"[\1](\2)",
|
r"[\1](\2)",
|
||||||
@ -100,58 +108,20 @@ spip_to_markdown = (
|
|||||||
),
|
),
|
||||||
r"\1",
|
r"\1",
|
||||||
),
|
),
|
||||||
)
|
( # WARNING remove every html tag
|
||||||
|
compile(r"<\/?.*?>\s*", S | I),
|
||||||
spip_to_text = (
|
|
||||||
( # strong
|
|
||||||
compile(r"\{\{ *(.*?) *\}\}", S | I),
|
|
||||||
r"\1",
|
|
||||||
),
|
|
||||||
( # html strong
|
|
||||||
compile(r"<strong> *(.*?) *</strong>", S | I),
|
|
||||||
r"\1",
|
|
||||||
),
|
|
||||||
( # emphasis
|
|
||||||
compile(r"\{ *(.*?) *\}", S | I),
|
|
||||||
r"\1",
|
|
||||||
),
|
|
||||||
( # html emphasis
|
|
||||||
compile(r"<i> *(.*?) *<\/i>", S | I),
|
|
||||||
r"\1",
|
|
||||||
),
|
|
||||||
( # strikethrough
|
|
||||||
compile(
|
|
||||||
r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
|
|
||||||
S | I,
|
|
||||||
),
|
|
||||||
r"\1",
|
|
||||||
),
|
|
||||||
( # Keep only the first language in multi-language blocks
|
|
||||||
compile(
|
|
||||||
r"<multi>\s*(?:\[.{2,4}\])?\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>",
|
|
||||||
S | I,
|
|
||||||
),
|
|
||||||
r"\1",
|
|
||||||
),
|
|
||||||
( # remove every html tag
|
|
||||||
compile(r"<\/?.*?> *", S | I),
|
|
||||||
r"",
|
|
||||||
),
|
|
||||||
( # Remove beginning with angle bracket(s)
|
|
||||||
compile(r"^>+ +", S | I),
|
|
||||||
r"",
|
|
||||||
),
|
|
||||||
( # Remove beginning with a number followed by a dot
|
|
||||||
compile(r"^\d+\. +", S | I),
|
|
||||||
r"",
|
r"",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# HTML tag WARNING can be used to remove them all
|
# Further cleaning for metadata texts such as titles or descriptions
|
||||||
html_tag = compile(r"<\/?.*?> *", S | I)
|
SPIP_META_BLOAT = (
|
||||||
|
compile(r"^>+ +", S | I), # Remove beginning with angle bracket(s)
|
||||||
|
compile(r"^\d+\. +", S | I), # Remove beginning with a number followed by a dot
|
||||||
|
)
|
||||||
|
|
||||||
# Broken ISO encoding to proper UTF-8
|
# Broken ISO encoding to proper UTF-8
|
||||||
iso_to_utf = (
|
ISO_TO_UTF = (
|
||||||
( # Fix UTF-8 appostrophe that was interpreted as ISO 8859-1
|
( # Fix UTF-8 appostrophe that was interpreted as ISO 8859-1
|
||||||
"’",
|
"’",
|
||||||
r"’",
|
r"’",
|
||||||
@ -264,82 +234,71 @@ iso_to_utf = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
# WARNING unknown broken encoding
|
# WARNING unknown broken encoding
|
||||||
unknown_iso = (
|
UNKNOWN_ISO = (
|
||||||
r"
",
|
r"
",
|
||||||
r"∆",
|
r"∆",
|
||||||
r"û",
|
r"û",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Apply spip_to_markdown conversions to a text
|
# Apply SPIP to Markdown & ISO to UTF conversions to a text, & eventually clean meta
|
||||||
def convert_body(text: Optional[str]) -> str:
|
def convert(text: Optional[str], clean_meta: bool = False) -> str:
|
||||||
if text is None:
|
if text is None:
|
||||||
return ""
|
return ""
|
||||||
for spip, markdown in spip_to_markdown:
|
for spip, markdown in SPIP_TO_MARKDOWN:
|
||||||
text = spip.sub(markdown, text)
|
text = spip.sub(markdown, text)
|
||||||
for iso, utf in iso_to_utf:
|
if clean_meta:
|
||||||
|
for bloat in SPIP_META_BLOAT:
|
||||||
|
text = bloat.sub("", text)
|
||||||
|
for iso, utf in ISO_TO_UTF:
|
||||||
text = text.replace(iso, utf)
|
text = text.replace(iso, utf)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
# Apply spip_to_text conversions to a text
|
# Replace images & files links in Markdown with real slugs of the actually linked files
|
||||||
def convert_meta(text: Optional[str]) -> str:
|
def link_documents(text: str, documents: list[tuple[int, str, str]]) -> str:
|
||||||
if text is None:
|
|
||||||
return ""
|
|
||||||
for spip, metadata in spip_to_text:
|
|
||||||
text = spip.sub(metadata, text)
|
|
||||||
for iso, utf in iso_to_utf:
|
|
||||||
text = text.replace(iso, utf)
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
# Replace images & documents in SPIP text with Markdown links with human-readable names
|
|
||||||
def convert_documents(text: str, documents: list[tuple[int, str, str]]) -> str:
|
|
||||||
for id, name, slug in documents:
|
for id, name, slug in documents:
|
||||||
|
# Replace images that dont have a title written in text
|
||||||
text = sub(
|
text = sub(
|
||||||
r"<(?:img|image)" + str(id) + r"(\|.*?)*>",
|
r"\[]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
|
||||||
f"![{name}]({slug})",
|
f"![{name}]({slug})",
|
||||||
text,
|
text,
|
||||||
)
|
)
|
||||||
|
# Replace images that dont have a title written in text
|
||||||
text = sub(
|
text = sub(
|
||||||
r"<(?:doc|emb)" + str(id) + r"(\|.*?)*>",
|
r"\[]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)",
|
||||||
f"[{name}]({slug})",
|
f"[{name}]({slug})",
|
||||||
text,
|
text,
|
||||||
)
|
)
|
||||||
|
# Replace images that already had a title in Markdown style link
|
||||||
text = sub(
|
text = sub(
|
||||||
r"\[(.*?)\]\((?:doc|emb)" + str(id) + r"(\|.*?)*\)",
|
r"\[(.+?)\]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
|
||||||
|
f"![\\1]({slug})",
|
||||||
|
text,
|
||||||
|
)
|
||||||
|
# Replace documents that already had a title in Markdown style link
|
||||||
|
text = sub(
|
||||||
|
r"\[(.+?)\]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)",
|
||||||
f"[\\1]({slug})",
|
f"[\\1]({slug})",
|
||||||
text,
|
text,
|
||||||
)
|
)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
# Replace unknown chars with empty strings (delete them)
|
|
||||||
def remove_unknown_chars(text: str) -> str:
|
|
||||||
for char in unknown_iso:
|
|
||||||
text.replace(char, "")
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
# Replace HTML tags chars with empty strings (delete them)
|
|
||||||
def remove_tags(text: str) -> str:
|
|
||||||
return html_tag.sub("", text)
|
|
||||||
|
|
||||||
|
|
||||||
# Return a list of tuples giving the start and end of unknown substring in text
|
# Return a list of tuples giving the start and end of unknown substring in text
|
||||||
def unknown_chars(text: str) -> list[tuple[int, int]]:
|
def unknown_chars(text: str) -> list[tuple[int, int]]:
|
||||||
positions: list[tuple[int, int]] = []
|
positions: list[tuple[int, int]] = []
|
||||||
for char in unknown_iso:
|
for char in UNKNOWN_ISO:
|
||||||
for match in finditer("(" + char + ")+", text):
|
for match in finditer("(" + char + ")+", text):
|
||||||
positions.append((match.start(), match.end()))
|
positions.append((match.start(), match.end()))
|
||||||
return positions
|
return positions
|
||||||
|
|
||||||
|
|
||||||
# Return strings with unknown chards found in text, surrounded by context_length chars
|
# Return strings with unknown chards found in text, surrounded by context_length chars
|
||||||
def get_unknown_chars(text: str, context_length: int = 20) -> list[str]:
|
def unknown_chars_context(text: str, context_length: int = 20) -> list[str]:
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
context: str = r".{0," + str(context_length) + r"}"
|
context: str = r".{0," + str(context_length) + r"}"
|
||||||
for char in unknown_iso:
|
for char in UNKNOWN_ISO:
|
||||||
matches = finditer(
|
matches = finditer(
|
||||||
context + r"(?=" + char + r")" + char + r".*?(?=\r?\n|$)",
|
context + r"(?=" + char + r")" + char + r".*?(?=\r?\n|$)",
|
||||||
text,
|
text,
|
@ -1,4 +1,3 @@
|
|||||||
# pyright: basic
|
|
||||||
# type: ignore
|
# type: ignore
|
||||||
from peewee import (
|
from peewee import (
|
||||||
SQL,
|
SQL,
|
||||||
@ -15,7 +14,7 @@ from peewee import (
|
|||||||
TextField,
|
TextField,
|
||||||
)
|
)
|
||||||
|
|
||||||
db = MySQLDatabase(None)
|
DB = MySQLDatabase(None)
|
||||||
|
|
||||||
|
|
||||||
# class UnknownField(object):
|
# class UnknownField(object):
|
||||||
@ -25,7 +24,7 @@ db = MySQLDatabase(None)
|
|||||||
|
|
||||||
class BaseModel(Model):
|
class BaseModel(Model):
|
||||||
class Meta:
|
class Meta:
|
||||||
database: MySQLDatabase = db
|
database: MySQLDatabase = DB
|
||||||
|
|
||||||
|
|
||||||
class SpipArticles(BaseModel):
|
class SpipArticles(BaseModel):
|
||||||
|
259
spip2md/items.py
259
spip2md/items.py
@ -1,259 +0,0 @@
|
|||||||
# pyright: strict
|
|
||||||
from os.path import basename, splitext
|
|
||||||
from typing import Any, Optional
|
|
||||||
|
|
||||||
from slugify import slugify
|
|
||||||
from yaml import dump
|
|
||||||
|
|
||||||
from converter import convert_body, convert_documents, convert_meta, remove_tags
|
|
||||||
from database import (
|
|
||||||
SpipArticles,
|
|
||||||
SpipAuteurs,
|
|
||||||
SpipAuteursLiens,
|
|
||||||
SpipDocuments,
|
|
||||||
SpipDocumentsLiens,
|
|
||||||
SpipRubriques,
|
|
||||||
)
|
|
||||||
|
|
||||||
EXPORTTYPE: str = "md"
|
|
||||||
|
|
||||||
|
|
||||||
class Iterator:
|
|
||||||
items: list[Any]
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
|
||||||
# Set the limit at the number of retrieved items
|
|
||||||
self.LIMIT: int = len(self.items)
|
|
||||||
# Start before the first element
|
|
||||||
self.count: int = -1
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __len__(self) -> int:
|
|
||||||
return self.LIMIT
|
|
||||||
|
|
||||||
def remaining(self) -> int:
|
|
||||||
return self.LIMIT - self.count
|
|
||||||
|
|
||||||
def __next__(self) -> Any:
|
|
||||||
self.count += 1
|
|
||||||
if self.remaining() <= 0:
|
|
||||||
raise StopIteration
|
|
||||||
return self.items[self.count]
|
|
||||||
|
|
||||||
|
|
||||||
class Document:
|
|
||||||
def __init__(self, document: SpipDocuments) -> None:
|
|
||||||
self.id: int = document.id_document
|
|
||||||
self.thumbnail_id: int = document.id_vignette
|
|
||||||
self.title: str = convert_meta(document.titre)
|
|
||||||
self.date: str = document.date
|
|
||||||
self.description: str = convert_meta(document.descriptif)
|
|
||||||
self.file: str = document.fichier
|
|
||||||
self.draft: bool = document.statut == "publie"
|
|
||||||
self.creation: str = document.date
|
|
||||||
self.publication: str = document.date_publication
|
|
||||||
self.update: str = document.maj
|
|
||||||
self.media: str = document.media
|
|
||||||
|
|
||||||
def get_slug(self, date: bool = False) -> str:
|
|
||||||
name_type = splitext(basename(self.file))
|
|
||||||
return (
|
|
||||||
slugify((self.publication + "-" if date else "") + name_type[0])
|
|
||||||
+ name_type[1]
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class Documents(Iterator):
|
|
||||||
def __init__(self, object_id: int) -> None:
|
|
||||||
# Query the DB to retrieve all documents related to object of id object_id
|
|
||||||
items = (
|
|
||||||
SpipDocuments.select()
|
|
||||||
.join(
|
|
||||||
SpipDocumentsLiens,
|
|
||||||
on=(SpipDocuments.id_document == SpipDocumentsLiens.id_document),
|
|
||||||
)
|
|
||||||
.where(SpipDocumentsLiens.id_objet == object_id)
|
|
||||||
)
|
|
||||||
self.items: list[Document] = [Document(i) for i in items]
|
|
||||||
super().__init__()
|
|
||||||
|
|
||||||
|
|
||||||
class Item:
|
|
||||||
id: int
|
|
||||||
|
|
||||||
def __init__(self, item: SpipArticles | SpipRubriques):
|
|
||||||
self.title: str = convert_meta(item.titre)
|
|
||||||
self.section_id: int = item.id_rubrique
|
|
||||||
self.description: str = convert_meta(item.descriptif)
|
|
||||||
self.text: str = convert_body(item.texte) # Convert SPIP to Markdown
|
|
||||||
self.publication: str = item.date
|
|
||||||
self.draft: bool = item.statut == "publie"
|
|
||||||
self.sector_id: int = item.id_secteur
|
|
||||||
self.update: str = item.maj
|
|
||||||
self.lang: str = item.lang
|
|
||||||
self.set_lang: bool = item.langue_choisie == "oui" # TODO Why ?
|
|
||||||
self.translation_key: int = item.id_trad
|
|
||||||
self.extra: str = convert_body(item.extra) # Probably unused
|
|
||||||
|
|
||||||
def get_slug(self, date: bool = False) -> str:
|
|
||||||
return slugify((self.publication + "-" if date else "") + self.title)
|
|
||||||
|
|
||||||
def get_filename(self) -> str:
|
|
||||||
return "index" + "." + self.lang + "." + EXPORTTYPE
|
|
||||||
|
|
||||||
def get_frontmatter(self, append: Optional[dict[str, Any]] = None) -> str:
|
|
||||||
return dump(
|
|
||||||
{
|
|
||||||
"lang": self.lang,
|
|
||||||
"translationKey": self.translation_key,
|
|
||||||
"title": self.title,
|
|
||||||
"publishDate": self.publication,
|
|
||||||
"lastmod": self.update,
|
|
||||||
"draft": self.draft,
|
|
||||||
"description": self.description,
|
|
||||||
# Debugging
|
|
||||||
"spip_id": self.id,
|
|
||||||
"spip_id_secteur": self.sector_id,
|
|
||||||
}
|
|
||||||
| append
|
|
||||||
if append is not None
|
|
||||||
else {},
|
|
||||||
allow_unicode=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_body(self) -> str:
|
|
||||||
body: str = ""
|
|
||||||
# Add the title as a Markdown h1
|
|
||||||
if len(self.title) > 0:
|
|
||||||
body += "\n\n# " + self.title
|
|
||||||
# If there is a text, add the text preceded by two line breaks
|
|
||||||
if len(self.text) > 0:
|
|
||||||
# Convert images & files links
|
|
||||||
text: str = convert_documents(
|
|
||||||
self.text,
|
|
||||||
[(d.id, d.title, d.get_slug()) for d in self.get_documents()],
|
|
||||||
)
|
|
||||||
# Remove remaining HTML after & append to body
|
|
||||||
body += "\n\n" + remove_tags(text)
|
|
||||||
# Same with an "extra" section
|
|
||||||
if len(self.extra) > 0:
|
|
||||||
body += "\n\n# EXTRA\n\n" + self.extra
|
|
||||||
return body
|
|
||||||
|
|
||||||
def get_content(self) -> str:
|
|
||||||
# Return the final article text
|
|
||||||
return "---\n" + self.get_frontmatter() + "---" + self.get_body()
|
|
||||||
|
|
||||||
def get_documents(self) -> Documents:
|
|
||||||
return Documents(self.id)
|
|
||||||
|
|
||||||
|
|
||||||
class Article(Item):
|
|
||||||
def __init__(self, article: SpipArticles):
|
|
||||||
super().__init__(article)
|
|
||||||
self.id: int = article.id_article
|
|
||||||
self.surtitle: str = convert_meta(article.surtitre) # Probably unused
|
|
||||||
self.subtitle: str = convert_meta(article.soustitre) # Probably unused
|
|
||||||
self.caption: str = convert_body(article.chapo) # Probably unused
|
|
||||||
self.ps: str = convert_body(article.ps) # Probably unused
|
|
||||||
self.update_2: str = article.date_modif # Probably unused duplicate of maj
|
|
||||||
self.creation: str = article.date_redac
|
|
||||||
self.forum: bool = article.accepter_forum == "oui" # TODO Why ?
|
|
||||||
self.sitename: str = article.nom_site # Probably useless
|
|
||||||
self.virtual: str = article.virtuel # TODO Why ?
|
|
||||||
self.microblog: str = article.microblog # Probably unused
|
|
||||||
# self.export = article.export # USELESS
|
|
||||||
# self.views: int = article.visites # USELESS in static
|
|
||||||
# self.referers: int = article.referers # USELESS in static
|
|
||||||
# self.popularity: float = article.popularite # USELESS in static
|
|
||||||
# self.version = article.id_version # USELESS
|
|
||||||
|
|
||||||
def get_authors(self) -> list[SpipAuteurs]:
|
|
||||||
return (
|
|
||||||
SpipAuteurs.select()
|
|
||||||
.join(
|
|
||||||
SpipAuteursLiens,
|
|
||||||
on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur),
|
|
||||||
)
|
|
||||||
.where(SpipAuteursLiens.id_objet == self.id)
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_frontmatter(self, append: Optional[dict[str, Any]] = None) -> str:
|
|
||||||
return super().get_frontmatter(
|
|
||||||
{
|
|
||||||
"surtitle": self.surtitle,
|
|
||||||
"subtitle": self.subtitle,
|
|
||||||
"date": self.creation,
|
|
||||||
"authors": [author.nom for author in self.get_authors()],
|
|
||||||
# Debugging
|
|
||||||
"spip_id_rubrique": self.section_id,
|
|
||||||
"spip_id_secteur": self.sector_id,
|
|
||||||
"spip_chapo": self.caption,
|
|
||||||
}
|
|
||||||
| append
|
|
||||||
if append is not None
|
|
||||||
else {},
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_body(self) -> str:
|
|
||||||
body: str = super().get_body()
|
|
||||||
# If there is a caption, add the caption followed by a hr
|
|
||||||
if hasattr(self, "caption") and len(self.caption) > 0:
|
|
||||||
body += "\n\n" + self.caption + "\n\n***"
|
|
||||||
# PS
|
|
||||||
if hasattr(self, "ps") and len(self.ps) > 0:
|
|
||||||
body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
|
|
||||||
# Microblog
|
|
||||||
if hasattr(self, "microblog") and len(self.microblog) > 0:
|
|
||||||
body += "\n\n# MICROBLOGGING\n\n" + self.microblog
|
|
||||||
return body
|
|
||||||
|
|
||||||
|
|
||||||
class Section(Item):
|
|
||||||
def __init__(self, section: SpipRubriques):
|
|
||||||
super().__init__(section)
|
|
||||||
self.id: int = section.id_rubrique
|
|
||||||
self.parent_id: int = section.id_parent
|
|
||||||
self.depth: int = section.profondeur
|
|
||||||
self.agenda: int = section.agenda
|
|
||||||
|
|
||||||
def get_filename(self) -> str:
|
|
||||||
return "_" + super().get_filename()
|
|
||||||
|
|
||||||
def get_articles(self, limit: int = 0):
|
|
||||||
return Articles(self.id, limit)
|
|
||||||
|
|
||||||
|
|
||||||
class Articles(Iterator):
|
|
||||||
def __init__(self, section_id: int, limit: int = 0):
|
|
||||||
# Query the DB to retrieve all articles sorted by publication date
|
|
||||||
if limit > 0:
|
|
||||||
items = (
|
|
||||||
SpipArticles.select()
|
|
||||||
.where(SpipArticles.id_rubrique == section_id)
|
|
||||||
.order_by(SpipArticles.date.desc())
|
|
||||||
.limit(limit)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
items = (
|
|
||||||
SpipArticles.select()
|
|
||||||
.where(SpipArticles.id_rubrique == section_id)
|
|
||||||
.order_by(SpipArticles.date.desc())
|
|
||||||
)
|
|
||||||
self.items: list[Article] = [Article(i) for i in items]
|
|
||||||
super().__init__()
|
|
||||||
|
|
||||||
|
|
||||||
class Sections(Iterator):
|
|
||||||
def __init__(self, limit: int = 0):
|
|
||||||
# Query the DB to retrieve all sections sorted by publication date
|
|
||||||
if limit > 0:
|
|
||||||
items = (
|
|
||||||
SpipRubriques.select().order_by(SpipRubriques.date.desc()).limit(limit)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
items = SpipRubriques.select().order_by(SpipRubriques.date.desc())
|
|
||||||
self.items: list[Section] = [Section(i) for i in items]
|
|
||||||
super().__init__()
|
|
@ -1,18 +1,19 @@
|
|||||||
#!python
|
#!python
|
||||||
# pyright: strict
|
|
||||||
from os import makedirs
|
from os import makedirs
|
||||||
from os.path import expanduser
|
from os.path import expanduser
|
||||||
from shutil import copyfile, rmtree
|
from shutil import copyfile, rmtree
|
||||||
from sys import argv
|
from sys import argv
|
||||||
|
|
||||||
from config import config
|
from config import config
|
||||||
from converter import get_unknown_chars, unknown_chars
|
from converters import unknown_chars, unknown_chars_context
|
||||||
from database import db
|
from database import DB
|
||||||
from items import (
|
from spipobjects import (
|
||||||
Article,
|
Article,
|
||||||
Document,
|
Document,
|
||||||
Section,
|
Rubrique,
|
||||||
Sections,
|
get_articles,
|
||||||
|
get_documents,
|
||||||
|
get_sections,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -64,26 +65,27 @@ def indent(nb: int = 1) -> None:
|
|||||||
|
|
||||||
|
|
||||||
# Connect to the MySQL database with Peewee ORM
|
# Connect to the MySQL database with Peewee ORM
|
||||||
db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
|
DB.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
|
||||||
db.connect()
|
DB.connect()
|
||||||
|
|
||||||
|
|
||||||
# Output information about ongoing export & write section to output destination
|
# Output information about ongoing export & write section to output destination
|
||||||
def write_section(index: int, total: int, section: Section) -> str:
|
def write_section(index: int, total: int, section: Rubrique) -> str:
|
||||||
|
color = G # Associate sections to green
|
||||||
# Print the name of the exported section & number of remaining sections
|
# Print the name of the exported section & number of remaining sections
|
||||||
style(f"{index + 1}. ", BO)
|
style(f"{index + 1}. ", BO)
|
||||||
highlight(section.title, *unknown_chars(section.title))
|
highlight(section.titre, *unknown_chars(section.titre))
|
||||||
style(f" {total-index-1}", BO, G)
|
style(f" {total-index-1}", BO, color)
|
||||||
style(f" section{s(total-index)} left")
|
style(f" section{s(total-index)} left")
|
||||||
# Define the section’s path (directory) & create directory(ies) if needed
|
# Define the section’s path (directory) & create directory(ies) if needed
|
||||||
sectiondir: str = config.output_dir + "/" + section.get_slug()
|
sectiondir: str = config.output_dir + "/" + section.slug()
|
||||||
makedirs(sectiondir, exist_ok=True)
|
makedirs(sectiondir, exist_ok=True)
|
||||||
# Define the section filename & write the index at that filename
|
# Define the section filename & write the index at that filename
|
||||||
sectionpath: str = sectiondir + "/" + section.get_filename()
|
sectionpath: str = sectiondir + "/" + section.filename()
|
||||||
with open(sectionpath, "w") as f:
|
with open(sectionpath, "w") as f:
|
||||||
f.write(section.get_content())
|
f.write(section.content())
|
||||||
# Print export location when finished exporting
|
# Print export location when finished exporting
|
||||||
style(" -> ", BO, G)
|
style(" -> ", BO, color)
|
||||||
print(sectionpath)
|
print(sectionpath)
|
||||||
# Return the first "limit" articles of section
|
# Return the first "limit" articles of section
|
||||||
return sectiondir
|
return sectiondir
|
||||||
@ -91,30 +93,31 @@ def write_section(index: int, total: int, section: Section) -> str:
|
|||||||
|
|
||||||
# Output information about ongoing export & write article to output destination
|
# Output information about ongoing export & write article to output destination
|
||||||
def write_article(index: int, total: int, article: Article, sectiondir: str) -> str:
|
def write_article(index: int, total: int, article: Article, sectiondir: str) -> str:
|
||||||
|
color = Y # Associate articles to yellow
|
||||||
# Print the remaining number of articles to export every 100 articles
|
# Print the remaining number of articles to export every 100 articles
|
||||||
if index % 100 == 0:
|
if index % 100 == 0:
|
||||||
indent()
|
indent()
|
||||||
print("Exporting", end="")
|
print("Exporting", end="")
|
||||||
style(f" {total-index}", BO, Y)
|
style(f" {total-index}", BO, color)
|
||||||
print(" SPIP", end="")
|
print(" SPIP", end="")
|
||||||
style(f" article{s(total-index)}")
|
style(f" article{s(total-index)}")
|
||||||
print(" to Markdown & YAML files")
|
print(" to Markdown & YAML files")
|
||||||
# Print the title of the article being exported
|
# Print the title of the article being exported
|
||||||
style(
|
style(
|
||||||
f" {index + 1}. "
|
f" {index + 1}. "
|
||||||
+ ("EMPTY " if len(article.text) < 1 else "")
|
+ ("EMPTY " if len(article.texte) < 1 else "")
|
||||||
+ f"{article.lang} "
|
+ f"{article.lang} "
|
||||||
)
|
)
|
||||||
highlight(article.title, *unknown_chars(article.title))
|
highlight(article.titre, *unknown_chars(article.titre))
|
||||||
# Define the full article path & create directory(ies) if needed
|
# Define the full article path & create directory(ies) if needed
|
||||||
articledir: str = sectiondir + "/" + article.get_slug()
|
articledir: str = sectiondir + "/" + article.slug()
|
||||||
makedirs(articledir, exist_ok=True)
|
makedirs(articledir, exist_ok=True)
|
||||||
# Define the article filename & write the article at the filename
|
# Define the article filename & write the article at the filename
|
||||||
articlepath: str = articledir + "/" + article.get_filename()
|
articlepath: str = articledir + "/" + article.filename()
|
||||||
with open(articlepath, "w") as f:
|
with open(articlepath, "w") as f:
|
||||||
f.write(article.get_content())
|
f.write(article.content())
|
||||||
# Print export location when finished exporting
|
# Print export location when finished exporting
|
||||||
style(" -> ", BO, B)
|
style(" -> ", BO, color)
|
||||||
print(articlepath)
|
print(articlepath)
|
||||||
return articledir
|
return articledir
|
||||||
|
|
||||||
@ -123,34 +126,35 @@ def write_article(index: int, total: int, article: Article, sectiondir: str) ->
|
|||||||
def write_document(
|
def write_document(
|
||||||
index: int, total: int, document: Document, objectdir: str, indent_depth: int = 1
|
index: int, total: int, document: Document, objectdir: str, indent_depth: int = 1
|
||||||
) -> None:
|
) -> None:
|
||||||
|
color = B # Associate documents to blue
|
||||||
if index % 100 == 0:
|
if index % 100 == 0:
|
||||||
indent(indent_depth)
|
indent(indent_depth)
|
||||||
print("Exporting", end="")
|
print("Exporting", end="")
|
||||||
style(f" {total-index}", BO, B)
|
style(f" {total-index}", BO, color)
|
||||||
style(f" document{s(total-index)}\n")
|
style(f" document{s(total-index)}\n")
|
||||||
# Print the name of the file with a counter
|
# Print the name of the file with a counter
|
||||||
indent(indent_depth)
|
indent(indent_depth)
|
||||||
style(f"{index + 1}. {document.media} ")
|
style(f"{index + 1}. {document.media} ")
|
||||||
if len(document.title) > 0:
|
if len(document.titre) > 0:
|
||||||
highlight(document.title + " ", *unknown_chars(document.title))
|
highlight(document.titre + " ", *unknown_chars(document.titre))
|
||||||
style("at ")
|
style("at ")
|
||||||
print(document.file, end="")
|
print(document.fichier, end="")
|
||||||
# Define document path
|
# Define document path
|
||||||
documentpath: str = expanduser(config.data_dir + "/" + document.file)
|
documentpath: str = expanduser(config.data_dir + "/" + document.fichier)
|
||||||
# Copy the document from it’s SPIP location to the new location
|
# Copy the document from it’s SPIP location to the new location
|
||||||
try:
|
try:
|
||||||
copyfile(documentpath, objectdir + "/" + document.get_slug())
|
copyfile(documentpath, objectdir + "/" + document.slug())
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
style(" -> NOT FOUND!\n", BO, R)
|
style(" -> NOT FOUND!\n", BO, R)
|
||||||
else:
|
else:
|
||||||
# Print the outputted file’s path when copied the file
|
# Print the outputted file’s path when copied the file
|
||||||
style(" ->", BO, B)
|
style(" ->", BO, color)
|
||||||
print(f" {objectdir}/{document.get_slug()}")
|
print(f" {objectdir}/{document.slug()}")
|
||||||
|
|
||||||
|
|
||||||
# Return true if an article field contains an unknown character
|
# Return true if an article field contains an unknown character
|
||||||
def has_unknown_chars(article: Article) -> bool:
|
def has_unknown_chars(article: Article) -> bool:
|
||||||
if len(get_unknown_chars(article.text)) > 0:
|
if len(unknown_chars_context(article.texte)) > 0:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -159,13 +163,13 @@ def has_unknown_chars(article: Article) -> bool:
|
|||||||
def warn_unknown_chars(article: Article) -> None:
|
def warn_unknown_chars(article: Article) -> None:
|
||||||
# Print the title of the article in which there is unknown characters
|
# Print the title of the article in which there is unknown characters
|
||||||
# & the number of them
|
# & the number of them
|
||||||
unknown_chars_apparitions: list[str] = get_unknown_chars(article.text)
|
unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte)
|
||||||
nb: int = len(unknown_chars_apparitions)
|
nb: int = len(unknown_chars_apparitions)
|
||||||
s: str = "s" if nb > 1 else ""
|
s: str = "s" if nb > 1 else ""
|
||||||
style(f"{nb}")
|
style(f"{nb}")
|
||||||
print(f" unknown character{s} in", end="")
|
print(f" unknown character{s} in", end="")
|
||||||
style(f" {article.lang} ")
|
style(f" {article.lang} ")
|
||||||
highlight(article.title, *unknown_chars(article.title))
|
highlight(article.titre, *unknown_chars(article.titre))
|
||||||
print() # Break line
|
print() # Break line
|
||||||
# Print the context in which the unknown characters are found
|
# Print the context in which the unknown characters are found
|
||||||
for text in unknown_chars_apparitions:
|
for text in unknown_chars_apparitions:
|
||||||
@ -197,7 +201,7 @@ if __name__ == "__main__":
|
|||||||
unknown_chars_articles: list[Article] = []
|
unknown_chars_articles: list[Article] = []
|
||||||
|
|
||||||
# Get sections with an eventual maximum
|
# Get sections with an eventual maximum
|
||||||
sections = Sections(max_sections_export)
|
sections = get_sections(max_sections_export)
|
||||||
nb_sections_export: int = len(sections)
|
nb_sections_export: int = len(sections)
|
||||||
|
|
||||||
# Loop among sections & export them
|
# Loop among sections & export them
|
||||||
@ -205,11 +209,11 @@ if __name__ == "__main__":
|
|||||||
# Write the section & store its articles
|
# Write the section & store its articles
|
||||||
sectiondir = write_section(i, nb_sections_export, section)
|
sectiondir = write_section(i, nb_sections_export, section)
|
||||||
# Loop over section’s related files (images …)
|
# Loop over section’s related files (images …)
|
||||||
documents = section.get_documents()
|
documents = get_documents(section.id_rubrique)
|
||||||
for i, document in enumerate(documents):
|
for i, document in enumerate(documents):
|
||||||
write_document(i, len(documents), document, sectiondir)
|
write_document(i, len(documents), document, sectiondir)
|
||||||
# Loop over section’s articles
|
# Loop over section’s articles
|
||||||
articles = section.get_articles(max_articles_export)
|
articles = get_articles(section.id_rubrique, (max_articles_export))
|
||||||
for i, article in enumerate(articles):
|
for i, article in enumerate(articles):
|
||||||
articledir = write_article(i, len(articles), article, sectiondir)
|
articledir = write_article(i, len(articles), article, sectiondir)
|
||||||
# Add article to unknown_chars_articles if needed
|
# Add article to unknown_chars_articles if needed
|
||||||
@ -218,7 +222,7 @@ if __name__ == "__main__":
|
|||||||
# Decrement export limit
|
# Decrement export limit
|
||||||
max_articles_export -= 1
|
max_articles_export -= 1
|
||||||
# Loop over article’s related files (images …)
|
# Loop over article’s related files (images …)
|
||||||
documents = section.get_documents()
|
documents = get_documents(article.id_article)
|
||||||
for i, document in enumerate(documents):
|
for i, document in enumerate(documents):
|
||||||
write_document(i, len(documents), document, sectiondir, 2)
|
write_document(i, len(documents), document, sectiondir, 2)
|
||||||
# Break line when finished exporting the section
|
# Break line when finished exporting the section
|
||||||
@ -229,4 +233,4 @@ if __name__ == "__main__":
|
|||||||
for article in unknown_chars_articles:
|
for article in unknown_chars_articles:
|
||||||
warn_unknown_chars(article)
|
warn_unknown_chars(article)
|
||||||
|
|
||||||
db.close() # Close the connection with the database
|
DB.close() # Close the connection with the database
|
||||||
|
212
spip2md/spipobjects.py
Normal file
212
spip2md/spipobjects.py
Normal file
@ -0,0 +1,212 @@
|
|||||||
|
from os.path import basename, splitext
|
||||||
|
|
||||||
|
from peewee import ModelSelect
|
||||||
|
from slugify import slugify
|
||||||
|
from yaml import dump
|
||||||
|
|
||||||
|
from converters import convert
|
||||||
|
from database import (
|
||||||
|
SpipArticles,
|
||||||
|
SpipAuteurs,
|
||||||
|
SpipAuteursLiens,
|
||||||
|
SpipDocuments,
|
||||||
|
SpipDocumentsLiens,
|
||||||
|
SpipRubriques,
|
||||||
|
)
|
||||||
|
|
||||||
|
# File extension appended to every exported file (e.g. "index.fr.md").
EXPORTTYPE: str = "md"

# TODO: convert images & files links inside the text, e.g.:
# text: str = convert_documents(
#     self.texte,
#     [(d.id, d.titre, d.slug()) for d in self.documents()],
# )
|
||||||
|
|
||||||
|
|
||||||
|
class Document(SpipDocuments):
    """A SPIP document (image, attachment …) linked to other content.

    On instantiation, SPIP-formatted text fields are converted to
    Markdown and the SPIP status is mapped to a "draft"-style flag.
    """

    class Meta:
        table_name: str = "spip_documents"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Titles & descriptions are single-line: convert with title mode on
        self.titre: str = convert(self.titre, True)
        self.descriptif: str = convert(self.descriptif, True)
        # "publie" means published, so the draft flag is "false"
        self.statut: str = "false" if self.statut == "publie" else "true"

    def slug(self, date: bool = False) -> str:
        """Return a URL-safe filename for this document.

        The original file extension is preserved untouched; when `date`
        is True the publication date is prefixed to the name.
        """
        name_type: tuple[str, str] = splitext(basename(self.fichier))
        # str() guards against date_publication being a date/datetime
        # object rather than a string (Peewee date fields) — TODO confirm
        prefix: str = str(self.date_publication) + "-" if date else ""
        return slugify(prefix + name_type[0]) + name_type[1]
|
||||||
|
|
||||||
|
|
||||||
|
class Article(SpipArticles):
    """A SPIP article, converted to Markdown on instantiation, with
    helpers to render the exported file (frontmatter + body)."""

    class Meta:
        table_name: str = "spip_articles"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.titre: str = convert(self.titre, True)
        self.descriptif: str = convert(self.descriptif, True)
        self.texte: str = convert(self.texte)  # Convert SPIP to Markdown
        # Map SPIP status/flags to "true"/"false" strings for YAML output
        self.statut: str = "false" if self.statut == "publie" else "true"
        self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
        self.extra: str = convert(self.extra)  # Probably unused
        # Article specific
        self.surtitle: str = convert(self.surtitre, True)  # Probably unused
        self.subtitle: str = convert(self.soustitre, True)  # Probably unused
        self.caption: str = convert(self.chapo)  # Probably unused
        self.ps: str = convert(self.ps)  # Probably unused
        self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"

    def slug(self, date: bool = False) -> str:
        """Return a URL-safe name for this article, optionally prefixed
        with the publication date."""
        # str() guards against self.date being a date/datetime object
        return slugify((str(self.date) + "-" if date else "") + self.titre)

    def filename(self) -> str:
        """Return the export filename, e.g. "index.fr.md"."""
        return "index" + "." + self.lang + "." + EXPORTTYPE

    def frontmatter(self) -> str:
        """Return the YAML frontmatter of this article as a string."""
        return dump(
            {
                "lang": self.lang,
                "translationKey": self.id_trad,
                "title": self.titre,
                "publishDate": self.date,
                "lastmod": self.maj,
                "draft": self.statut,
                "description": self.descriptif,
                # Debugging
                "spip_id": self.id_article,
                "spip_id_secteur": self.id_secteur,
                # Article specific
                "surtitle": self.surtitle,
                "subtitle": self.subtitle,
                "date": self.date_redac,
                "authors": [author.nom for author in self.authors()],
                # Debugging
                "spip_id_rubrique": self.id_rubrique,
                "spip_chapo": self.caption,
            },
            allow_unicode=True,
        )

    def body(self) -> str:
        """Return the Markdown body: title, text, extra, caption,
        post-scriptum and microblog sections, in that order."""
        body: str = ""
        # Add the title as a Markdown h1
        if len(self.titre) > 0:
            body += "\n\n# " + self.titre
        # If there is a text, add the text preceded by two line breaks
        if len(self.texte) > 0:
            # BUGFIX: the converted text was previously never appended —
            # only the two line breaks were, dropping the article content
            body += "\n\n" + self.texte
        # Same with an "extra" section
        if len(self.extra) > 0:
            body += "\n\n# EXTRA\n\n" + self.extra
        # If there is a caption, add the caption followed by a hr
        if hasattr(self, "caption") and len(self.caption) > 0:
            body += "\n\n" + self.caption + "\n\n***"
        # PS
        if hasattr(self, "ps") and len(self.ps) > 0:
            body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
        # Microblog
        if hasattr(self, "microblog") and len(self.microblog) > 0:
            body += "\n\n# MICROBLOGGING\n\n" + self.microblog
        return body

    def content(self) -> str:
        """Return the final article text: frontmatter + body."""
        return "---\n" + self.frontmatter() + "---" + self.body()

    def authors(self) -> list[SpipAuteurs]:
        """Return the authors of this article via the SPIP link table."""
        return (
            SpipAuteurs.select()
            .join(
                SpipAuteursLiens,
                on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur),
            )
            .where(SpipAuteursLiens.id_objet == self.id_article)
        )
|
||||||
|
|
||||||
|
|
||||||
|
class Rubrique(SpipRubriques):
    """A SPIP section (rubrique), converted to Markdown on
    instantiation, with helpers to render the exported index file."""

    class Meta:
        table_name: str = "spip_rubriques"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.titre: str = convert(self.titre, True)
        self.descriptif: str = convert(self.descriptif, True)
        self.texte: str = convert(self.texte)  # Convert SPIP to Markdown
        # Map SPIP status/flags to "true"/"false" strings for YAML output
        self.statut: str = "false" if self.statut == "publie" else "true"
        self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
        self.extra: str = convert(self.extra)  # Probably unused

    def slug(self, date: bool = False) -> str:
        """Return a URL-safe name for this section, optionally prefixed
        with the publication date."""
        # str() guards against self.date being a date/datetime object
        return slugify((str(self.date) + "-" if date else "") + self.titre)

    def filename(self) -> str:
        """Return the export filename, e.g. "index.fr.md"."""
        return "index" + "." + self.lang + "." + EXPORTTYPE

    def frontmatter(self) -> str:
        """Return the YAML frontmatter of this section as a string."""
        return dump(
            {
                "lang": self.lang,
                "translationKey": self.id_trad,
                "title": self.titre,
                "publishDate": self.date,
                "lastmod": self.maj,
                "draft": self.statut,
                "description": self.descriptif,
                # Debugging
                "spip_id": self.id_rubrique,
                "spip_id_secteur": self.id_secteur,
            },
            allow_unicode=True,
        )

    def body(self) -> str:
        """Return the Markdown body: title, text and extra sections."""
        body: str = ""
        # Add the title as a Markdown h1
        if len(self.titre) > 0:
            body += "\n\n# " + self.titre
        # If there is a text, add the text preceded by two line breaks
        if len(self.texte) > 0:
            # BUGFIX: the converted text was previously never appended —
            # only the two line breaks were, dropping the section content
            body += "\n\n" + self.texte
        # Same with an "extra" section
        if len(self.extra) > 0:
            body += "\n\n# EXTRA\n\n" + self.extra
        return body

    def content(self) -> str:
        """Return the final section text: frontmatter + body."""
        return "---\n" + self.frontmatter() + "---" + self.body()
|
||||||
|
|
||||||
|
|
||||||
|
# Query the DB to retrieve all sections sorted by publication date
def get_sections(limit: int = 10**6) -> ModelSelect:
    """Return up to `limit` sections, most recently published first."""
    sections = Rubrique.select().order_by(Rubrique.date.desc())
    return sections.limit(limit)
|
||||||
|
|
||||||
|
|
||||||
|
# Query the DB to retrieve all articles sorted by publication date
def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
    """Return up to `limit` articles of the given section, newest first."""
    in_section = Article.id_rubrique == section_id
    query = Article.select().where(in_section)
    return query.order_by(Article.date.desc()).limit(limit)
|
||||||
|
|
||||||
|
|
||||||
|
# Query the DB to retrieve all documents related to object of id object_id
def get_documents(object_id: int, limit: int = 10**6) -> ModelSelect:
    """Return up to `limit` documents linked to the object `object_id`."""
    linked = Document.select().join(
        SpipDocumentsLiens,
        on=(Document.id_document == SpipDocumentsLiens.id_document),
    )
    return linked.where(SpipDocumentsLiens.id_objet == object_id).limit(limit)
|
Loading…
Reference in New Issue
Block a user